lib/Perl/Tidy.pm

   1 ############################################################
   2 #
   3 #    perltidy - a perl script indenter and formatter
   4 #
   5 #    Copyright (c) 2000-2006 by Steve Hancock
   6 #    Distributed under the GPL license agreement; see file COPYING
   7 #
   8 #    This program is free software; you can redistribute it and/or modify
   9 #    it under the terms of the GNU General Public License as published by
  10 #    the Free Software Foundation; either version 2 of the License, or
  11 #    (at your option) any later version.
  12 #
  13 #    This program is distributed in the hope that it will be useful,
  14 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 #    GNU General Public License for more details.
  17 #
  18 #    You should have received a copy of the GNU General Public License
  19 #    along with this program; if not, write to the Free Software
  20 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 #
  22 #    For brief instructions instructions, try 'perltidy -h'.
  23 #    For more complete documentation, try 'man perltidy'
  24 #    or visit http://perltidy.sourceforge.net
  25 #
  26 #    This script is an example of the default style.  It was formatted with:
  27 #
  28 #      perltidy Tidy.pm
  29 #
  30 #    Code Contributions:
  31 #      Michael Cartmell supplied code for adaptation to VMS and helped with
  32 #        v-strings.
  33 #      Hugh S. Myers supplied sub streamhandle and the supporting code to
  34 #        create a Perl::Tidy module which can operate on strings, arrays, etc.
  35 #      Yves Orton supplied coding to help detect Windows versions.
  36 #      Axel Rose supplied a patch for MacPerl.
  37 #      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
  38 #      Many others have supplied key ideas, suggestions, and bug reports;
  39 #        see the CHANGES file.
  40 #
  41 ############################################################
  42
  43 package Perl::Tidy;
  44 use 5.004;    # need IO::File from 5.004 or later
  45 BEGIN { $^W = 1; }    # turn on warnings
  46
  47 use strict;
  48 use Exporter;
  49 use Carp;
  50 $|++;
  51
  52 use vars qw{
  53   $VERSION
  54   @ISA
  55   @EXPORT
  56   $missing_file_spec
  57 };
  58
  59 @ISA    = qw( Exporter );
  60 @EXPORT = qw( &perltidy );
  61
  62 use IO::File;
  63 use File::Basename;
  64
  65 BEGIN {
  66     ( $VERSION = q($Id: Tidy.pm,v 1.56 2006/07/19 23:13:33 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  67 }
  68
  69 sub streamhandle {
  70
  71     # given filename and mode (r or w), create an object which:
  72     #   has a 'getline' method if mode='r', and
  73     #   has a 'print' method if mode='w'.
  74     # The objects also need a 'close' method.
  75     #
  76     # How the object is made:
  77     #
  78     # if $filename is:     Make object using:
  79     # ----------------     -----------------
  80     # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
  81     # string               IO::File
  82     # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
  83     # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
  84     # object               object
  85     #                      (check for 'print' method for 'w' mode)
  86     #                      (check for 'getline' method for 'r' mode)
  87     my $ref = ref( my $filename = shift );
  88     my $mode = shift;
  89     my $New;
  90     my $fh;
  91
  92     # handle a reference
  93     if ($ref) {
  94         if ( $ref eq 'ARRAY' ) {
  95             $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
  96         }
  97         elsif ( $ref eq 'SCALAR' ) {
  98             $New = sub { Perl::Tidy::IOScalar->new(@_) };
  99         }
 100         else {
 101
 102             # Accept an object with a getline method for reading. Note:
 103             # IO::File is built-in and does not respond to the defined
 104             # operator.  If this causes trouble, the check can be
 105             # skipped and we can just let it crash if there is no
 106             # getline.
 107             if ( $mode =~ /[rR]/ ) {
 108                 if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
 109                     $New = sub { $filename };
 110                 }
 111                 else {
 112                     $New = sub { undef };
 113                     confess <<EOM;
 114 ------------------------------------------------------------------------
 115 No 'getline' method is defined for object of class $ref
 116 Please check your call to Perl::Tidy::perltidy.  Trace follows.
 117 ------------------------------------------------------------------------
 118 EOM
 119                 }
 120             }
 121
 122             # Accept an object with a print method for writing.
 123             # See note above about IO::File
 124             if ( $mode =~ /[wW]/ ) {
 125                 if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
 126                     $New = sub { $filename };
 127                 }
 128                 else {
 129                     $New = sub { undef };
 130                     confess <<EOM;
 131 ------------------------------------------------------------------------
 132 No 'print' method is defined for object of class $ref
 133 Please check your call to Perl::Tidy::perltidy. Trace follows.
 134 ------------------------------------------------------------------------
 135 EOM
 136                 }
 137             }
 138         }
 139     }
 140
 141     # handle a string
 142     else {
 143         if ( $filename eq '-' ) {
 144             $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
 145         }
 146         else {
 147             $New = sub { IO::File->new(@_) };
 148         }
 149     }
 150     $fh = $New->( $filename, $mode )
 151       or warn "Couldn't open file:$filename in mode:$mode : $!\n";
 152     return $fh, ( $ref or $filename );
 153 }
 154
 155 sub find_input_line_ending {
 156
 157     # Peek at a file and return first line ending character.
 158     # Quietly return undef in case of any trouble.
 159     my ($input_file) = @_;
 160     my $ending;
 161
 162     # silently ignore input from object or stdin
 163     if ( ref($input_file) || $input_file eq '-' ) {
 164         return $ending;
 165     }
 166     open( INFILE, $input_file ) || return $ending;
 167
 168     binmode INFILE;
 169     my $buf;
 170     read( INFILE, $buf, 1024 );
 171     close INFILE;
 172     if ( $buf && $buf =~ /([\012\015]+)/ ) {
 173         my $test = $1;
 174
 175         # dos
 176         if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
 177
 178         # mac
 179         elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
 180
 181         # unix
 182         elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
 183
 184         # unknown
 185         else { }
 186     }
 187
 188     # no ending seen
 189     else { }
 190
 191     return $ending;
 192 }
 193
 194 sub catfile {
 195
 196     # concatenate a path and file basename
 197     # returns undef in case of error
 198
 199     BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
 200
 201     # use File::Spec if we can
 202     unless ($missing_file_spec) {
 203         return File::Spec->catfile(@_);
 204     }
 205
 206     # Perl 5.004 systems may not have File::Spec so we'll make
 207     # a simple try.  We assume File::Basename is available.
 208     # return undef if not successful.
 209     my $name      = pop @_;
 210     my $path      = join '/', @_;
 211     my $test_file = $path . $name;
 212     my ( $test_name, $test_path ) = fileparse($test_file);
 213     return $test_file if ( $test_name eq $name );
 214     return undef      if ( $^O        eq 'VMS' );
 215
 216     # this should work at least for Windows and Unix:
 217     $test_file = $path . '/' . $name;
 218     ( $test_name, $test_path ) = fileparse($test_file);
 219     return $test_file if ( $test_name eq $name );
 220     return undef;
 221 }
 222
 223 sub make_temporary_filename {
 224
 225     # Make a temporary filename.
 226     #
 227     # The POSIX tmpnam() function tends to be unreliable for non-unix
 228     # systems (at least for the win32 systems that I've tested), so use
 229     # a pre-defined name.  A slight disadvantage of this is that two
 230     # perltidy runs in the same working directory may conflict.
 231     # However, the chance of that is small and managable by the user.
 232     # An alternative would be to check for the file's existance and use,
 233     # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
 234     # keep it simple.
 235     my $name = "perltidy.TMP";
 236     if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
 237         return $name;
 238     }
 239     eval "use POSIX qw(tmpnam)";
 240     if ($@) { return $name }
 241     use IO::File;
 242
 243     # just make a couple of tries before giving up and using the default
 244     for ( 0 .. 1 ) {
 245         my $tmpname = tmpnam();
 246         my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
 247         if ($fh) {
 248             $fh->close();
 249             return ($tmpname);
 250             last;
 251         }
 252     }
 253     return ($name);
 254 }
 255
 256 # Here is a map of the flow of data from the input source to the output
 257 # line sink:
 258 #
 259 # LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
 260 #       input                         groups                 output
 261 #       lines   tokens      lines       of          lines    lines
 262 #                                      lines
 263 #
 264 # The names correspond to the package names responsible for the unit processes.
 265 #
 266 # The overall process is controlled by the "main" package.
 267 #
 268 # LineSource is the stream of input lines
 269 #
 270 # Tokenizer analyzes a line and breaks it into tokens, peeking ahead
 271 # if necessary.  A token is any section of the input line which should be
 272 # manipulated as a single entity during formatting.  For example, a single
 273 # ',' character is a token, and so is an entire side comment.  It handles
 274 # the complexities of Perl syntax, such as distinguishing between '<<' as
 275 # a shift operator and as a here-document, or distinguishing between '/'
 276 # as a divide symbol and as a pattern delimiter.
 277 #
 278 # Formatter inserts and deletes whitespace between tokens, and breaks
 279 # sequences of tokens at appropriate points as output lines.  It bases its
 280 # decisions on the default rules as modified by any command-line options.
 281 #
 282 # VerticalAligner collects groups of lines together and tries to line up
 283 # certain tokens, such as '=>', '#', and '=' by adding whitespace.
 284 #
 285 # FileWriter simply writes lines to the output stream.
 286 #
 287 # The Logger package, not shown, records significant events and warning
 288 # messages.  It writes a .LOG file, which may be saved with a
 289 # '-log' or a '-g' flag.
 290
 291 {
 292
 293     # variables needed by interrupt handler:
 294     my $tokenizer;
 295     my $input_file;
 296
 297     # this routine may be called to give a status report if interrupted.  If a
 298     # parameter is given, it will call exit with that parameter.  This is no
 299     # longer used because it works under Unix but not under Windows.
 300     sub interrupt_handler {
 301
 302         my $exit_flag = shift;
 303         print STDERR "perltidy interrupted";
 304         if ($tokenizer) {
 305             my $input_line_number =
 306               Perl::Tidy::Tokenizer::get_input_line_number();
 307             print STDERR " at line $input_line_number";
 308         }
 309         if ($input_file) {
 310
 311             if   ( ref $input_file ) { print STDERR " of reference to:" }
 312             else                     { print STDERR " of file:" }
 313             print STDERR " $input_file";
 314         }
 315         print STDERR "\n";
 316         exit $exit_flag if defined($exit_flag);
 317     }
 318
 319     sub perltidy {
 320
 321         my %defaults = (
 322             argv                  => undef,
 323             destination           => undef,
 324             formatter             => undef,
 325             logfile               => undef,
 326             errorfile             => undef,
 327             perltidyrc            => undef,
 328             source                => undef,
 329             stderr                => undef,
 330             dump_options          => undef,
 331             dump_options_type     => undef,
 332             dump_getopt_flags     => undef,
 333             dump_options_category => undef,
 334             dump_options_range    => undef,
 335             dump_abbreviations    => undef,
 336         );
 337
 338         # don't overwrite callers ARGV
 339         local @ARGV = @ARGV;
 340
 341         my %input_hash = @_;
 342
 343         if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
 344             local $" = ')(';
 345             my @good_keys = sort keys %defaults;
 346             @bad_keys = sort @bad_keys;
 347             confess <<EOM;
 348 ------------------------------------------------------------------------
 349 Unknown perltidy parameter : (@bad_keys)
 350 perltidy only understands : (@good_keys)
 351 ------------------------------------------------------------------------
 352
 353 EOM
 354         }
 355
 356         my $get_hash_ref = sub {
 357             my ($key) = @_;
 358             my $hash_ref = $input_hash{$key};
 359             if ( defined($hash_ref) ) {
 360                 unless ( ref($hash_ref) eq 'HASH' ) {
 361                     my $what = ref($hash_ref);
 362                     my $but_is =
 363                       $what ? "but is ref to $what" : "but is not a reference";
 364                     croak <<EOM;
 365 ------------------------------------------------------------------------
 366 error in call to perltidy:
 367 -$key must be reference to HASH $but_is
 368 ------------------------------------------------------------------------
 369 EOM
 370                 }
 371             }
 372             return $hash_ref;
 373         };
 374
 375         %input_hash = ( %defaults, %input_hash );
 376         my $argv               = $input_hash{'argv'};
 377         my $destination_stream = $input_hash{'destination'};
 378         my $errorfile_stream   = $input_hash{'errorfile'};
 379         my $logfile_stream     = $input_hash{'logfile'};
 380         my $perltidyrc_stream  = $input_hash{'perltidyrc'};
 381         my $source_stream      = $input_hash{'source'};
 382         my $stderr_stream      = $input_hash{'stderr'};
 383         my $user_formatter     = $input_hash{'formatter'};
 384
 385         # various dump parameters
 386         my $dump_options_type     = $input_hash{'dump_options_type'};
 387         my $dump_options          = $get_hash_ref->('dump_options');
 388         my $dump_getopt_flags     = $get_hash_ref->('dump_getopt_flags');
 389         my $dump_options_category = $get_hash_ref->('dump_options_category');
 390         my $dump_abbreviations    = $get_hash_ref->('dump_abbreviations');
 391         my $dump_options_range    = $get_hash_ref->('dump_options_range');
 392
 393         # validate dump_options_type
 394         if ( defined($dump_options) ) {
 395             unless ( defined($dump_options_type) ) {
 396                 $dump_options_type = 'perltidyrc';
 397             }
 398             unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
 399                 croak <<EOM;
 400 ------------------------------------------------------------------------
 401 Please check value of -dump_options_type in call to perltidy;
 402 saw: '$dump_options_type'
 403 expecting: 'perltidyrc' or 'full'
 404 ------------------------------------------------------------------------
 405 EOM
 406
 407             }
 408         }
 409         else {
 410             $dump_options_type = "";
 411         }
 412
 413         if ($user_formatter) {
 414
 415             # if the user defines a formatter, there is no output stream,
 416             # but we need a null stream to keep coding simple
 417             $destination_stream = Perl::Tidy::DevNull->new();
 418         }
 419
 420         # see if ARGV is overridden
 421         if ( defined($argv) ) {
 422
 423             my $rargv = ref $argv;
 424             if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
 425
 426             # ref to ARRAY
 427             if ($rargv) {
 428                 if ( $rargv eq 'ARRAY' ) {
 429                     @ARGV = @$argv;
 430                 }
 431                 else {
 432                     croak <<EOM;
 433 ------------------------------------------------------------------------
 434 Please check value of -argv in call to perltidy;
 435 it must be a string or ref to ARRAY but is: $rargv
 436 ------------------------------------------------------------------------
 437 EOM
 438                 }
 439             }
 440
 441             # string
 442             else {
 443                 my ( $rargv, $msg ) = parse_args($argv);
 444                 if ($msg) {
 445                     die <<EOM;
 446 Error parsing this string passed to to perltidy with 'argv':
 447 $msg
 448 EOM
 449                 }
 450                 @ARGV = @{$rargv};
 451             }
 452         }
 453
 454         # redirect STDERR if requested
 455         if ($stderr_stream) {
 456             my ( $fh_stderr, $stderr_file ) =
 457               Perl::Tidy::streamhandle( $stderr_stream, 'w' );
 458             if ($fh_stderr) { *STDERR = $fh_stderr }
 459             else {
 460                 croak <<EOM;
 461 ------------------------------------------------------------------------
 462 Unable to redirect STDERR to $stderr_stream
 463 Please check value of -stderr in call to perltidy
 464 ------------------------------------------------------------------------
 465 EOM
 466             }
 467         }
 468
 469         my $rpending_complaint;
 470         $$rpending_complaint = "";
 471         my $rpending_logfile_message;
 472         $$rpending_logfile_message = "";
 473
 474         my ( $is_Windows, $Windows_type ) =
 475           look_for_Windows($rpending_complaint);
 476
 477         # VMS file names are restricted to a 40.40 format, so we append _tdy
 478         # instead of .tdy, etc. (but see also sub check_vms_filename)
 479         my $dot;
 480         my $dot_pattern;
 481         if ( $^O eq 'VMS' ) {
 482             $dot         = '_';
 483             $dot_pattern = '_';
 484         }
 485         else {
 486             $dot         = '.';
 487             $dot_pattern = '\.';    # must escape for use in regex
 488         }
 489
 490         # handle command line options
 491         my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
 492             $rexpansion, $roption_category, $roption_range )
 493           = process_command_line(
 494             $perltidyrc_stream,  $is_Windows, $Windows_type,
 495             $rpending_complaint, $dump_options_type,
 496           );
 497
 498         # return or exit immediately after all dumps
 499         my $quit_now = 0;
 500
 501         # Getopt parameters and their flags
 502         if ( defined($dump_getopt_flags) ) {
 503             $quit_now = 1;
 504             foreach my $op ( @{$roption_string} ) {
 505                 my $opt  = $op;
 506                 my $flag = "";
 507                 if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
 508                     $opt  = $1;
 509                     $flag = $2;
 510                 }
 511                 $dump_getopt_flags->{$opt} = $flag;
 512             }
 513         }
 514
 515         if ( defined($dump_options_category) ) {
 516             $quit_now = 1;
 517             %{$dump_options_category} = %{$roption_category};
 518         }
 519
 520         if ( defined($dump_options_range) ) {
 521             $quit_now = 1;
 522             %{$dump_options_range} = %{$roption_range};
 523         }
 524
 525         if ( defined($dump_abbreviations) ) {
 526             $quit_now = 1;
 527             %{$dump_abbreviations} = %{$rexpansion};
 528         }
 529
 530         if ( defined($dump_options) ) {
 531             $quit_now = 1;
 532             %{$dump_options} = %{$rOpts};
 533         }
 534
 535         return if ($quit_now);
 536
 537         # dump from command line
 538         if ( $rOpts->{'dump-options'} ) {
 539             dump_options( $rOpts, $roption_string );
 540             exit 1;
 541         }
 542
 543         check_options( $rOpts, $is_Windows, $Windows_type,
 544             $rpending_complaint );
 545
 546         if ($user_formatter) {
 547             $rOpts->{'format'} = 'user';
 548         }
 549
 550         # there must be one entry here for every possible format
 551         my %default_file_extension = (
 552             tidy => 'tdy',
 553             html => 'html',
 554             user => '',
 555         );
 556
 557         # be sure we have a valid output format
 558         unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
 559             my $formats = join ' ',
 560               sort map { "'" . $_ . "'" } keys %default_file_extension;
 561             my $fmt = $rOpts->{'format'};
 562             die "-format='$fmt' but must be one of: $formats\n";
 563         }
 564
 565         my $output_extension =
 566           make_extension( $rOpts->{'output-file-extension'},
 567             $default_file_extension{ $rOpts->{'format'} }, $dot );
 568
 569         my $backup_extension =
 570           make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
 571
 572         my $html_toc_extension =
 573           make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
 574
 575         my $html_src_extension =
 576           make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
 577
 578         # check for -b option;
 579         my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
 580           && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
 581           && @ARGV > 0;    # silently ignore if standard input;
 582                            # this allows -b to be in a .perltidyrc file
 583                            # without error messages when running from an editor
 584
 585         # turn off -b with warnings in case of conflicts with other options
 586         if ($in_place_modify) {
 587             if ( $rOpts->{'standard-output'} ) {
 588                 warn "Ignoring -b; you may not use -b and -st together\n";
 589                 $in_place_modify = 0;
 590             }
 591             if ($destination_stream) {
 592                 warn
 593 "Ignoring -b; you may not specify a destination array and -b together\n";
 594                 $in_place_modify = 0;
 595             }
 596             if ($source_stream) {
 597                 warn
 598 "Ignoring -b; you may not specify a source array and -b together\n";
 599                 $in_place_modify = 0;
 600             }
 601             if ( $rOpts->{'outfile'} ) {
 602                 warn "Ignoring -b; you may not use -b and -o together\n";
 603                 $in_place_modify = 0;
 604             }
 605             if ( defined( $rOpts->{'output-path'} ) ) {
 606                 warn "Ignoring -b; you may not use -b and -opath together\n";
 607                 $in_place_modify = 0;
 608             }
 609         }
 610
 611         Perl::Tidy::Formatter::check_options($rOpts);
 612         if ( $rOpts->{'format'} eq 'html' ) {
 613             Perl::Tidy::HtmlWriter->check_options($rOpts);
 614         }
 615
 616         # make the pattern of file extensions that we shouldn't touch
 617         my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
 618         if ($output_extension) {
 619             $_ = quotemeta($output_extension);
 620             $forbidden_file_extensions .= "|$_";
 621         }
 622         if ( $in_place_modify && $backup_extension ) {
 623             $_ = quotemeta($backup_extension);
 624             $forbidden_file_extensions .= "|$_";
 625         }
 626         $forbidden_file_extensions .= ')$';
 627
 628         # Create a diagnostics object if requested;
 629         # This is only useful for code development
 630         my $diagnostics_object = undef;
 631         if ( $rOpts->{'DIAGNOSTICS'} ) {
 632             $diagnostics_object = Perl::Tidy::Diagnostics->new();
 633         }
 634
 635         # no filenames should be given if input is from an array
 636         if ($source_stream) {
 637             if ( @ARGV > 0 ) {
 638                 die
 639 "You may not specify any filenames when a source array is given\n";
 640             }
 641
 642             # we'll stuff the source array into ARGV
 643             unshift( @ARGV, $source_stream );
 644
 645             # No special treatment for source stream which is a filename.
 646             # This will enable checks for binary files and other bad stuff.
 647             $source_stream = undef unless ref($source_stream);
 648         }
 649
 650         # use stdin by default if no source array and no args
 651         else {
 652             unshift( @ARGV, '-' ) unless @ARGV;
 653         }
 654
 655         # loop to process all files in argument list
 656         my $number_of_files = @ARGV;
 657         my $formatter       = undef;
 658         $tokenizer = undef;
 659         while ( $input_file = shift @ARGV ) {
 660             my $fileroot;
 661             my $input_file_permissions;
 662
 663             #---------------------------------------------------------------
 664             # determine the input file name
 665             #---------------------------------------------------------------
 666             if ($source_stream) {
 667                 $fileroot = "perltidy";
 668             }
 669             elsif ( $input_file eq '-' ) {    # '-' indicates input from STDIN
 670                 $fileroot = "perltidy";   # root name to use for .ERR, .LOG, etc
 671                 $in_place_modify = 0;
 672             }
 673             else {
 674                 $fileroot = $input_file;
 675                 unless ( -e $input_file ) {
 676
 677                     # file doesn't exist - check for a file glob
 678                     if ( $input_file =~ /([\?\*\[\{])/ ) {
 679
 680                         # Windows shell may not remove quotes, so do it
 681                         my $input_file = $input_file;
 682                         if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
 683                         if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
 684                         my $pattern = fileglob_to_re($input_file);
 685                         eval "/$pattern/";
 686                         if ( !$@ && opendir( DIR, './' ) ) {
 687                             my @files =
 688                               grep { /$pattern/ && !-d $_ } readdir(DIR);
 689                             closedir(DIR);
 690                             if (@files) {
 691                                 unshift @ARGV, @files;
 692                                 next;
 693                             }
 694                         }
 695                     }
 696                     print "skipping file: '$input_file': no matches found\n";
 697                     next;
 698                 }
 699
 700                 unless ( -f $input_file ) {
 701                     print "skipping file: $input_file: not a regular file\n";
 702                     next;
 703                 }
 704
 705                 unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
 706                     print
 707 "skipping file: $input_file: Non-text (override with -f)\n";
 708                     next;
 709                 }
 710
 711                 # we should have a valid filename now
 712                 $fileroot               = $input_file;
 713                 $input_file_permissions = ( stat $input_file )[2] & 07777;
 714
 715                 if ( $^O eq 'VMS' ) {
 716                     ( $fileroot, $dot ) = check_vms_filename($fileroot);
 717                 }
 718
 719                 # add option to change path here
 720                 if ( defined( $rOpts->{'output-path'} ) ) {
 721
 722                     my ( $base, $old_path ) = fileparse($fileroot);
 723                     my $new_path = $rOpts->{'output-path'};
 724                     unless ( -d $new_path ) {
 725                         unless ( mkdir $new_path, 0777 ) {
 726                             die "unable to create directory $new_path: $!\n";
 727                         }
 728                     }
 729                     my $path = $new_path;
 730                     $fileroot = catfile( $path, $base );
 731                     unless ($fileroot) {
 732                         die <<EOM;
 733 ------------------------------------------------------------------------
 734 Problem combining $new_path and $base to make a filename; check -opath
 735 ------------------------------------------------------------------------
 736 EOM
 737                     }
 738                 }
 739             }
 740
 741             # Skip files with same extension as the output files because
 742             # this can lead to a messy situation with files like
 743             # script.tdy.tdy.tdy ... or worse problems ...  when you
 744             # rerun perltidy over and over with wildcard input.
 745             if (
 746                 !$source_stream
 747                 && (   $input_file =~ /$forbidden_file_extensions/o
 748                     || $input_file eq 'DIAGNOSTICS' )
 749               )
 750             {
 751                 print "skipping file: $input_file: wrong extension\n";
 752                 next;
 753             }
 754
 755             # the 'source_object' supplies a method to read the input file
 756             my $source_object =
 757               Perl::Tidy::LineSource->new( $input_file, $rOpts,
 758                 $rpending_logfile_message );
 759             next unless ($source_object);
 760
 761             # register this file name with the Diagnostics package
 762             $diagnostics_object->set_input_file($input_file)
 763               if $diagnostics_object;
 764
 765             #---------------------------------------------------------------
 766             # determine the output file name
 767             #---------------------------------------------------------------
 768             my $output_file = undef;
 769             my $actual_output_extension;
 770
 771             if ( $rOpts->{'outfile'} ) {
 772
 773                 if ( $number_of_files <= 1 ) {
 774
 775                     if ( $rOpts->{'standard-output'} ) {
 776                         die "You may not use -o and -st together\n";
 777                     }
 778                     elsif ($destination_stream) {
 779                         die
 780 "You may not specify a destination array and -o together\n";
 781                     }
 782                     elsif ( defined( $rOpts->{'output-path'} ) ) {
 783                         die "You may not specify -o and -opath together\n";
 784                     }
 785                     elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
 786                         die "You may not specify -o and -oext together\n";
 787                     }
 788                     $output_file = $rOpts->{outfile};
 789
 790                     # make sure user gives a file name after -o
 791                     if ( $output_file =~ /^-/ ) {
 792                         die "You must specify a valid filename after -o\n";
 793                     }
 794
 795                     # do not overwrite input file with -o
 796                     if ( defined($input_file_permissions)
 797                         && ( $output_file eq $input_file ) )
 798                     {
 799                         die
 800                           "Use 'perltidy -b $input_file' to modify in-place\n";
 801                     }
 802                 }
 803                 else {
 804                     die "You may not use -o with more than one input file\n";
 805                 }
 806             }
 807             elsif ( $rOpts->{'standard-output'} ) {
 808                 if ($destination_stream) {
 809                     die
 810 "You may not specify a destination array and -st together\n";
 811                 }
 812                 $output_file = '-';
 813
 814                 if ( $number_of_files <= 1 ) {
 815                 }
 816                 else {
 817                     die "You may not use -st with more than one input file\n";
 818                 }
 819             }
 820             elsif ($destination_stream) {
 821                 $output_file = $destination_stream;
 822             }
 823             elsif ($source_stream) {  # source but no destination goes to stdout
 824                 $output_file = '-';
 825             }
 826             elsif ( $input_file eq '-' ) {
 827                 $output_file = '-';
 828             }
 829             else {
 830                 if ($in_place_modify) {
 831                     $output_file = IO::File->new_tmpfile()
 832                       or die "cannot open temp file for -b option: $!\n";
 833                 }
 834                 else {
 835                     $actual_output_extension = $output_extension;
 836                     $output_file             = $fileroot . $output_extension;
 837                 }
 838             }
 839
 840             # the 'sink_object' knows how to write the output file
 841             my $tee_file = $fileroot . $dot . "TEE";
 842
 843             my $line_separator = $rOpts->{'output-line-ending'};
 844             if ( $rOpts->{'preserve-line-endings'} ) {
 845                 $line_separator = find_input_line_ending($input_file);
 846             }
 847             $line_separator = "\n" unless defined($line_separator);
 848
 849             my $sink_object =
 850               Perl::Tidy::LineSink->new( $output_file, $tee_file,
 851                 $line_separator, $rOpts, $rpending_logfile_message );
 852
 853             #---------------------------------------------------------------
 854             # initialize the error logger
 855             #---------------------------------------------------------------
 856             my $warning_file = $fileroot . $dot . "ERR";
 857             if ($errorfile_stream) { $warning_file = $errorfile_stream }
 858             my $log_file = $fileroot . $dot . "LOG";
 859             if ($logfile_stream) { $log_file = $logfile_stream }
 860
 861             my $logger_object =
 862               Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
 863                 $saw_extrude );
 864             write_logfile_header(
 865                 $rOpts,        $logger_object, $config_file,
 866                 $rraw_options, $Windows_type
 867             );
 868             if ($$rpending_logfile_message) {
 869                 $logger_object->write_logfile_entry($$rpending_logfile_message);
 870             }
 871             if ($$rpending_complaint) {
 872                 $logger_object->complain($$rpending_complaint);
 873             }
 874
 875             #---------------------------------------------------------------
 876             # initialize the debug object, if any
 877             #---------------------------------------------------------------
 878             my $debugger_object = undef;
 879             if ( $rOpts->{DEBUG} ) {
 880                 $debugger_object =
 881                   Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
 882             }
 883
 884             #---------------------------------------------------------------
 885             # create a formatter for this file : html writer or pretty printer
 886             #---------------------------------------------------------------
 887
 888             # we have to delete any old formatter because, for safety,
 889             # the formatter will check to see that there is only one.
 890             $formatter = undef;
 891
 892             if ($user_formatter) {
 893                 $formatter = $user_formatter;
 894             }
 895             elsif ( $rOpts->{'format'} eq 'html' ) {
 896                 $formatter =
 897                   Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
 898                     $actual_output_extension, $html_toc_extension,
 899                     $html_src_extension );
 900             }
 901             elsif ( $rOpts->{'format'} eq 'tidy' ) {
 902                 $formatter = Perl::Tidy::Formatter->new(
 903                     logger_object      => $logger_object,
 904                     diagnostics_object => $diagnostics_object,
 905                     sink_object        => $sink_object,
 906                 );
 907             }
 908             else {
 909                 die "I don't know how to do -format=$rOpts->{'format'}\n";
 910             }
 911
 912             unless ($formatter) {
 913                 die "Unable to continue with $rOpts->{'format'} formatting\n";
 914             }
 915
 916             #---------------------------------------------------------------
 917             # create the tokenizer for this file
 918             #---------------------------------------------------------------
 919             $tokenizer = undef;                     # must destroy old tokenizer
 920             $tokenizer = Perl::Tidy::Tokenizer->new(
 921                 source_object       => $source_object,
 922                 logger_object       => $logger_object,
 923                 debugger_object     => $debugger_object,
 924                 diagnostics_object  => $diagnostics_object,
 925                 starting_level      => $rOpts->{'starting-indentation-level'},
 926                 tabs                => $rOpts->{'tabs'},
 927                 indent_columns      => $rOpts->{'indent-columns'},
 928                 look_for_hash_bang  => $rOpts->{'look-for-hash-bang'},
 929                 look_for_autoloader => $rOpts->{'look-for-autoloader'},
 930                 look_for_selfloader => $rOpts->{'look-for-selfloader'},
 931                 trim_qw             => $rOpts->{'trim-qw'},
 932             );
 933
 934             #---------------------------------------------------------------
 935             # now we can do it
 936             #---------------------------------------------------------------
 937             process_this_file( $tokenizer, $formatter );
 938
 939             #---------------------------------------------------------------
 940             # close the input source and report errors
 941             #---------------------------------------------------------------
 942             $source_object->close_input_file();
 943
 944             # get file names to use for syntax check
 945             my $ifname = $source_object->get_input_file_copy_name();
 946             my $ofname = $sink_object->get_output_file_copy();
 947
 948             #---------------------------------------------------------------
 949             # handle the -b option (backup and modify in-place)
 950             #---------------------------------------------------------------
 951             if ($in_place_modify) {
 952                 unless ( -f $input_file ) {
 953
 954                     # oh, oh, no real file to backup ..
 955                     # shouldn't happen because of numerous preliminary checks
 956                     die print
 957 "problem with -b backing up input file '$input_file': not a file\n";
 958                 }
 959                 my $backup_name = $input_file . $backup_extension;
 960                 if ( -f $backup_name ) {
 961                     unlink($backup_name)
 962                       or die
 963 "unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
 964                 }
 965                 rename( $input_file, $backup_name )
 966                   or die
 967 "problem renaming $input_file to $backup_name for -b option: $!\n";
 968                 $ifname = $backup_name;
 969
 970                 seek( $output_file, 0, 0 )
 971                   or die "unable to rewind tmp file for -b option: $!\n";
 972
 973                 my $fout = IO::File->new("> $input_file")
 974                   or die
 975 "problem opening $input_file for write for -b option; check directory permissions: $!\n";
 976                 my $line;
 977                 while ( $line = $output_file->getline() ) {
 978                     $fout->print($line);
 979                 }
 980                 $fout->close();
 981                 $output_file = $input_file;
 982                 $ofname      = $input_file;
 983             }
 984
 985             #---------------------------------------------------------------
 986             # clean up and report errors
 987             #---------------------------------------------------------------
 988             $sink_object->close_output_file()    if $sink_object;
 989             $debugger_object->close_debug_file() if $debugger_object;
 990
 991             my $infile_syntax_ok = 0;    # -1 no  0=don't know   1 yes
 992             if ($output_file) {
 993
 994                 if ($input_file_permissions) {
 995
 996                     # give output script same permissions as input script, but
 997                     # make it user-writable or else we can't run perltidy again.
 998                     # Thus we retain whatever executable flags were set.
 999                     if ( $rOpts->{'format'} eq 'tidy' ) {
1000                         chmod( $input_file_permissions | 0600, $output_file );
1001                     }
1002
1003                     # else use default permissions for html and any other format
1004
1005                 }
1006                 if ( $logger_object && $rOpts->{'check-syntax'} ) {
1007                     $infile_syntax_ok =
1008                       check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1009                 }
1010             }
1011
1012             $logger_object->finish( $infile_syntax_ok, $formatter )
1013               if $logger_object;
1014         }    # end of loop to process all files
1015     }    # end of main program
1016 }
1017
1018 sub fileglob_to_re {
1019
1020     # modified (corrected) from version in find2perl
1021     my $x = shift;
1022     $x =~ s#([./^\$()])#\\$1#g;    # escape special characters
1023     $x =~ s#\*#.*#g;               # '*' -> '.*'
1024     $x =~ s#\?#.#g;                # '?' -> '.'
1025     "^$x\\z";                      # match whole word
1026 }
1027
1028 sub make_extension {
1029
1030     # Make a file extension, including any leading '.' if necessary
1031     # The '.' may actually be an '_' under VMS
1032     my ( $extension, $default, $dot ) = @_;
1033
1034     # Use the default if none specified
1035     $extension = $default unless ($extension);
1036
1037     # Only extensions with these leading characters get a '.'
1038     # This rule gives the user some freedom
1039     if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1040         $extension = $dot . $extension;
1041     }
1042     return $extension;
1043 }
1044
1045 sub write_logfile_header {
1046     my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
1047       @_;
1048     $logger_object->write_logfile_entry(
1049 "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1050     );
1051     if ($Windows_type) {
1052         $logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1053     }
1054     my $options_string = join( ' ', @$rraw_options );
1055
1056     if ($config_file) {
1057         $logger_object->write_logfile_entry(
1058             "Found Configuration File >>> $config_file \n");
1059     }
1060     $logger_object->write_logfile_entry(
1061         "Configuration and command line parameters for this run:\n");
1062     $logger_object->write_logfile_entry("$options_string\n");
1063
1064     if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1065         $rOpts->{'logfile'} = 1;    # force logfile to be saved
1066         $logger_object->write_logfile_entry(
1067             "Final parameter set for this run\n");
1068         $logger_object->write_logfile_entry(
1069             "------------------------------------\n");
1070
1071         foreach ( keys %{$rOpts} ) {
1072             $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
1073         }
1074         $logger_object->write_logfile_entry(
1075             "------------------------------------\n");
1076     }
1077     $logger_object->write_logfile_entry(
1078         "To find error messages search for 'WARNING' with your editor\n");
1079 }
1080
1081 sub generate_options {
1082
1083     ######################################################################
1084     # Generate and return references to:
1085     #  @option_string - the list of options to be passed to Getopt::Long
1086     #  @defaults - the list of default options
1087     #  %expansion - a hash showing how all abbreviations are expanded
1088     #  %category - a hash giving the general category of each option
1089     #  %option_range - a hash giving the valid ranges of certain options
1090
1091     # Note: a few options are not documented in the man page and usage
1092     # message. This is because these are experimental or debug options and
1093     # may or may not be retained in future versions.
1094     #
1095     # Here are the undocumented flags as far as I know.  Any of them
1096     # may disappear at any time.  They are mainly for fine-tuning
1097     # and debugging.
1098     #
1099     # fll --> fuzzy-line-length           # a trivial parameter which gets
1100     #                                       turned off for the extrude option
1101     #                                       which is mainly for debugging
1102     # chk --> check-multiline-quotes      # check for old bug; to be deleted
1103     # scl --> short-concatenation-item-length   # helps break at '.'
1104     # recombine                           # for debugging line breaks
1105     # valign                              # for debugging vertical alignment
1106     # I   --> DIAGNOSTICS                 # for debugging
1107     ######################################################################
1108
1109     # here is a summary of the Getopt codes:
1110     # <none> does not take an argument
1111     # =s takes a mandatory string
1112     # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
1113     # =i takes a mandatory integer
1114     # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1115     # ! does not take an argument and may be negated
1116     #  i.e., -foo and -nofoo are allowed
1117     # a double dash signals the end of the options list
1118     #
1119     #---------------------------------------------------------------
1120     # Define the option string passed to GetOptions.
1121     #---------------------------------------------------------------
1122
1123     my @option_string   = ();
1124     my %expansion       = ();
1125     my %option_category = ();
1126     my %option_range    = ();
1127     my $rexpansion      = \%expansion;
1128
1129     # names of categories in manual
1130     # leading integers will allow sorting
1131     my @category_name = (
1132         '0. I/O control',
1133         '1. Basic formatting options',
1134         '2. Code indentation control',
1135         '3. Whitespace control',
1136         '4. Comment controls',
1137         '5. Linebreak controls',
1138         '6. Controlling list formatting',
1139         '7. Retaining or ignoring existing line breaks',
1140         '8. Blank line control',
1141         '9. Other controls',
1142         '10. HTML options',
1143         '11. pod2html options',
1144         '12. Controlling HTML properties',
1145         '13. Debugging',
1146     );
1147
1148     #  These options are parsed directly by perltidy:
1149     #    help h
1150     #    version v
1151     #  However, they are included in the option set so that they will
1152     #  be seen in the options dump.
1153
1154     # These long option names have no abbreviations or are treated specially
1155     @option_string = qw(
1156       html!
1157       noprofile
1158       no-profile
1159       npro
1160       recombine!
1161       valign!
1162     );
1163
1164     my $category = 13;    # Debugging
1165     foreach (@option_string) {
1166         my $opt = $_;     # must avoid changing the actual flag
1167         $opt =~ s/!$//;
1168         $option_category{$opt} = $category_name[$category];
1169     }
1170
1171     $category = 11;                                       # HTML
1172     $option_category{html} = $category_name[$category];
1173
1174     # routine to install and check options
1175     my $add_option = sub {
1176         my ( $long_name, $short_name, $flag ) = @_;
1177         push @option_string, $long_name . $flag;
1178         $option_category{$long_name} = $category_name[$category];
1179         if ($short_name) {
1180             if ( $expansion{$short_name} ) {
1181                 my $existing_name = $expansion{$short_name}[0];
1182                 die
1183 "redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1184             }
1185             $expansion{$short_name} = [$long_name];
1186             if ( $flag eq '!' ) {
1187                 my $nshort_name = 'n' . $short_name;
1188                 my $nolong_name = 'no' . $long_name;
1189                 if ( $expansion{$nshort_name} ) {
1190                     my $existing_name = $expansion{$nshort_name}[0];
1191                     die
1192 "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1193                 }
1194                 $expansion{$nshort_name} = [$nolong_name];
1195             }
1196         }
1197     };
1198
1199     # Install long option names which have a simple abbreviation.
1200     # Options with code '!' get standard negation ('no' for long names,
1201     # 'n' for abbreviations).  Categories follow the manual.
1202
1203     ###########################
1204     $category = 0;    # I/O_Control
1205     ###########################
1206     $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
1207     $add_option->( 'backup-file-extension',      'bext',  '=s' );
1208     $add_option->( 'force-read-binary',          'f',     '!' );
1209     $add_option->( 'format',                     'fmt',   '=s' );
1210     $add_option->( 'logfile',                    'log',   '!' );
1211     $add_option->( 'logfile-gap',                'g',     ':i' );
1212     $add_option->( 'outfile',                    'o',     '=s' );
1213     $add_option->( 'output-file-extension',      'oext',  '=s' );
1214     $add_option->( 'output-path',                'opath', '=s' );
1215     $add_option->( 'profile',                    'pro',   '=s' );
1216     $add_option->( 'quiet',                      'q',     '!' );
1217     $add_option->( 'standard-error-output',      'se',    '!' );
1218     $add_option->( 'standard-output',            'st',    '!' );
1219     $add_option->( 'warning-output',             'w',     '!' );
1220
1221     ########################################
1222     $category = 1;    # Basic formatting options
1223     ########################################
1224     $add_option->( 'check-syntax',             'syn',  '!' );
1225     $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
1226     $add_option->( 'indent-columns',           'i',    '=i' );
1227     $add_option->( 'maximum-line-length',      'l',    '=i' );
1228     $add_option->( 'output-line-ending',       'ole',  '=s' );
1229     $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
1230     $add_option->( 'preserve-line-endings',    'ple',  '!' );
1231     $add_option->( 'tabs',                     't',    '!' );
1232
1233     ########################################
1234     $category = 2;    # Code indentation control
1235     ########################################
1236     $add_option->( 'continuation-indentation',           'ci',   '=i' );
1237     $add_option->( 'starting-indentation-level',         'sil',  '=i' );
1238     $add_option->( 'line-up-parentheses',                'lp',   '!' );
1239     $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
1240     $add_option->( 'outdent-keywords',                   'okw',  '!' );
1241     $add_option->( 'outdent-labels',                     'ola',  '!' );
1242     $add_option->( 'outdent-long-quotes',                'olq',  '!' );
1243     $add_option->( 'indent-closing-brace',               'icb',  '!' );
1244     $add_option->( 'closing-token-indentation',          'cti',  '=i' );
1245     $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
1246     $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
1247     $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1248     $add_option->( 'brace-left-and-indent',              'bli',  '!' );
1249     $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );
1250
1251     ########################################
1252     $category = 3;    # Whitespace control
1253     ########################################
1254     $add_option->( 'add-semicolons',                            'asc',   '!' );
1255     $add_option->( 'add-whitespace',                            'aws',   '!' );
1256     $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
1257     $add_option->( 'brace-tightness',                           'bt',    '=i' );
1258     $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
1259     $add_option->( 'delete-semicolons',                         'dsm',   '!' );
1260     $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
1261     $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
1262     $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
1263     $add_option->( 'paren-tightness',                           'pt',    '=i' );
1264     $add_option->( 'space-after-keyword',                       'sak',   '=s' );
1265     $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
1266     $add_option->( 'space-function-paren',                      'sfp',   '!' );
1267     $add_option->( 'space-keyword-paren',                       'skp',   '!' );
1268     $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
1269     $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
1270     $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
1271     $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1272     $add_option->( 'trim-qw',                                   'tqw',   '!' );
1273     $add_option->( 'want-left-space',                           'wls',   '=s' );
1274     $add_option->( 'want-right-space',                          'wrs',   '=s' );
1275
1276     ########################################
1277     $category = 4;    # Comment controls
1278     ########################################
1279     $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
1280     $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
1281     $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
1282     $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1283     $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
1284     $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
1285     $add_option->( 'closing-side-comments',             'csc',  '!' );
1286     $add_option->( 'format-skipping',                   'fs',   '!' );
1287     $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
1288     $add_option->( 'format-skipping-end',               'fse',  '=s' );
1289     $add_option->( 'hanging-side-comments',             'hsc',  '!' );
1290     $add_option->( 'indent-block-comments',             'ibc',  '!' );
1291     $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
1292     $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
1293     $add_option->( 'outdent-long-comments',             'olc',  '!' );
1294     $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
1295     $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
1296     $add_option->( 'static-block-comments',             'sbc',  '!' );
1297     $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
1298     $add_option->( 'static-side-comments',              'ssc',  '!' );
1299
1300     ########################################
1301     $category = 5;    # Linebreak controls
1302     ########################################
1303     $add_option->( 'add-newlines',                        'anl',   '!' );
1304     $add_option->( 'block-brace-vertical-tightness',      'bbvt',  '=i' );
1305     $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1306     $add_option->( 'brace-vertical-tightness',            'bvt',   '=i' );
1307     $add_option->( 'brace-vertical-tightness-closing',    'bvtc',  '=i' );
1308     $add_option->( 'cuddled-else',                        'ce',    '!' );
1309     $add_option->( 'delete-old-newlines',                 'dnl',   '!' );
1310     $add_option->( 'opening-brace-always-on-right',       'bar',   '' );
1311     $add_option->( 'opening-brace-on-new-line',           'bl',    '!' );
1312     $add_option->( 'opening-hash-brace-right',            'ohbr',  '!' );
1313     $add_option->( 'opening-paren-right',                 'opr',   '!' );
1314     $add_option->( 'opening-square-bracket-right',        'osbr',  '!' );
1315     $add_option->( 'opening-sub-brace-on-new-line',       'sbl',   '!' );
1316     $add_option->( 'paren-vertical-tightness',            'pvt',   '=i' );
1317     $add_option->( 'paren-vertical-tightness-closing',    'pvtc',  '=i' );
1318     $add_option->( 'stack-closing-hash-brace',            'schb',  '!' );
1319     $add_option->( 'stack-closing-paren',                 'scp',   '!' );
1320     $add_option->( 'stack-closing-square-bracket',        'scsb',  '!' );
1321     $add_option->( 'stack-opening-hash-brace',            'sohb',  '!' );
1322     $add_option->( 'stack-opening-paren',                 'sop',   '!' );
1323     $add_option->( 'stack-opening-square-bracket',        'sosb',  '!' );
1324     $add_option->( 'vertical-tightness',                  'vt',    '=i' );
1325     $add_option->( 'vertical-tightness-closing',          'vtc',   '=i' );
1326     $add_option->( 'want-break-after',                    'wba',   '=s' );
1327     $add_option->( 'want-break-before',                   'wbb',   '=s' );
1328
1329     ########################################
1330     $category = 6;    # Controlling list formatting
1331     ########################################
1332     $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1333     $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
1334     $add_option->( 'maximum-fields-per-table',       'mft', '=i' );
1335
1336     ########################################
1337     $category = 7;    # Retaining or ignoring existing line breaks
1338     ########################################
1339     $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1340     $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1341     $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1342     $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
1343
1344     ########################################
1345     $category = 8;    # Blank line control
1346     ########################################
1347     $add_option->( 'blanks-before-blocks',            'bbb', '!' );
1348     $add_option->( 'blanks-before-comments',          'bbc', '!' );
1349     $add_option->( 'blanks-before-subs',              'bbs', '!' );
1350     $add_option->( 'long-block-line-count',           'lbl', '=i' );
1351     $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1352     $add_option->( 'swallow-optional-blank-lines',    'sob', '!' );
1353
1354     ########################################
1355     $category = 9;    # Other controls
1356     ########################################
1357     $add_option->( 'delete-block-comments',        'dbc',  '!' );
1358     $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1359     $add_option->( 'delete-pod',                   'dp',   '!' );
1360     $add_option->( 'delete-side-comments',         'dsc',  '!' );
1361     $add_option->( 'tee-block-comments',           'tbc',  '!' );
1362     $add_option->( 'tee-pod',                      'tp',   '!' );
1363     $add_option->( 'tee-side-comments',            'tsc',  '!' );
1364     $add_option->( 'look-for-autoloader',          'lal',  '!' );
1365     $add_option->( 'look-for-hash-bang',           'x',    '!' );
1366     $add_option->( 'look-for-selfloader',          'lsl',  '!' );
1367     $add_option->( 'pass-version-line',            'pvl',  '!' );
1368
1369     ########################################
1370     $category = 13;    # Debugging
1371     ########################################
1372     $add_option->( 'DEBUG',                           'D',    '!' );
1373     $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
1374     $add_option->( 'check-multiline-quotes',          'chk',  '!' );
1375     $add_option->( 'dump-defaults',                   'ddf',  '!' );
1376     $add_option->( 'dump-long-names',                 'dln',  '!' );
1377     $add_option->( 'dump-options',                    'dop',  '!' );
1378     $add_option->( 'dump-profile',                    'dpro', '!' );
1379     $add_option->( 'dump-short-names',                'dsn',  '!' );
1380     $add_option->( 'dump-token-types',                'dtt',  '!' );
1381     $add_option->( 'dump-want-left-space',            'dwls', '!' );
1382     $add_option->( 'dump-want-right-space',           'dwrs', '!' );
1383     $add_option->( 'fuzzy-line-length',               'fll',  '!' );
1384     $add_option->( 'help',                            'h',    '' );
1385     $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
1386     $add_option->( 'show-options',                    'opt',  '!' );
1387     $add_option->( 'version',                         'v',    '' );
1388
1389     #---------------------------------------------------------------------
1390
1391     # The Perl::Tidy::HtmlWriter will add its own options to the string
1392     Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1393
1394     ########################################
1395     # Set categories 10, 11, 12
1396     ########################################
1397     # Based on their known order
1398     $category = 12;    # HTML properties
1399     foreach my $opt (@option_string) {
1400         my $long_name = $opt;
1401         $long_name =~ s/(!|=.*|:.*)$//;
1402         unless ( defined( $option_category{$long_name} ) ) {
1403             if ( $long_name =~ /^html-linked/ ) {
1404                 $category = 10;    # HTML options
1405             }
1406             elsif ( $long_name =~ /^pod2html/ ) {
1407                 $category = 11;    # Pod2html
1408             }
1409             $option_category{$long_name} = $category_name[$category];
1410         }
1411     }
1412
1413     #---------------------------------------------------------------
1414     # Assign valid ranges to certain options
1415     #---------------------------------------------------------------
1416     # In the future, these may be used to make preliminary checks
1417     # hash keys are long names
1418     # If key or value is undefined:
1419     #   strings may have any value
1420     #   integer ranges are >=0
1421     # If value is defined:
1422     #   value is [qw(any valid words)] for strings
1423     #   value is [min, max] for integers
1424     #   if min is undefined, there is no lower limit
1425     #   if max is undefined, there is no upper limit
1426     # Parameters not listed here have defaults
1427     $option_range{'format'}             = [qw(tidy html user)];
1428     $option_range{'output-line-ending'} = [qw(dos win mac unix)];
1429
1430     $option_range{'block-brace-tightness'}    = [ 0, 2 ];
1431     $option_range{'brace-tightness'}          = [ 0, 2 ];
1432     $option_range{'paren-tightness'}          = [ 0, 2 ];
1433     $option_range{'square-bracket-tightness'} = [ 0, 2 ];
1434
1435     $option_range{'block-brace-vertical-tightness'}            = [ 0, 2 ];
1436     $option_range{'brace-vertical-tightness'}                  = [ 0, 2 ];
1437     $option_range{'brace-vertical-tightness-closing'}          = [ 0, 2 ];
1438     $option_range{'paren-vertical-tightness'}                  = [ 0, 2 ];
1439     $option_range{'paren-vertical-tightness-closing'}          = [ 0, 2 ];
1440     $option_range{'square-bracket-vertical-tightness'}         = [ 0, 2 ];
1441     $option_range{'square-bracket-vertical-tightness-closing'} = [ 0, 2 ];
1442     $option_range{'vertical-tightness'}                        = [ 0, 2 ];
1443     $option_range{'vertical-tightness-closing'}                = [ 0, 2 ];
1444
1445     $option_range{'closing-brace-indentation'}          = [ 0, 3 ];
1446     $option_range{'closing-paren-indentation'}          = [ 0, 3 ];
1447     $option_range{'closing-square-bracket-indentation'} = [ 0, 3 ];
1448     $option_range{'closing-token-indentation'}          = [ 0, 3 ];
1449
1450     $option_range{'closing-side-comment-else-flag'} = [ 0, 2 ];
1451     $option_range{'comma-arrow-breakpoints'}        = [ 0, 3 ];
1452
1453 # Note: we could actually allow negative ci if someone really wants it:
1454 # $option_range{'continuation-indentation'}                  = [ undef, undef ];
1455
1456     #---------------------------------------------------------------
1457     # Assign default values to the above options here, except
1458     # for 'outfile' and 'help'.
1459     # These settings should approximate the perlstyle(1) suggestions.
1460     #---------------------------------------------------------------
1461     my @defaults = qw(
1462       add-newlines
1463       add-semicolons
1464       add-whitespace
1465       blanks-before-blocks
1466       blanks-before-comments
1467       blanks-before-subs
1468       block-brace-tightness=0
1469       block-brace-vertical-tightness=0
1470       brace-tightness=1
1471       brace-vertical-tightness-closing=0
1472       brace-vertical-tightness=0
1473       break-at-old-logical-breakpoints
1474       break-at-old-ternary-breakpoints
1475       break-at-old-keyword-breakpoints
1476       comma-arrow-breakpoints=1
1477       nocheck-syntax
1478       closing-side-comment-interval=6
1479       closing-side-comment-maximum-text=20
1480       closing-side-comment-else-flag=0
1481       closing-paren-indentation=0
1482       closing-brace-indentation=0
1483       closing-square-bracket-indentation=0
1484       continuation-indentation=2
1485       delete-old-newlines
1486       delete-semicolons
1487       fuzzy-line-length
1488       hanging-side-comments
1489       indent-block-comments
1490       indent-columns=4
1491       long-block-line-count=8
1492       look-for-autoloader
1493       look-for-selfloader
1494       maximum-consecutive-blank-lines=1
1495       maximum-fields-per-table=0
1496       maximum-line-length=80
1497       minimum-space-to-comment=4
1498       nobrace-left-and-indent
1499       nocuddled-else
1500       nodelete-old-whitespace
1501       nohtml
1502       nologfile
1503       noquiet
1504       noshow-options
1505       nostatic-side-comments
1506       noswallow-optional-blank-lines
1507       notabs
1508       nowarning-output
1509       outdent-labels
1510       outdent-long-quotes
1511       outdent-long-comments
1512       paren-tightness=1
1513       paren-vertical-tightness-closing=0
1514       paren-vertical-tightness=0
1515       pass-version-line
1516       recombine
1517       valign
1518       short-concatenation-item-length=8
1519       space-for-semicolon
1520       square-bracket-tightness=1
1521       square-bracket-vertical-tightness-closing=0
1522       square-bracket-vertical-tightness=0
1523       static-block-comments
1524       trim-qw
1525       format=tidy
1526       backup-file-extension=bak
1527       format-skipping
1528
1529       pod2html
1530       html-table-of-contents
1531       html-entities
1532     );
1533
1534     push @defaults, "perl-syntax-check-flags=-c -T";
1535
1536     #---------------------------------------------------------------
1537     # Define abbreviations which will be expanded into the above primitives.
1538     # These may be defined recursively.
1539     #---------------------------------------------------------------
1540     %expansion = (
1541         %expansion,
1542         'freeze-newlines'    => [qw(noadd-newlines nodelete-old-newlines)],
1543         'fnl'                => [qw(freeze-newlines)],
1544         'freeze-whitespace'  => [qw(noadd-whitespace nodelete-old-whitespace)],
1545         'fws'                => [qw(freeze-whitespace)],
1546         'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
1547         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1548         'nooutdent-long-lines' =>
1549           [qw(nooutdent-long-quotes nooutdent-long-comments)],
1550         'noll' => [qw(nooutdent-long-lines)],
1551         'io'   => [qw(indent-only)],
1552         'delete-all-comments' =>
1553           [qw(delete-block-comments delete-side-comments delete-pod)],
1554         'nodelete-all-comments' =>
1555           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1556         'dac'  => [qw(delete-all-comments)],
1557         'ndac' => [qw(nodelete-all-comments)],
1558         'gnu'  => [qw(gnu-style)],
1559         'pbp'  => [qw(perl-best-practices)],
1560         'tee-all-comments' =>
1561           [qw(tee-block-comments tee-side-comments tee-pod)],
1562         'notee-all-comments' =>
1563           [qw(notee-block-comments notee-side-comments notee-pod)],
1564         'tac'   => [qw(tee-all-comments)],
1565         'ntac'  => [qw(notee-all-comments)],
1566         'html'  => [qw(format=html)],
1567         'nhtml' => [qw(format=tidy)],
1568         'tidy'  => [qw(format=tidy)],
1569
1570         'break-after-comma-arrows'   => [qw(cab=0)],
1571         'nobreak-after-comma-arrows' => [qw(cab=1)],
1572         'baa'                        => [qw(cab=0)],
1573         'nbaa'                       => [qw(cab=1)],
1574
1575         'break-at-old-trinary-breakpoints' => [qw(bot)],
1576
1577         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1578         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1579         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1580         'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
1581         'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],
1582
1583         'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1584         'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1585         'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1586         'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
1587         'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],
1588
1589         'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1590         'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1591         'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1592
1593         'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1594         'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1595         'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1596
1597         'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1598         'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1599         'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1600
1601         'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1602         'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1603         'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1604
1605         'otr'                   => [qw(opr ohbr osbr)],
1606         'opening-token-right'   => [qw(opr ohbr osbr)],
1607         'notr'                  => [qw(nopr nohbr nosbr)],
1608         'noopening-token-right' => [qw(nopr nohbr nosbr)],
1609
1610         'sot'                    => [qw(sop sohb sosb)],
1611         'nsot'                   => [qw(nsop nsohb nsosb)],
1612         'stack-opening-tokens'   => [qw(sop sohb sosb)],
1613         'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1614
1615         'sct'                    => [qw(scp schb scsb)],
1616         'stack-closing-tokens'   => => [qw(scp schb scsb)],
1617         'nsct'                   => [qw(nscp nschb nscsb)],
1618         'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
1619
1620         # 'mangle' originally deleted pod and comments, but to keep it
1621         # reversible, it no longer does.  But if you really want to
1622         # delete them, just use:
1623         #   -mangle -dac
1624
1625         # An interesting use for 'mangle' is to do this:
1626         #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1627         # which will form as many one-line blocks as possible
1628
1629         'mangle' => [
1630             qw(
1631               check-syntax
1632               delete-old-newlines
1633               delete-old-whitespace
1634               delete-semicolons
1635               indent-columns=0
1636               maximum-consecutive-blank-lines=0
1637               maximum-line-length=100000
1638               noadd-newlines
1639               noadd-semicolons
1640               noadd-whitespace
1641               noblanks-before-blocks
1642               noblanks-before-subs
1643               notabs
1644               )
1645         ],
1646
1647         # 'extrude' originally deleted pod and comments, but to keep it
1648         # reversible, it no longer does.  But if you really want to
1649         # delete them, just use
1650         #   extrude -dac
1651         #
1652         # An interesting use for 'extrude' is to do this:
1653         #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1654         # which will break up all one-line blocks.
1655
1656         'extrude' => [
1657             qw(
1658               check-syntax
1659               ci=0
1660               delete-old-newlines
1661               delete-old-whitespace
1662               delete-semicolons
1663               indent-columns=0
1664               maximum-consecutive-blank-lines=0
1665               maximum-line-length=1
1666               noadd-semicolons
1667               noadd-whitespace
1668               noblanks-before-blocks
1669               noblanks-before-subs
1670               nofuzzy-line-length
1671               notabs
1672               )
1673         ],
1674
1675         # this style tries to follow the GNU Coding Standards (which do
1676         # not really apply to perl but which are followed by some perl
1677         # programmers).
1678         'gnu-style' => [
1679             qw(
1680               lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1681               )
1682         ],
1683
1684         # Style suggested in Damian Conway's Perl Best Practices
1685         'perl-best-practices' => [
1686             qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1687 q(wbb=% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=)
1688         ],
1689
1690         # Additional styles can be added here
1691     );
1692
1693     Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1694
1695     # Uncomment next line to dump all expansions for debugging:
1696     # dump_short_names(\%expansion);
1697     return (
1698         \@option_string,   \@defaults, \%expansion,
1699         \%option_category, \%option_range
1700     );
1701
1702 }    # end of generate_options
1703
1704 sub process_command_line {
1705
1706     my (
1707         $perltidyrc_stream,  $is_Windows, $Windows_type,
1708         $rpending_complaint, $dump_options_type
1709     ) = @_;
1710
1711     use Getopt::Long;
1712
1713     my (
1714         $roption_string,   $rdefaults, $rexpansion,
1715         $roption_category, $roption_range
1716     ) = generate_options();
1717
1718     #---------------------------------------------------------------
1719     # set the defaults by passing the above list through GetOptions
1720     #---------------------------------------------------------------
1721     my %Opts = ();
1722     {
1723         local @ARGV;
1724         my $i;
1725
1726         # do not load the defaults if we are just dumping perltidyrc
1727         unless ( $dump_options_type eq 'perltidyrc' ) {
1728             for $i (@$rdefaults) { push @ARGV, "--" . $i }
1729         }
1730
1731         # Patch to save users Getopt::Long configuration
1732         # and set to Getopt::Long defaults.  Use eval to avoid
1733         # breaking old versions of Perl without these routines.
1734         my $glc;
1735         eval { $glc = Getopt::Long::Configure() };
1736         unless ($@) {
1737             eval { Getopt::Long::ConfigDefaults() };
1738         }
1739         else { $glc = undef }
1740
1741         if ( !GetOptions( \%Opts, @$roption_string ) ) {
1742             die "Programming Bug: error in setting default options";
1743         }
1744
1745         # Patch to put the previous Getopt::Long configuration back
1746         eval { Getopt::Long::Configure($glc) } if defined $glc;
1747     }
1748
1749     my $word;
1750     my @raw_options        = ();
1751     my $config_file        = "";
1752     my $saw_ignore_profile = 0;
1753     my $saw_extrude        = 0;
1754     my $saw_dump_profile   = 0;
1755     my $i;
1756
1757     #---------------------------------------------------------------
1758     # Take a first look at the command-line parameters.  Do as many
1759     # immediate dumps as possible, which can avoid confusion if the
1760     # perltidyrc file has an error.
1761     #---------------------------------------------------------------
1762     foreach $i (@ARGV) {
1763
1764         $i =~ s/^--/-/;
1765         if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1766             $saw_ignore_profile = 1;
1767         }
1768
1769         # note: this must come before -pro and -profile, below:
1770         elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1771             $saw_dump_profile = 1;
1772         }
1773         elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1774             if ($config_file) {
1775                 warn
1776 "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1777             }
1778             $config_file = $2;
1779             unless ( -e $config_file ) {
1780                 warn "cannot find file given with -pro=$config_file: $!\n";
1781                 $config_file = "";
1782             }
1783         }
1784         elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1785             die "usage: -pro=filename or --profile=filename, no spaces\n";
1786         }
1787         elsif ( $i =~ /^-extrude$/ ) {
1788             $saw_extrude = 1;
1789         }
1790         elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1791             usage();
1792             exit 1;
1793         }
1794         elsif ( $i =~ /^-(version|v)$/ ) {
1795             show_version();
1796             exit 1;
1797         }
1798         elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1799             dump_defaults(@$rdefaults);
1800             exit 1;
1801         }
1802         elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1803             dump_long_names(@$roption_string);
1804             exit 1;
1805         }
1806         elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1807             dump_short_names($rexpansion);
1808             exit 1;
1809         }
1810         elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1811             Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1812             exit 1;
1813         }
1814     }
1815
1816     if ( $saw_dump_profile && $saw_ignore_profile ) {
1817         warn "No profile to dump because of -npro\n";
1818         exit 1;
1819     }
1820
1821     #---------------------------------------------------------------
1822     # read any .perltidyrc configuration file
1823     #---------------------------------------------------------------
1824     unless ($saw_ignore_profile) {
1825
1826         # resolve possible conflict between $perltidyrc_stream passed
1827         # as call parameter to perltidy and -pro=filename on command
1828         # line.
1829         if ($perltidyrc_stream) {
1830             if ($config_file) {
1831                 warn <<EOM;
1832  Conflict: a perltidyrc configuration file was specified both as this
1833  perltidy call parameter: $perltidyrc_stream
1834  and with this -profile=$config_file.
1835  Using -profile=$config_file.
1836 EOM
1837             }
1838             else {
1839                 $config_file = $perltidyrc_stream;
1840             }
1841         }
1842
1843         # look for a config file if we don't have one yet
1844         my $rconfig_file_chatter;
1845         $$rconfig_file_chatter = "";
1846         $config_file =
1847           find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1848             $rpending_complaint )
1849           unless $config_file;
1850
1851         # open any config file
1852         my $fh_config;
1853         if ($config_file) {
1854             ( $fh_config, $config_file ) =
1855               Perl::Tidy::streamhandle( $config_file, 'r' );
1856             unless ($fh_config) {
1857                 $$rconfig_file_chatter .=
1858                   "# $config_file exists but cannot be opened\n";
1859             }
1860         }
1861
1862         if ($saw_dump_profile) {
1863             if ($saw_dump_profile) {
1864                 dump_config_file( $fh_config, $config_file,
1865                     $rconfig_file_chatter );
1866                 exit 1;
1867             }
1868         }
1869
1870         if ($fh_config) {
1871
1872             my ( $rconfig_list, $death_message ) =
1873               read_config_file( $fh_config, $config_file, $rexpansion );
1874             die $death_message if ($death_message);
1875
1876             # process any .perltidyrc parameters right now so we can
1877             # localize errors
1878             if (@$rconfig_list) {
1879                 local @ARGV = @$rconfig_list;
1880
1881                 expand_command_abbreviations( $rexpansion, \@raw_options,
1882                     $config_file );
1883
1884                 if ( !GetOptions( \%Opts, @$roption_string ) ) {
1885                     die
1886 "Error in this config file: $config_file  \nUse -npro to ignore this file, -h for help'\n";
1887                 }
1888
1889                 # Anything left in this local @ARGV is an error and must be
1890                 # invalid bare words from the configuration file.  We cannot
1891                 # check this earlier because bare words may have been valid
1892                 # values for parameters.  We had to wait for GetOptions to have
1893                 # a look at @ARGV.
1894                 if (@ARGV) {
1895                     my $count = @ARGV;
1896                     my $str   = "\'" . pop(@ARGV) . "\'";
1897                     while ( my $param = pop(@ARGV) ) {
1898                         if ( length($str) < 70 ) {
1899                             $str .= ", '$param'";
1900                         }
1901                         else {
1902                             $str .= ", ...";
1903                             last;
1904                         }
1905                     }
1906                     die <<EOM;
1907 There are $count unrecognized values in the configuration file '$config_file':
1908 $str
1909 Use leading dashes for parameters.  Use -npro to ignore this file.
1910 EOM
1911                 }
1912
1913                 # Undo any options which cause premature exit.  They are not
1914                 # appropriate for a config file, and it could be hard to
1915                 # diagnose the cause of the premature exit.
1916                 foreach (
1917                     qw{
1918                     dump-defaults
1919                     dump-long-names
1920                     dump-options
1921                     dump-profile
1922                     dump-short-names
1923                     dump-token-types
1924                     dump-want-left-space
1925                     dump-want-right-space
1926                     help
1927                     stylesheet
1928                     version
1929                     }
1930                   )
1931                 {
1932
1933                     if ( defined( $Opts{$_} ) ) {
1934                         delete $Opts{$_};
1935                         warn "ignoring --$_ in config file: $config_file\n";
1936                     }
1937                 }
1938             }
1939         }
1940     }
1941
1942     #---------------------------------------------------------------
1943     # now process the command line parameters
1944     #---------------------------------------------------------------
1945     expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
1946
1947     if ( !GetOptions( \%Opts, @$roption_string ) ) {
1948         die "Error on command line; for help try 'perltidy -h'\n";
1949     }
1950
1951     return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
1952         $rexpansion, $roption_category, $roption_range );
1953 }    # end of process_command_line
1954
1955 sub check_options {
1956
1957     my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
1958
1959     #---------------------------------------------------------------
1960     # check and handle any interactions among the basic options..
1961     #---------------------------------------------------------------
1962
1963     # Since -vt, -vtc, and -cti are abbreviations, but under
1964     # msdos, an unquoted input parameter like vtc=1 will be
1965     # seen as 2 parameters, vtc and 1, so the abbreviations
1966     # won't be seen.  Therefore, we will catch them here if
1967     # they get through.
1968
1969     if ( defined $rOpts->{'vertical-tightness'} ) {
1970         my $vt = $rOpts->{'vertical-tightness'};
1971         $rOpts->{'paren-vertical-tightness'}          = $vt;
1972         $rOpts->{'square-bracket-vertical-tightness'} = $vt;
1973         $rOpts->{'brace-vertical-tightness'}          = $vt;
1974     }
1975
1976     if ( defined $rOpts->{'vertical-tightness-closing'} ) {
1977         my $vtc = $rOpts->{'vertical-tightness-closing'};
1978         $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
1979         $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
1980         $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
1981     }
1982
1983     if ( defined $rOpts->{'closing-token-indentation'} ) {
1984         my $cti = $rOpts->{'closing-token-indentation'};
1985         $rOpts->{'closing-square-bracket-indentation'} = $cti;
1986         $rOpts->{'closing-brace-indentation'}          = $cti;
1987         $rOpts->{'closing-paren-indentation'}          = $cti;
1988     }
1989
1990     # In quiet mode, there is no log file and hence no way to report
1991     # results of syntax check, so don't do it.
1992     if ( $rOpts->{'quiet'} ) {
1993         $rOpts->{'check-syntax'} = 0;
1994     }
1995
1996     # can't check syntax if no output
1997     if ( $rOpts->{'format'} ne 'tidy' ) {
1998         $rOpts->{'check-syntax'} = 0;
1999     }
2000
2001     # Never let Windows 9x/Me systems run syntax check -- this will prevent a
2002     # wide variety of nasty problems on these systems, because they cannot
2003     # reliably run backticks.  Don't even think about changing this!
2004     if (   $rOpts->{'check-syntax'}
2005         && $is_Windows
2006         && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2007     {
2008         $rOpts->{'check-syntax'} = 0;
2009     }
2010
2011     # It's really a bad idea to check syntax as root unless you wrote
2012     # the script yourself.  FIXME: not sure if this works with VMS
2013     unless ($is_Windows) {
2014
2015         if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2016             $rOpts->{'check-syntax'} = 0;
2017             $$rpending_complaint .=
2018 "Syntax check deactivated for safety; you shouldn't run this as root\n";
2019         }
2020     }
2021
2022     # see if user set a non-negative logfile-gap
2023     if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2024
2025         # a zero gap will be taken as a 1
2026         if ( $rOpts->{'logfile-gap'} == 0 ) {
2027             $rOpts->{'logfile-gap'} = 1;
2028         }
2029
2030         # setting a non-negative logfile gap causes logfile to be saved
2031         $rOpts->{'logfile'} = 1;
2032     }
2033
2034     # not setting logfile gap, or setting it negative, causes default of 50
2035     else {
2036         $rOpts->{'logfile-gap'} = 50;
2037     }
2038
2039     # set short-cut flag when only indentation is to be done.
2040     # Note that the user may or may not have already set the
2041     # indent-only flag.
2042     if (   !$rOpts->{'add-whitespace'}
2043         && !$rOpts->{'delete-old-whitespace'}
2044         && !$rOpts->{'add-newlines'}
2045         && !$rOpts->{'delete-old-newlines'} )
2046     {
2047         $rOpts->{'indent-only'} = 1;
2048     }
2049
2050     # -isbc implies -ibc
2051     if ( $rOpts->{'indent-spaced-block-comments'} ) {
2052         $rOpts->{'indent-block-comments'} = 1;
2053     }
2054
2055     # -bli flag implies -bl
2056     if ( $rOpts->{'brace-left-and-indent'} ) {
2057         $rOpts->{'opening-brace-on-new-line'} = 1;
2058     }
2059
2060     if (   $rOpts->{'opening-brace-always-on-right'}
2061         && $rOpts->{'opening-brace-on-new-line'} )
2062     {
2063         warn <<EOM;
2064  Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2065   'opening-brace-on-new-line' (-bl).  Ignoring -bl.
2066 EOM
2067         $rOpts->{'opening-brace-on-new-line'} = 0;
2068     }
2069
2070     # it simplifies things if -bl is 0 rather than undefined
2071     if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2072         $rOpts->{'opening-brace-on-new-line'} = 0;
2073     }
2074
2075     # -sbl defaults to -bl if not defined
2076     if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2077         $rOpts->{'opening-sub-brace-on-new-line'} =
2078           $rOpts->{'opening-brace-on-new-line'};
2079     }
2080
2081     # set shortcut flag if no blanks to be written
2082     unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
2083         $rOpts->{'swallow-optional-blank-lines'} = 1;
2084     }
2085
2086     if ( $rOpts->{'entab-leading-whitespace'} ) {
2087         if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2088             warn "-et=n must use a positive integer; ignoring -et\n";
2089             $rOpts->{'entab-leading-whitespace'} = undef;
2090         }
2091
2092         # entab leading whitespace has priority over the older 'tabs' option
2093         if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2094     }
2095
2096     if ( $rOpts->{'output-line-ending'} ) {
2097         unless ( is_unix() ) {
2098             warn "ignoring -ole; only works under unix\n";
2099             $rOpts->{'output-line-ending'} = undef;
2100         }
2101     }
2102     if ( $rOpts->{'preserve-line-endings'} ) {
2103         unless ( is_unix() ) {
2104             warn "ignoring -ple; only works under unix\n";
2105             $rOpts->{'preserve-line-endings'} = undef;
2106         }
2107     }
2108
2109 }
2110
2111 sub expand_command_abbreviations {
2112
2113     # go through @ARGV and expand any abbreviations
2114
2115     my ( $rexpansion, $rraw_options, $config_file ) = @_;
2116     my ($word);
2117
2118     # set a pass limit to prevent an infinite loop;
2119     # 10 should be plenty, but it may be increased to allow deeply
2120     # nested expansions.
2121     my $max_passes = 10;
2122     my @new_argv   = ();
2123
2124     # keep looping until all expansions have been converted into actual
2125     # dash parameters..
2126     for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2127         my @new_argv     = ();
2128         my $abbrev_count = 0;
2129
2130         # loop over each item in @ARGV..
2131         foreach $word (@ARGV) {
2132
2133             # convert any leading 'no-' to just 'no'
2134             if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2135
2136             # if it is a dash flag (instead of a file name)..
2137             if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2138
2139                 my $abr   = $1;
2140                 my $flags = $2;
2141
2142                 # save the raw input for debug output in case of circular refs
2143                 if ( $pass_count == 0 ) {
2144                     push( @$rraw_options, $word );
2145                 }
2146
2147                 # recombine abbreviation and flag, if necessary,
2148                 # to allow abbreviations with arguments such as '-vt=1'
2149                 if ( $rexpansion->{ $abr . $flags } ) {
2150                     $abr   = $abr . $flags;
2151                     $flags = "";
2152                 }
2153
2154                 # if we see this dash item in the expansion hash..
2155                 if ( $rexpansion->{$abr} ) {
2156                     $abbrev_count++;
2157
2158                     # stuff all of the words that it expands to into the
2159                     # new arg list for the next pass
2160                     foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2161                         next unless $abbrev;    # for safety; shouldn't happen
2162                         push( @new_argv, '--' . $abbrev . $flags );
2163                     }
2164                 }
2165
2166                 # not in expansion hash, must be actual long name
2167                 else {
2168                     push( @new_argv, $word );
2169                 }
2170             }
2171
2172             # not a dash item, so just save it for the next pass
2173             else {
2174                 push( @new_argv, $word );
2175             }
2176         }    # end of this pass
2177
2178         # update parameter list @ARGV to the new one
2179         @ARGV = @new_argv;
2180         last unless ( $abbrev_count > 0 );
2181
2182         # make sure we are not in an infinite loop
2183         if ( $pass_count == $max_passes ) {
2184             print STDERR
2185 "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2186             print STDERR "Here are the raw options\n";
2187             local $" = ')(';
2188             print STDERR "(@$rraw_options)\n";
2189             my $num = @new_argv;
2190
2191             if ( $num < 50 ) {
2192                 print STDERR "After $max_passes passes here is ARGV\n";
2193                 print STDERR "(@new_argv)\n";
2194             }
2195             else {
2196                 print STDERR "After $max_passes passes ARGV has $num entries\n";
2197             }
2198
2199             if ($config_file) {
2200                 die <<"DIE";
2201 Please check your configuration file $config_file for circular-references.
2202 To deactivate it, use -npro.
2203 DIE
2204             }
2205             else {
2206                 die <<'DIE';
2207 Program bug - circular-references in the %expansion hash, probably due to
2208 a recent program change.
2209 DIE
2210             }
2211         }    # end of check for circular references
2212     }    # end of loop over all passes
2213 }
2214
2215 # Debug routine -- this will dump the expansion hash
2216 sub dump_short_names {
2217     my $rexpansion = shift;
2218     print STDOUT <<EOM;
2219 List of short names.  This list shows how all abbreviations are
2220 translated into other abbreviations and, eventually, into long names.
2221 New abbreviations may be defined in a .perltidyrc file.
2222 For a list of all long names, use perltidy --dump-long-names (-dln).
2223 --------------------------------------------------------------------------
2224 EOM
2225     foreach my $abbrev ( sort keys %$rexpansion ) {
2226         my @list = @{ $$rexpansion{$abbrev} };
2227         print STDOUT "$abbrev --> @list\n";
2228     }
2229 }
2230
2231 sub check_vms_filename {
2232
2233     # given a valid filename (the perltidy input file)
2234     # create a modified filename and separator character
2235     # suitable for VMS.
2236     #
2237     # Contributed by Michael Cartmell
2238     #
2239     my ( $base, $path ) = fileparse( $_[0] );
2240
2241     # remove explicit ; version
2242     $base =~ s/;-?\d*$//
2243
2244       # remove explicit . version ie two dots in filename NB ^ escapes a dot
2245       or $base =~ s/(          # begin capture $1
2246                   (?:^|[^^])\. # match a dot not preceded by a caret
2247                   (?:          # followed by nothing
2248                     |          # or
2249                     .*[^^]     # anything ending in a non caret
2250                   )
2251                 )              # end capture $1
2252                 \.-?\d*$       # match . version number
2253               /$1/x;
2254
2255     # normalise filename, if there are no unescaped dots then append one
2256     $base .= '.' unless $base =~ /(?:^|[^^])\./;
2257
2258     # if we don't already have an extension then we just append the extention
2259     my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2260     return ( $path . $base, $separator );
2261 }
2262
2263 sub Win_OS_Type {
2264
2265     # TODO: are these more standard names?
2266     # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2267
2268     # Returns a string that determines what MS OS we are on.
2269     # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2270     # Returns blank string if not an MS system.
2271     # Original code contributed by: Yves Orton
2272     # We need to know this to decide where to look for config files
2273
2274     my $rpending_complaint = shift;
2275     my $os                 = "";
2276     return $os unless $^O =~ /win32|dos/i;    # is it a MS box?
2277
2278     # Systems built from Perl source may not have Win32.pm
2279     # But probably have Win32::GetOSVersion() anyway so the
2280     # following line is not 'required':
2281     # return $os unless eval('require Win32');
2282
2283     # Use the standard API call to determine the version
2284     my ( $undef, $major, $minor, $build, $id );
2285     eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2286
2287     #
2288     #    NAME                   ID   MAJOR  MINOR
2289     #    Windows NT 4           2      4       0
2290     #    Windows 2000           2      5       0
2291     #    Windows XP             2      5       1
2292     #    Windows Server 2003    2      5       2
2293
2294     return "win32s" unless $id;    # If id==0 then its a win32s box.
2295     $os = {                        # Magic numbers from MSDN
2296                                    # documentation of GetOSVersion
2297         1 => {
2298             0  => "95",
2299             10 => "98",
2300             90 => "Me"
2301         },
2302         2 => {
2303             0  => "2000",          # or NT 4, see below
2304             1  => "XP/.Net",
2305             2  => "Win2003",
2306             51 => "NT3.51"
2307         }
2308     }->{$id}->{$minor};
2309
2310     # If $os is undefined, the above code is out of date.  Suggested updates
2311     # are welcome.
2312     unless ( defined $os ) {
2313         $os = "";
2314         $$rpending_complaint .= <<EOS;
2315 Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2316 We won't be able to look for a system-wide config file.
2317 EOS
2318     }
2319
2320     # Unfortunately the logic used for the various versions isnt so clever..
2321     # so we have to handle an outside case.
2322     return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2323 }
2324
2325 sub is_unix {
2326     return ( $^O !~ /win32|dos/i )
2327       && ( $^O ne 'VMS' )
2328       && ( $^O ne 'OS2' )
2329       && ( $^O ne 'MacOS' );
2330 }
2331
2332 sub look_for_Windows {
2333
2334     # determine Windows sub-type and location of
2335     # system-wide configuration files
2336     my $rpending_complaint = shift;
2337     my $is_Windows         = ( $^O =~ /win32|dos/i );
2338     my $Windows_type       = Win_OS_Type($rpending_complaint) if $is_Windows;
2339     return ( $is_Windows, $Windows_type );
2340 }
2341
2342 sub find_config_file {
2343
2344     # look for a .perltidyrc configuration file
2345     my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2346         $rpending_complaint ) = @_;
2347
2348     $$rconfig_file_chatter .= "# Config file search...system reported as:";
2349     if ($is_Windows) {
2350         $$rconfig_file_chatter .= "Windows $Windows_type\n";
2351     }
2352     else {
2353         $$rconfig_file_chatter .= " $^O\n";
2354     }
2355
2356     # sub to check file existance and record all tests
2357     my $exists_config_file = sub {
2358         my $config_file = shift;
2359         return 0 unless $config_file;
2360         $$rconfig_file_chatter .= "# Testing: $config_file\n";
2361         return -f $config_file;
2362     };
2363
2364     my $config_file;
2365
2366     # look in current directory first
2367     $config_file = ".perltidyrc";
2368     return $config_file if $exists_config_file->($config_file);
2369
2370     # Default environment vars.
2371     my @envs = qw(PERLTIDY HOME);
2372
2373     # Check the NT/2k/XP locations, first a local machine def, then a
2374     # network def
2375     push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2376
2377     # Now go through the enviornment ...
2378     foreach my $var (@envs) {
2379         $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2380         if ( defined( $ENV{$var} ) ) {
2381             $$rconfig_file_chatter .= " = $ENV{$var}\n";
2382
2383             # test ENV{ PERLTIDY } as file:
2384             if ( $var eq 'PERLTIDY' ) {
2385                 $config_file = "$ENV{$var}";
2386                 return $config_file if $exists_config_file->($config_file);
2387             }
2388
2389             # test ENV as directory:
2390             $config_file = catfile( $ENV{$var}, ".perltidyrc" );
2391             return $config_file if $exists_config_file->($config_file);
2392         }
2393         else {
2394             $$rconfig_file_chatter .= "\n";
2395         }
2396     }
2397
2398     # then look for a system-wide definition
2399     # where to look varies with OS
2400     if ($is_Windows) {
2401
2402         if ($Windows_type) {
2403             my ( $os, $system, $allusers ) =
2404               Win_Config_Locs( $rpending_complaint, $Windows_type );
2405
2406             # Check All Users directory, if there is one.
2407             if ($allusers) {
2408                 $config_file = catfile( $allusers, ".perltidyrc" );
2409                 return $config_file if $exists_config_file->($config_file);
2410             }
2411
2412             # Check system directory.
2413             $config_file = catfile( $system, ".perltidyrc" );
2414             return $config_file if $exists_config_file->($config_file);
2415         }
2416     }
2417
2418     # Place to add customization code for other systems
2419     elsif ( $^O eq 'OS2' ) {
2420     }
2421     elsif ( $^O eq 'MacOS' ) {
2422     }
2423     elsif ( $^O eq 'VMS' ) {
2424     }
2425
2426     # Assume some kind of Unix
2427     else {
2428
2429         $config_file = "/usr/local/etc/perltidyrc";
2430         return $config_file if $exists_config_file->($config_file);
2431
2432         $config_file = "/etc/perltidyrc";
2433         return $config_file if $exists_config_file->($config_file);
2434     }
2435
2436     # Couldn't find a config file
2437     return;
2438 }
2439
2440 sub Win_Config_Locs {
2441
2442     # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2443     # or undef if its not a win32 OS.  In list context returns OS, System
2444     # Directory, and All Users Directory.  All Users will be empty on a
2445     # 9x/Me box.  Contributed by: Yves Orton.
2446
2447     my $rpending_complaint = shift;
2448     my $os = (@_) ? shift: Win_OS_Type();
2449     return unless $os;
2450
2451     my $system   = "";
2452     my $allusers = "";
2453
2454     if ( $os =~ /9[58]|Me/ ) {
2455         $system = "C:/Windows";
2456     }
2457     elsif ( $os =~ /NT|XP|200?/ ) {
2458         $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2459         $allusers =
2460           ( $os =~ /NT/ )
2461           ? "C:/WinNT/profiles/All Users/"
2462           : "C:/Documents and Settings/All Users/";
2463     }
2464     else {
2465
2466         # This currently would only happen on a win32s computer.  I dont have
2467         # one to test, so I am unsure how to proceed.  Suggestions welcome!
2468         $$rpending_complaint .=
2469 "I dont know a sensible place to look for config files on an $os system.\n";
2470         return;
2471     }
2472     return wantarray ? ( $os, $system, $allusers ) : $os;
2473 }
2474
2475 sub dump_config_file {
2476     my $fh                   = shift;
2477     my $config_file          = shift;
2478     my $rconfig_file_chatter = shift;
2479     print STDOUT "$$rconfig_file_chatter";
2480     if ($fh) {
2481         print STDOUT "# Dump of file: '$config_file'\n";
2482         while ( $_ = $fh->getline() ) { print STDOUT }
2483         eval { $fh->close() };
2484     }
2485     else {
2486         print STDOUT "# ...no config file found\n";
2487     }
2488 }
2489
2490 sub read_config_file {
2491
2492     my ( $fh, $config_file, $rexpansion ) = @_;
2493     my @config_list = ();
2494
2495     # file is bad if non-empty $death_message is returned
2496     my $death_message = "";
2497
2498     my $name = undef;
2499     my $line_no;
2500     while ( $_ = $fh->getline() ) {
2501         $line_no++;
2502         chomp;
2503         next if /^\s*#/;    # skip full-line comment
2504         ( $_, $death_message ) = strip_comment( $_, $config_file, $line_no );
2505         last if ($death_message);
2506         s/^\s*(.*?)\s*$/$1/;    # trim both ends
2507         next unless $_;
2508
2509         # look for something of the general form
2510         #    newname { body }
2511         # or just
2512         #    body
2513
2514         if ( $_ =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2515             my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2516
2517             # handle a new alias definition
2518             if ($newname) {
2519                 if ($name) {
2520                     $death_message =
2521 "No '}' seen after $name and before $newname in config file $config_file line $.\n";
2522                     last;
2523                 }
2524                 $name = $newname;
2525
2526                 if ( ${$rexpansion}{$name} ) {
2527                     local $" = ')(';
2528                     my @names = sort keys %$rexpansion;
2529                     $death_message =
2530                         "Here is a list of all installed aliases\n(@names)\n"
2531                       . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2532                     last;
2533                 }
2534                 ${$rexpansion}{$name} = [];
2535             }
2536
2537             # now do the body
2538             if ($body) {
2539
2540                 my ( $rbody_parts, $msg ) = parse_args($body);
2541                 if ($msg) {
2542                     $death_message = <<EOM;
2543 Error reading file '$config_file' at line number $line_no.
2544 $msg
2545 Please fix this line or use -npro to avoid reading this file
2546 EOM
2547                     last;
2548                 }
2549
2550                 if ($name) {
2551
2552                     # remove leading dashes if this is an alias
2553                     foreach (@$rbody_parts) { s/^\-+//; }
2554                     push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2555                 }
2556                 else {
2557                     push( @config_list, @$rbody_parts );
2558                 }
2559             }
2560
2561             if ($curly) {
2562                 unless ($name) {
2563                     $death_message =
2564 "Unexpected '}' seen in config file $config_file line $.\n";
2565                     last;
2566                 }
2567                 $name = undef;
2568             }
2569         }
2570     }
2571     eval { $fh->close() };
2572     return ( \@config_list, $death_message );
2573 }
2574
2575 sub strip_comment {
2576
2577     my ( $instr, $config_file, $line_no ) = @_;
2578     my $msg = "";
2579
2580     # nothing to do if no comments
2581     if ( $instr !~ /#/ ) {
2582         return ( $instr, $msg );
2583     }
2584
2585     # use simple method of no quotes
2586     elsif ( $instr !~ /['"]/ ) {
2587         $instr =~ s/\s*\#.*$//;    # simple trim
2588         return ( $instr, $msg );
2589     }
2590
2591     # handle comments and quotes
2592     my $outstr     = "";
2593     my $quote_char = "";
2594     while (1) {
2595
2596         # looking for ending quote character
2597         if ($quote_char) {
2598             if ( $instr =~ /\G($quote_char)/gc ) {
2599                 $quote_char = "";
2600                 $outstr .= $1;
2601             }
2602             elsif ( $instr =~ /\G(.)/gc ) {
2603                 $outstr .= $1;
2604             }
2605
2606             # error..we reached the end without seeing the ending quote char
2607             else {
2608                 $msg = <<EOM;
2609 Error reading file $config_file at line number $line_no.
2610 Did not see ending quote character <$quote_char> in this text:
2611 $instr
2612 Please fix this line or use -npro to avoid reading this file
2613 EOM
2614                 last;
2615             }
2616         }
2617
2618         # accumulating characters and looking for start of a quoted string
2619         else {
2620             if ( $instr =~ /\G([\"\'])/gc ) {
2621                 $outstr .= $1;
2622                 $quote_char = $1;
2623             }
2624             elsif ( $instr =~ /\G#/gc ) {
2625                 last;
2626             }
2627             elsif ( $instr =~ /\G(.)/gc ) {
2628                 $outstr .= $1;
2629             }
2630             else {
2631                 last;
2632             }
2633         }
2634     }
2635     return ( $outstr, $msg );
2636 }
2637
2638 sub parse_args {
2639
2640     # Parse a command string containing multiple string with possible
2641     # quotes, into individual commands.  It might look like this, for example:
2642     #
2643     #    -wba=" + - "  -some-thing -wbb='. && ||'
2644     #
2645     # There is no need, at present, to handle escaped quote characters.
2646     # (They are not perltidy tokens, so needn't be in strings).
2647
2648     my ($body)     = @_;
2649     my @body_parts = ();
2650     my $quote_char = "";
2651     my $part       = "";
2652     my $msg        = "";
2653     while (1) {
2654
2655         # looking for ending quote character
2656         if ($quote_char) {
2657             if ( $body =~ /\G($quote_char)/gc ) {
2658                 $quote_char = "";
2659             }
2660             elsif ( $body =~ /\G(.)/gc ) {
2661                 $part .= $1;
2662             }
2663
2664             # error..we reached the end without seeing the ending quote char
2665             else {
2666                 if ( length($part) ) { push @body_parts, $part; }
2667                 $msg = <<EOM;
2668 Did not see ending quote character <$quote_char> in this text:
2669 $body
2670 EOM
2671                 last;
2672             }
2673         }
2674
2675         # accumulating characters and looking for start of a quoted string
2676         else {
2677             if ( $body =~ /\G([\"\'])/gc ) {
2678                 $quote_char = $1;
2679             }
2680             elsif ( $body =~ /\G(\s+)/gc ) {
2681                 if ( length($part) ) { push @body_parts, $part; }
2682                 $part = "";
2683             }
2684             elsif ( $body =~ /\G(.)/gc ) {
2685                 $part .= $1;
2686             }
2687             else {
2688                 if ( length($part) ) { push @body_parts, $part; }
2689                 last;
2690             }
2691         }
2692     }
2693     return ( \@body_parts, $msg );
2694 }
2695
2696 sub dump_long_names {
2697
2698     my @names = sort @_;
2699     print STDOUT <<EOM;
2700 # Command line long names (passed to GetOptions)
2701 #---------------------------------------------------------------
2702 # here is a summary of the Getopt codes:
2703 # <none> does not take an argument
2704 # =s takes a mandatory string
2705 # :s takes an optional string
2706 # =i takes a mandatory integer
2707 # :i takes an optional integer
2708 # ! does not take an argument and may be negated
2709 #  i.e., -foo and -nofoo are allowed
2710 # a double dash signals the end of the options list
2711 #
2712 #---------------------------------------------------------------
2713 EOM
2714
2715     foreach (@names) { print STDOUT "$_\n" }
2716 }
2717
2718 sub dump_defaults {
2719     my @defaults = sort @_;
2720     print STDOUT "Default command line options:\n";
2721     foreach (@_) { print STDOUT "$_\n" }
2722 }
2723
2724 sub dump_options {
2725
2726     # write the options back out as a valid .perltidyrc file
2727     my ( $rOpts, $roption_string ) = @_;
2728     my %Getopt_flags;
2729     my $rGetopt_flags = \%Getopt_flags;
2730     foreach my $opt ( @{$roption_string} ) {
2731         my $flag = "";
2732         if ( $opt =~ /(.*)(!|=.*)$/ ) {
2733             $opt  = $1;
2734             $flag = $2;
2735         }
2736         if ( defined( $rOpts->{$opt} ) ) {
2737             $rGetopt_flags->{$opt} = $flag;
2738         }
2739     }
2740     print STDOUT "# Final parameter set for this run:\n";
2741     foreach my $key ( sort keys %{$rOpts} ) {
2742         my $flag   = $rGetopt_flags->{$key};
2743         my $value  = $rOpts->{$key};
2744         my $prefix = '--';
2745         my $suffix = "";
2746         if ($flag) {
2747             if ( $flag =~ /^=/ ) {
2748                 if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2749                 $suffix = "=" . $value;
2750             }
2751             elsif ( $flag =~ /^!/ ) {
2752                 $prefix .= "no" unless ($value);
2753             }
2754             else {
2755
2756                 # shouldn't happen
2757                 print
2758                   "# ERROR in dump_options: unrecognized flag $flag for $key\n";
2759             }
2760         }
2761         print STDOUT $prefix . $key . $suffix . "\n";
2762     }
2763 }
2764
2765 sub show_version {
2766     print <<"EOM";
2767 This is perltidy, v$VERSION
2768
2769 Copyright 2000-2006, Steve Hancock
2770
2771 Perltidy is free software and may be copied under the terms of the GNU
2772 General Public License, which is included in the distribution files.
2773
2774 Complete documentation for perltidy can be found using 'man perltidy'
2775 or on the internet at http://perltidy.sourceforge.net.
2776 EOM
2777 }
2778
2779 sub usage {
2780
2781     print STDOUT <<EOF;
2782 This is perltidy version $VERSION, a perl script indenter.  Usage:
2783
2784     perltidy [ options ] file1 file2 file3 ...
2785             (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2786     perltidy [ options ] file1 -o outfile
2787     perltidy [ options ] file1 -st >outfile
2788     perltidy [ options ] <infile >outfile
2789
2790 Options have short and long forms. Short forms are shown; see
2791 man pages for long forms.  Note: '=s' indicates a required string,
2792 and '=n' indicates a required integer.
2793
2794 I/O control
2795  -h      show this help
2796  -o=file name of the output file (only if single input file)
2797  -oext=s change output extension from 'tdy' to s
2798  -opath=path  change path to be 'path' for output files
2799  -b      backup original to .bak and modify file in-place
2800  -bext=s change default backup extension from 'bak' to s
2801  -q      deactivate error messages (for running under editor)
2802  -w      include non-critical warning messages in the .ERR error output
2803  -syn    run perl -c to check syntax (default under unix systems)
2804  -log    save .LOG file, which has useful diagnostics
2805  -f      force perltidy to read a binary file
2806  -g      like -log but writes more detailed .LOG file, for debugging scripts
2807  -opt    write the set of options actually used to a .LOG file
2808  -npro   ignore .perltidyrc configuration command file
2809  -pro=file   read configuration commands from file instead of .perltidyrc
2810  -st     send output to standard output, STDOUT
2811  -se     send error output to standard error output, STDERR
2812  -v      display version number to standard output and quit
2813
2814 Basic Options:
2815  -i=n    use n columns per indentation level (default n=4)
2816  -t      tabs: use one tab character per indentation level, not recommeded
2817  -nt     no tabs: use n spaces per indentation level (default)
2818  -et=n   entab leading whitespace n spaces per tab; not recommended
2819  -io     "indent only": just do indentation, no other formatting.
2820  -sil=n  set starting indentation level to n;  use if auto detection fails
2821  -ole=s  specify output line ending (s=dos or win, mac, unix)
2822  -ple    keep output line endings same as input (input must be filename)
2823
2824 Whitespace Control
2825  -fws    freeze whitespace; this disables all whitespace changes
2826            and disables the following switches:
2827  -bt=n   sets brace tightness,  n= (0 = loose, 1=default, 2 = tight)
2828  -bbt    same as -bt but for code block braces; same as -bt if not given
2829  -bbvt   block braces vertically tight; use with -bl or -bli
2830  -bbvtl=s  make -bbvt to apply to selected list of block types
2831  -pt=n   paren tightness (n=0, 1 or 2)
2832  -sbt=n  square bracket tightness (n=0, 1, or 2)
2833  -bvt=n  brace vertical tightness,
2834          n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2835  -pvt=n  paren vertical tightness (see -bvt for n)
2836  -sbvt=n square bracket vertical tightness (see -bvt for n)
2837  -bvtc=n closing brace vertical tightness:
2838          n=(0=open, 1=sometimes close, 2=always close)
2839  -pvtc=n closing paren vertical tightness, see -bvtc for n.
2840  -sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2841  -ci=n   sets continuation indentation=n,  default is n=2 spaces
2842  -lp     line up parentheses, brackets, and non-BLOCK braces
2843  -sfs    add space before semicolon in for( ; ; )
2844  -aws    allow perltidy to add whitespace (default)
2845  -dws    delete all old non-essential whitespace
2846  -icb    indent closing brace of a code block
2847  -cti=n  closing indentation of paren, square bracket, or non-block brace:
2848          n=0 none, =1 align with opening, =2 one full indentation level
2849  -icp    equivalent to -cti=2
2850  -wls=s  want space left of tokens in string; i.e. -nwls='+ - * /'
2851  -wrs=s  want space right of tokens in string;
2852  -sts    put space before terminal semicolon of a statement
2853  -sak=s  put space between keywords given in s and '(';
2854  -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2855
2856 Line Break Control
2857  -fnl    freeze newlines; this disables all line break changes
2858             and disables the following switches:
2859  -anl    add newlines;  ok to introduce new line breaks
2860  -bbs    add blank line before subs and packages
2861  -bbc    add blank line before block comments
2862  -bbb    add blank line between major blocks
2863  -sob    swallow optional blank lines
2864  -ce     cuddled else; use this style: '} else {'
2865  -dnl    delete old newlines (default)
2866  -mbl=n  maximum consecutive blank lines (default=1)
2867  -l=n    maximum line length;  default n=80
2868  -bl     opening brace on new line
2869  -sbl    opening sub brace on new line.  value of -bl is used if not given.
2870  -bli    opening brace on new line and indented
2871  -bar    opening brace always on right, even for long clauses
2872  -vt=n   vertical tightness (requires -lp); n controls break after opening
2873          token: 0=never  1=no break if next line balanced   2=no break
2874  -vtc=n  vertical tightness of closing container; n controls if closing
2875          token starts new line: 0=always  1=not unless list  1=never
2876  -wba=s  want break after tokens in string; i.e. wba=': .'
2877  -wbb=s  want break before tokens in string
2878
2879 Following Old Breakpoints
2880  -boc    break at old comma breaks: turns off all automatic list formatting
2881  -bol    break at old logical breakpoints: or, and, ||, && (default)
2882  -bok    break at old list keyword breakpoints such as map, sort (default)
2883  -bot    break at old conditional (ternary ?:) operator breakpoints (default)
2884  -cab=n  break at commas after a comma-arrow (=>):
2885          n=0 break at all commas after =>
2886          n=1 stable: break unless this breaks an existing one-line container
2887          n=2 break only if a one-line container cannot be formed
2888          n=3 do not treat commas after => specially at all
2889
2890 Comment controls
2891  -ibc    indent block comments (default)
2892  -isbc   indent spaced block comments; may indent unless no leading space
2893  -msc=n  minimum desired spaces to side comment, default 4
2894  -csc    add or update closing side comments after closing BLOCK brace
2895  -dcsc   delete closing side comments created by a -csc command
2896  -cscp=s change closing side comment prefix to be other than '## end'
2897  -cscl=s change closing side comment to apply to selected list of blocks
2898  -csci=n minimum number of lines needed to apply a -csc tag, default n=6
2899  -csct=n maximum number of columns of appended text, default n=20
2900  -cscw   causes warning if old side comment is overwritten with -csc
2901
2902  -sbc    use 'static block comments' identified by leading '##' (default)
2903  -sbcp=s change static block comment identifier to be other than '##'
2904  -osbc   outdent static block comments
2905
2906  -ssc    use 'static side comments' identified by leading '##' (default)
2907  -sscp=s change static side comment identifier to be other than '##'
2908
2909 Delete selected text
2910  -dac    delete all comments AND pod
2911  -dbc    delete block comments
2912  -dsc    delete side comments
2913  -dp     delete pod
2914
2915 Send selected text to a '.TEE' file
2916  -tac    tee all comments AND pod
2917  -tbc    tee block comments
2918  -tsc    tee side comments
2919  -tp     tee pod
2920
2921 Outdenting
2922  -olq    outdent long quoted strings (default)
2923  -olc    outdent a long block comment line
2924  -ola    outdent statement labels
2925  -okw    outdent control keywords (redo, next, last, goto, return)
2926  -okwl=s specify alternative keywords for -okw command
2927
2928 Other controls
2929  -mft=n  maximum fields per table; default n=40
2930  -x      do not format lines before hash-bang line (i.e., for VMS)
2931  -asc    allows perltidy to add a ';' when missing (default)
2932  -dsm    allows perltidy to delete an unnecessary ';'  (default)
2933
2934 Combinations of other parameters
2935  -gnu     attempt to follow GNU Coding Standards as applied to perl
2936  -mangle  remove as many newlines as possible (but keep comments and pods)
2937  -extrude  insert as many newlines as possible
2938
2939 Dump and die, debugging
2940  -dop    dump options used in this run to standard output and quit
2941  -ddf    dump default options to standard output and quit
2942  -dsn    dump all option short names to standard output and quit
2943  -dln    dump option long names to standard output and quit
2944  -dpro   dump whatever configuration file is in effect to standard output
2945  -dtt    dump all token types to standard output and quit
2946
2947 HTML
2948  -html write an html file (see 'man perl2web' for many options)
2949        Note: when -html is used, no indentation or formatting are done.
2950        Hint: try perltidy -html -css=mystyle.css filename.pl
2951        and edit mystyle.css to change the appearance of filename.html.
2952        -nnn gives line numbers
2953        -pre only writes out <pre>..</pre> code section
2954        -toc places a table of contents to subs at the top (default)
2955        -pod passes pod text through pod2html (default)
2956        -frm write html as a frame (3 files)
2957        -text=s extra extension for table of contents if -frm, default='toc'
2958        -sext=s extra extension for file content if -frm, default='src'
2959
2960 A prefix of "n" negates short form toggle switches, and a prefix of "no"
2961 negates the long forms.  For example, -nasc means don't add missing
2962 semicolons.
2963
2964 If you are unable to see this entire text, try "perltidy -h | more"
2965 For more detailed information, and additional options, try "man perltidy",
2966 or go to the perltidy home page at http://perltidy.sourceforge.net
2967 EOF
2968
2969 }
2970
2971 sub process_this_file {
2972
2973     my ( $truth, $beauty ) = @_;
2974
2975     # loop to process each line of this file
2976     while ( my $line_of_tokens = $truth->get_line() ) {
2977         $beauty->write_line($line_of_tokens);
2978     }
2979
2980     # finish up
2981     eval { $beauty->finish_formatting() };
2982     $truth->report_tokenization_errors();
2983 }
2984
2985 sub check_syntax {
2986
2987     # Use 'perl -c' to make sure that we did not create bad syntax
2988     # This is a very good independent check for programming errors
2989     #
2990     # Given names of the input and output files, ($ifname, $ofname),
2991     # we do the following:
2992     # - check syntax of the input file
2993     # - if bad, all done (could be an incomplete code snippet)
2994     # - if infile syntax ok, then check syntax of the output file;
2995     #   - if outfile syntax bad, issue warning; this implies a code bug!
2996     # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
2997
2998     my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
2999     my $infile_syntax_ok = 0;
3000     my $line_of_dashes   = '-' x 42 . "\n";
3001
3002     my $flags = $rOpts->{'perl-syntax-check-flags'};
3003
3004     # be sure we invoke perl with -c
3005     # note: perl will accept repeated flags like '-c -c'.  It is safest
3006     # to append another -c than try to find an interior bundled c, as
3007     # in -Tc, because such a 'c' might be in a quoted string, for example.
3008     if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3009
3010     # be sure we invoke perl with -x if requested
3011     # same comments about repeated parameters applies
3012     if ( $rOpts->{'look-for-hash-bang'} ) {
3013         if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3014     }
3015
3016     # this shouldn't happen unless a termporary file couldn't be made
3017     if ( $ifname eq '-' ) {
3018         $logger_object->write_logfile_entry(
3019             "Cannot run perl -c on STDIN and STDOUT\n");
3020         return $infile_syntax_ok;
3021     }
3022
3023     $logger_object->write_logfile_entry(
3024         "checking input file syntax with perl $flags\n");
3025     $logger_object->write_logfile_entry($line_of_dashes);
3026
3027     # Not all operating systems/shells support redirection of the standard
3028     # error output.
3029     my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3030
3031     my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3032     $logger_object->write_logfile_entry("$perl_output\n");
3033
3034     if ( $perl_output =~ /syntax\s*OK/ ) {
3035         $infile_syntax_ok = 1;
3036         $logger_object->write_logfile_entry($line_of_dashes);
3037         $logger_object->write_logfile_entry(
3038             "checking output file syntax with perl $flags ...\n");
3039         $logger_object->write_logfile_entry($line_of_dashes);
3040
3041         my $perl_output =
3042           do_syntax_check( $ofname, $flags, $error_redirection );
3043         $logger_object->write_logfile_entry("$perl_output\n");
3044
3045         unless ( $perl_output =~ /syntax\s*OK/ ) {
3046             $logger_object->write_logfile_entry($line_of_dashes);
3047             $logger_object->warning(
3048 "The output file has a syntax error when tested with perl $flags $ofname !\n"
3049             );
3050             $logger_object->warning(
3051                 "This implies an error in perltidy; the file $ofname is bad\n");
3052             $logger_object->report_definite_bug();
3053
3054             # the perl version number will be helpful for diagnosing the problem
3055             $logger_object->write_logfile_entry(
3056                 qx/perl -v $error_redirection/ . "\n" );
3057         }
3058     }
3059     else {
3060
3061         # Only warn of perl -c syntax errors.  Other messages,
3062         # such as missing modules, are too common.  They can be
3063         # seen by running with perltidy -w
3064         $logger_object->complain("A syntax check using perl $flags gives: \n");
3065         $logger_object->complain($line_of_dashes);
3066         $logger_object->complain("$perl_output\n");
3067         $logger_object->complain($line_of_dashes);
3068         $infile_syntax_ok = -1;
3069         $logger_object->write_logfile_entry($line_of_dashes);
3070         $logger_object->write_logfile_entry(
3071 "The output file will not be checked because of input file problems\n"
3072         );
3073     }
3074     return $infile_syntax_ok;
3075 }
3076
3077 sub do_syntax_check {
3078     my ( $fname, $flags, $error_redirection ) = @_;
3079
3080     # We have to quote the filename in case it has unusual characters
3081     # or spaces.  Example: this filename #CM11.pm# gives trouble.
3082     $fname = '"' . $fname . '"';
3083
3084     # Under VMS something like -T will become -t (and an error) so we
3085     # will put quotes around the flags.  Double quotes seem to work on
3086     # Unix/Windows/VMS, but this may not work on all systems.  (Single
3087     # quotes do not work under Windows).  It could become necessary to
3088     # put double quotes around each flag, such as:  -"c"  -"T"
3089     # We may eventually need some system-dependent coding here.
3090     $flags = '"' . $flags . '"';
3091
3092     # now wish for luck...
3093     return qx/perl $flags $fname $error_redirection/;
3094 }
3095
3096 #####################################################################
3097 #
3098 # This is a stripped down version of IO::Scalar
3099 # Given a reference to a scalar, it supplies either:
3100 # a getline method which reads lines (mode='r'), or
3101 # a print method which reads lines (mode='w')
3102 #
3103 #####################################################################
3104 package Perl::Tidy::IOScalar;
3105 use Carp;
3106
3107 sub new {
3108     my ( $package, $rscalar, $mode ) = @_;
3109     my $ref = ref $rscalar;
3110     if ( $ref ne 'SCALAR' ) {
3111         confess <<EOM;
3112 ------------------------------------------------------------------------
3113 expecting ref to SCALAR but got ref to ($ref); trace follows:
3114 ------------------------------------------------------------------------
3115 EOM
3116
3117     }
3118     if ( $mode eq 'w' ) {
3119         $$rscalar = "";
3120         return bless [ $rscalar, $mode ], $package;
3121     }
3122     elsif ( $mode eq 'r' ) {
3123
3124         # Convert a scalar to an array.
3125         # This avoids looking for "\n" on each call to getline
3126         my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3127         my $i_next = 0;
3128         return bless [ \@array, $mode, $i_next ], $package;
3129     }
3130     else {
3131         confess <<EOM;
3132 ------------------------------------------------------------------------
3133 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3134 ------------------------------------------------------------------------
3135 EOM
3136     }
3137 }
3138
3139 sub getline {
3140     my $self = shift;
3141     my $mode = $self->[1];
3142     if ( $mode ne 'r' ) {
3143         confess <<EOM;
3144 ------------------------------------------------------------------------
3145 getline call requires mode = 'r' but mode = ($mode); trace follows:
3146 ------------------------------------------------------------------------
3147 EOM
3148     }
3149     my $i = $self->[2]++;
3150     ##my $line = $self->[0]->[$i];
3151     return $self->[0]->[$i];
3152 }
3153
3154 sub print {
3155     my $self = shift;
3156     my $mode = $self->[1];
3157     if ( $mode ne 'w' ) {
3158         confess <<EOM;
3159 ------------------------------------------------------------------------
3160 print call requires mode = 'w' but mode = ($mode); trace follows:
3161 ------------------------------------------------------------------------
3162 EOM
3163     }
3164     ${ $self->[0] } .= $_[0];
3165 }
3166 sub close { return }
3167
3168 #####################################################################
3169 #
3170 # This is a stripped down version of IO::ScalarArray
3171 # Given a reference to an array, it supplies either:
3172 # a getline method which reads lines (mode='r'), or
3173 # a print method which reads lines (mode='w')
3174 #
3175 # NOTE: this routine assumes that that there aren't any embedded
3176 # newlines within any of the array elements.  There are no checks
3177 # for that.
3178 #
3179 #####################################################################
3180 package Perl::Tidy::IOScalarArray;
3181 use Carp;
3182
3183 sub new {
3184     my ( $package, $rarray, $mode ) = @_;
3185     my $ref = ref $rarray;
3186     if ( $ref ne 'ARRAY' ) {
3187         confess <<EOM;
3188 ------------------------------------------------------------------------
3189 expecting ref to ARRAY but got ref to ($ref); trace follows:
3190 ------------------------------------------------------------------------
3191 EOM
3192
3193     }
3194     if ( $mode eq 'w' ) {
3195         @$rarray = ();
3196         return bless [ $rarray, $mode ], $package;
3197     }
3198     elsif ( $mode eq 'r' ) {
3199         my $i_next = 0;
3200         return bless [ $rarray, $mode, $i_next ], $package;
3201     }
3202     else {
3203         confess <<EOM;
3204 ------------------------------------------------------------------------
3205 expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3206 ------------------------------------------------------------------------
3207 EOM
3208     }
3209 }
3210
3211 sub getline {
3212     my $self = shift;
3213     my $mode = $self->[1];
3214     if ( $mode ne 'r' ) {
3215         confess <<EOM;
3216 ------------------------------------------------------------------------
3217 getline requires mode = 'r' but mode = ($mode); trace follows:
3218 ------------------------------------------------------------------------
3219 EOM
3220     }
3221     my $i = $self->[2]++;
3222     ##my $line = $self->[0]->[$i];
3223     return $self->[0]->[$i];
3224 }
3225
3226 sub print {
3227     my $self = shift;
3228     my $mode = $self->[1];
3229     if ( $mode ne 'w' ) {
3230         confess <<EOM;
3231 ------------------------------------------------------------------------
3232 print requires mode = 'w' but mode = ($mode); trace follows:
3233 ------------------------------------------------------------------------
3234 EOM
3235     }
3236     push @{ $self->[0] }, $_[0];
3237 }
3238 sub close { return }
3239
3240 #####################################################################
3241 #
3242 # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3243 # which returns the next line to be parsed
3244 #
3245 #####################################################################
3246
3247 package Perl::Tidy::LineSource;
3248
3249 sub new {
3250
3251     my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3252     my $input_file_copy = undef;
3253     my $fh_copy;
3254
3255     my $input_line_ending;
3256     if ( $rOpts->{'preserve-line-endings'} ) {
3257         $input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3258     }
3259
3260     ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3261     return undef unless $fh;
3262
3263     # in order to check output syntax when standard output is used,
3264     # or when it is an object, we have to make a copy of the file
3265     if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3266     {
3267
3268         # Turning off syntax check when input output is used.
3269         # The reason is that temporary files cause problems on
3270         # on many systems.
3271         $rOpts->{'check-syntax'} = 0;
3272         $input_file_copy = '-';
3273
3274         $$rpending_logfile_message .= <<EOM;
3275 Note: --syntax check will be skipped because standard input is used
3276 EOM
3277
3278     }
3279
3280     return bless {
3281         _fh                => $fh,
3282         _fh_copy           => $fh_copy,
3283         _filename          => $input_file,
3284         _input_file_copy   => $input_file_copy,
3285         _input_line_ending => $input_line_ending,
3286         _rinput_buffer     => [],
3287         _started           => 0,
3288     }, $class;
3289 }
3290
3291 sub get_input_file_copy_name {
3292     my $self   = shift;
3293     my $ifname = $self->{_input_file_copy};
3294     unless ($ifname) {
3295         $ifname = $self->{_filename};
3296     }
3297     return $ifname;
3298 }
3299
3300 sub close_input_file {
3301     my $self = shift;
3302     eval { $self->{_fh}->close() };
3303     eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3304 }
3305
3306 sub get_line {
3307     my $self          = shift;
3308     my $line          = undef;
3309     my $fh            = $self->{_fh};
3310     my $fh_copy       = $self->{_fh_copy};
3311     my $rinput_buffer = $self->{_rinput_buffer};
3312
3313     if ( scalar(@$rinput_buffer) ) {
3314         $line = shift @$rinput_buffer;
3315     }
3316     else {
3317         $line = $fh->getline();
3318
3319         # patch to read raw mac files under unix, dos
3320         # see if the first line has embedded \r's
3321         if ( $line && !$self->{_started} ) {
3322             if ( $line =~ /[\015][^\015\012]/ ) {
3323
3324                 # found one -- break the line up and store in a buffer
3325                 @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3326                 my $count = @$rinput_buffer;
3327                 $line = shift @$rinput_buffer;
3328             }
3329             $self->{_started}++;
3330         }
3331     }
3332     if ( $line && $fh_copy ) { $fh_copy->print($line); }
3333     return $line;
3334 }
3335
3336 sub old_get_line {
3337     my $self    = shift;
3338     my $line    = undef;
3339     my $fh      = $self->{_fh};
3340     my $fh_copy = $self->{_fh_copy};
3341     $line = $fh->getline();
3342     if ( $line && $fh_copy ) { $fh_copy->print($line); }
3343     return $line;
3344 }
3345
3346 #####################################################################
3347 #
3348 # the Perl::Tidy::LineSink class supplies a write_line method for
3349 # actual file writing
3350 #
3351 #####################################################################
3352
3353 package Perl::Tidy::LineSink;
3354
3355 sub new {
3356
3357     my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3358         $rpending_logfile_message )
3359       = @_;
3360     my $fh               = undef;
3361     my $fh_copy          = undef;
3362     my $fh_tee           = undef;
3363     my $output_file_copy = "";
3364     my $output_file_open = 0;
3365
3366     if ( $rOpts->{'format'} eq 'tidy' ) {
3367         ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3368         unless ($fh) { die "Cannot write to output stream\n"; }
3369         $output_file_open = 1;
3370     }
3371
3372     # in order to check output syntax when standard output is used,
3373     # or when it is an object, we have to make a copy of the file
3374     if ( $output_file eq '-' || ref $output_file ) {
3375         if ( $rOpts->{'check-syntax'} ) {
3376
3377             # Turning off syntax check when standard output is used.
3378             # The reason is that temporary files cause problems on
3379             # on many systems.
3380             $rOpts->{'check-syntax'} = 0;
3381             $output_file_copy = '-';
3382             $$rpending_logfile_message .= <<EOM;
3383 Note: --syntax check will be skipped because standard output is used
3384 EOM
3385
3386         }
3387     }
3388
3389     bless {
3390         _fh               => $fh,
3391         _fh_copy          => $fh_copy,
3392         _fh_tee           => $fh_tee,
3393         _output_file      => $output_file,
3394         _output_file_open => $output_file_open,
3395         _output_file_copy => $output_file_copy,
3396         _tee_flag         => 0,
3397         _tee_file         => $tee_file,
3398         _tee_file_opened  => 0,
3399         _line_separator   => $line_separator,
3400     }, $class;
3401 }
3402
3403 sub write_line {
3404
3405     my $self    = shift;
3406     my $fh      = $self->{_fh};
3407     my $fh_copy = $self->{_fh_copy};
3408
3409     my $output_file_open = $self->{_output_file_open};
3410     chomp $_[0];
3411     $_[0] .= $self->{_line_separator};
3412
3413     $fh->print( $_[0] ) if ( $self->{_output_file_open} );
3414     print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3415
3416     if ( $self->{_tee_flag} ) {
3417         unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3418         my $fh_tee = $self->{_fh_tee};
3419         print $fh_tee $_[0];
3420     }
3421 }
3422
3423 sub get_output_file_copy {
3424     my $self   = shift;
3425     my $ofname = $self->{_output_file_copy};
3426     unless ($ofname) {
3427         $ofname = $self->{_output_file};
3428     }
3429     return $ofname;
3430 }
3431
3432 sub tee_on {
3433     my $self = shift;
3434     $self->{_tee_flag} = 1;
3435 }
3436
3437 sub tee_off {
3438     my $self = shift;
3439     $self->{_tee_flag} = 0;
3440 }
3441
3442 sub really_open_tee_file {
3443     my $self     = shift;
3444     my $tee_file = $self->{_tee_file};
3445     my $fh_tee;
3446     $fh_tee = IO::File->new(">$tee_file")
3447       or die("couldn't open TEE file $tee_file: $!\n");
3448     $self->{_tee_file_opened} = 1;
3449     $self->{_fh_tee}          = $fh_tee;
3450 }
3451
3452 sub close_output_file {
3453     my $self = shift;
3454     eval { $self->{_fh}->close() }      if $self->{_output_file_open};
3455     eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3456     $self->close_tee_file();
3457 }
3458
3459 sub close_tee_file {
3460     my $self = shift;
3461
3462     if ( $self->{_tee_file_opened} ) {
3463         eval { $self->{_fh_tee}->close() };
3464         $self->{_tee_file_opened} = 0;
3465     }
3466 }
3467
3468 #####################################################################
3469 #
3470 # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3471 # useful for program development.
3472 #
3473 # Only one such file is created regardless of the number of input
3474 # files processed.  This allows the results of processing many files
3475 # to be summarized in a single file.
3476 #
3477 #####################################################################
3478
3479 package Perl::Tidy::Diagnostics;
3480
3481 sub new {
3482
3483     my $class = shift;
3484     bless {
3485         _write_diagnostics_count => 0,
3486         _last_diagnostic_file    => "",
3487         _input_file              => "",
3488         _fh                      => undef,
3489     }, $class;
3490 }
3491
3492 sub set_input_file {
3493     my $self = shift;
3494     $self->{_input_file} = $_[0];
3495 }
3496
3497 # This is a diagnostic routine which is useful for program development.
3498 # Output from debug messages go to a file named DIAGNOSTICS, where
3499 # they are labeled by file and line.  This allows many files to be
3500 # scanned at once for some particular condition of interest.
3501 sub write_diagnostics {
3502     my $self = shift;
3503
3504     unless ( $self->{_write_diagnostics_count} ) {
3505         open DIAGNOSTICS, ">DIAGNOSTICS"
3506           or death("couldn't open DIAGNOSTICS: $!\n");
3507     }
3508
3509     my $last_diagnostic_file = $self->{_last_diagnostic_file};
3510     my $input_file           = $self->{_input_file};
3511     if ( $last_diagnostic_file ne $input_file ) {
3512         print DIAGNOSTICS "\nFILE:$input_file\n";
3513     }
3514     $self->{_last_diagnostic_file} = $input_file;
3515     my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3516     print DIAGNOSTICS "$input_line_number:\t@_";
3517     $self->{_write_diagnostics_count}++;
3518 }
3519
3520 #####################################################################
3521 #
3522 # The Perl::Tidy::Logger class writes the .LOG and .ERR files
3523 #
3524 #####################################################################
3525
3526 package Perl::Tidy::Logger;
3527
3528 sub new {
3529     my $class = shift;
3530     my $fh;
3531     my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3532
3533     # remove any old error output file
3534     unless ( ref($warning_file) ) {
3535         if ( -e $warning_file ) { unlink($warning_file) }
3536     }
3537
3538     bless {
3539         _log_file                      => $log_file,
3540         _fh_warnings                   => undef,
3541         _rOpts                         => $rOpts,
3542         _fh_warnings                   => undef,
3543         _last_input_line_written       => 0,
3544         _at_end_of_file                => 0,
3545         _use_prefix                    => 1,
3546         _block_log_output              => 0,
3547         _line_of_tokens                => undef,
3548         _output_line_number            => undef,
3549         _wrote_line_information_string => 0,
3550         _wrote_column_headings         => 0,
3551         _warning_file                  => $warning_file,
3552         _warning_count                 => 0,
3553         _complaint_count               => 0,
3554         _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
3555         _saw_brace_error => 0,
3556         _saw_extrude     => $saw_extrude,
3557         _output_array    => [],
3558     }, $class;
3559 }
3560
3561 sub close_log_file {
3562
3563     my $self = shift;
3564     if ( $self->{_fh_warnings} ) {
3565         eval { $self->{_fh_warnings}->close() };
3566         $self->{_fh_warnings} = undef;
3567     }
3568 }
3569
3570 sub get_warning_count {
3571     my $self = shift;
3572     return $self->{_warning_count};
3573 }
3574
3575 sub get_use_prefix {
3576     my $self = shift;
3577     return $self->{_use_prefix};
3578 }
3579
3580 sub block_log_output {
3581     my $self = shift;
3582     $self->{_block_log_output} = 1;
3583 }
3584
3585 sub unblock_log_output {
3586     my $self = shift;
3587     $self->{_block_log_output} = 0;
3588 }
3589
3590 sub interrupt_logfile {
3591     my $self = shift;
3592     $self->{_use_prefix} = 0;
3593     $self->warning("\n");
3594     $self->write_logfile_entry( '#' x 24 . "  WARNING  " . '#' x 25 . "\n" );
3595 }
3596
3597 sub resume_logfile {
3598     my $self = shift;
3599     $self->write_logfile_entry( '#' x 60 . "\n" );
3600     $self->{_use_prefix} = 1;
3601 }
3602
3603 sub we_are_at_the_last_line {
3604     my $self = shift;
3605     unless ( $self->{_wrote_line_information_string} ) {
3606         $self->write_logfile_entry("Last line\n\n");
3607     }
3608     $self->{_at_end_of_file} = 1;
3609 }
3610
3611 # record some stuff in case we go down in flames
3612 sub black_box {
3613     my $self = shift;
3614     my ( $line_of_tokens, $output_line_number ) = @_;
3615     my $input_line        = $line_of_tokens->{_line_text};
3616     my $input_line_number = $line_of_tokens->{_line_number};
3617
3618     # save line information in case we have to write a logfile message
3619     $self->{_line_of_tokens}                = $line_of_tokens;
3620     $self->{_output_line_number}            = $output_line_number;
3621     $self->{_wrote_line_information_string} = 0;
3622
3623     my $last_input_line_written = $self->{_last_input_line_written};
3624     my $rOpts                   = $self->{_rOpts};
3625     if (
3626         (
3627             ( $input_line_number - $last_input_line_written ) >=
3628             $rOpts->{'logfile-gap'}
3629         )
3630         || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3631       )
3632     {
3633         my $rlevels                      = $line_of_tokens->{_rlevels};
3634         my $structural_indentation_level = $$rlevels[0];
3635         $self->{_last_input_line_written} = $input_line_number;
3636         ( my $out_str = $input_line ) =~ s/^\s*//;
3637         chomp $out_str;
3638
3639         $out_str = ( '.' x $structural_indentation_level ) . $out_str;
3640
3641         if ( length($out_str) > 35 ) {
3642             $out_str = substr( $out_str, 0, 35 ) . " ....";
3643         }
3644         $self->logfile_output( "", "$out_str\n" );
3645     }
3646 }
3647
3648 sub write_logfile_entry {
3649     my $self = shift;
3650
3651     # add leading >>> to avoid confusing error mesages and code
3652     $self->logfile_output( ">>>", "@_" );
3653 }
3654
3655 sub write_column_headings {
3656     my $self = shift;
3657
3658     $self->{_wrote_column_headings} = 1;
3659     my $routput_array = $self->{_output_array};
3660     push @{$routput_array}, <<EOM;
3661 The nesting depths in the table below are at the start of the lines.
3662 The indicated output line numbers are not always exact.
3663 ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3664
3665 in:out indent c b  nesting   code + messages; (messages begin with >>>)
3666 lines  levels i k            (code begins with one '.' per indent level)
3667 ------  ----- - - --------   -------------------------------------------
3668 EOM
3669 }
3670
3671 sub make_line_information_string {
3672
3673     # make columns of information when a logfile message needs to go out
3674     my $self                    = shift;
3675     my $line_of_tokens          = $self->{_line_of_tokens};
3676     my $input_line_number       = $line_of_tokens->{_line_number};
3677     my $line_information_string = "";
3678     if ($input_line_number) {
3679
3680         my $output_line_number   = $self->{_output_line_number};
3681         my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
3682         my $paren_depth          = $line_of_tokens->{_paren_depth};
3683         my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3684         my $python_indentation_level =
3685           $line_of_tokens->{_python_indentation_level};
3686         my $rlevels         = $line_of_tokens->{_rlevels};
3687         my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3688         my $rci_levels      = $line_of_tokens->{_rci_levels};
3689         my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3690
3691         my $structural_indentation_level = $$rlevels[0];
3692
3693         $self->write_column_headings() unless $self->{_wrote_column_headings};
3694
3695         # keep logfile columns aligned for scripts up to 999 lines;
3696         # for longer scripts it doesn't really matter
3697         my $extra_space = "";
3698         $extra_space .=
3699             ( $input_line_number < 10 )  ? "  "
3700           : ( $input_line_number < 100 ) ? " "
3701           :                                "";
3702         $extra_space .=
3703             ( $output_line_number < 10 )  ? "  "
3704           : ( $output_line_number < 100 ) ? " "
3705           :                                 "";
3706
3707         # there are 2 possible nesting strings:
3708         # the original which looks like this:  (0 [1 {2
3709         # the new one, which looks like this:  {{[
3710         # the new one is easier to read, and shows the order, but
3711         # could be arbitrarily long, so we use it unless it is too long
3712         my $nesting_string =
3713           "($paren_depth [$square_bracket_depth {$brace_depth";
3714         my $nesting_string_new = $$rnesting_tokens[0];
3715
3716         my $ci_level = $$rci_levels[0];
3717         if ( $ci_level > 9 ) { $ci_level = '*' }
3718         my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3719
3720         if ( length($nesting_string_new) <= 8 ) {
3721             $nesting_string =
3722               $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3723         }
3724         if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3725         $line_information_string =
3726 "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3727     }
3728     return $line_information_string;
3729 }
3730
3731 sub logfile_output {
3732     my $self = shift;
3733     my ( $prompt, $msg ) = @_;
3734     return if ( $self->{_block_log_output} );
3735
3736     my $routput_array = $self->{_output_array};
3737     if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3738         push @{$routput_array}, "$msg";
3739     }
3740     else {
3741         my $line_information_string = $self->make_line_information_string();
3742         $self->{_wrote_line_information_string} = 1;
3743
3744         if ($line_information_string) {
3745             push @{$routput_array}, "$line_information_string   $prompt$msg";
3746         }
3747         else {
3748             push @{$routput_array}, "$msg";
3749         }
3750     }
3751 }
3752
3753 sub get_saw_brace_error {
3754     my $self = shift;
3755     return $self->{_saw_brace_error};
3756 }
3757
3758 sub increment_brace_error {
3759     my $self = shift;
3760     $self->{_saw_brace_error}++;
3761 }
3762
3763 sub brace_warning {
3764     my $self = shift;
3765     use constant BRACE_WARNING_LIMIT => 10;
3766     my $saw_brace_error = $self->{_saw_brace_error};
3767
3768     if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3769         $self->warning(@_);
3770     }
3771     $saw_brace_error++;
3772     $self->{_saw_brace_error} = $saw_brace_error;
3773
3774     if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3775         $self->warning("No further warnings of this type will be given\n");
3776     }
3777 }
3778
3779 sub complain {
3780
3781     # handle non-critical warning messages based on input flag
3782     my $self  = shift;
3783     my $rOpts = $self->{_rOpts};
3784
3785     # these appear in .ERR output only if -w flag is used
3786     if ( $rOpts->{'warning-output'} ) {
3787         $self->warning(@_);
3788     }
3789
3790     # otherwise, they go to the .LOG file
3791     else {
3792         $self->{_complaint_count}++;
3793         $self->write_logfile_entry(@_);
3794     }
3795 }
3796
3797 sub warning {
3798
3799     # report errors to .ERR file (or stdout)
3800     my $self = shift;
3801     use constant WARNING_LIMIT => 50;
3802
3803     my $rOpts = $self->{_rOpts};
3804     unless ( $rOpts->{'quiet'} ) {
3805
3806         my $warning_count = $self->{_warning_count};
3807         unless ($warning_count) {
3808             my $warning_file = $self->{_warning_file};
3809             my $fh_warnings;
3810             if ( $rOpts->{'standard-error-output'} ) {
3811                 $fh_warnings = *STDERR;
3812             }
3813             else {
3814                 ( $fh_warnings, my $filename ) =
3815                   Perl::Tidy::streamhandle( $warning_file, 'w' );
3816                 $fh_warnings or die("couldn't open $filename $!\n");
3817                 warn "## Please see file $filename\n";
3818             }
3819             $self->{_fh_warnings} = $fh_warnings;
3820         }
3821
3822         my $fh_warnings = $self->{_fh_warnings};
3823         if ( $warning_count < WARNING_LIMIT ) {
3824             if ( $self->get_use_prefix() > 0 ) {
3825                 my $input_line_number =
3826                   Perl::Tidy::Tokenizer::get_input_line_number();
3827                 print $fh_warnings "$input_line_number:\t@_";
3828                 $self->write_logfile_entry("WARNING: @_");
3829             }
3830             else {
3831                 print $fh_warnings @_;
3832                 $self->write_logfile_entry(@_);
3833             }
3834         }
3835         $warning_count++;
3836         $self->{_warning_count} = $warning_count;
3837
3838         if ( $warning_count == WARNING_LIMIT ) {
3839             print $fh_warnings "No further warnings will be given";
3840         }
3841     }
3842 }
3843
3844 # programming bug codes:
3845 #   -1 = no bug
3846 #    0 = maybe, not sure.
3847 #    1 = definitely
3848 sub report_possible_bug {
3849     my $self         = shift;
3850     my $saw_code_bug = $self->{_saw_code_bug};
3851     $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3852 }
3853
3854 sub report_definite_bug {
3855     my $self = shift;
3856     $self->{_saw_code_bug} = 1;
3857 }
3858
3859 sub ask_user_for_bug_report {
3860     my $self = shift;
3861
3862     my ( $infile_syntax_ok, $formatter ) = @_;
3863     my $saw_code_bug = $self->{_saw_code_bug};
3864     if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3865         $self->warning(<<EOM);
3866
3867 You may have encountered a code bug in perltidy.  If you think so, and
3868 the problem is not listed in the BUGS file at
3869 http://perltidy.sourceforge.net, please report it so that it can be
3870 corrected.  Include the smallest possible script which has the problem,
3871 along with the .LOG file. See the manual pages for contact information.
3872 Thank you!
3873 EOM
3874
3875     }
3876     elsif ( $saw_code_bug == 1 ) {
3877         if ( $self->{_saw_extrude} ) {
3878             $self->warning(<<EOM);
3879
3880 You may have encountered a bug in perltidy.  However, since you are using the
3881 -extrude option, the problem may be with perl or one of its modules, which have
3882 occasional problems with this type of file.  If you believe that the
3883 problem is with perltidy, and the problem is not listed in the BUGS file at
3884 http://perltidy.sourceforge.net, please report it so that it can be corrected.
3885 Include the smallest possible script which has the problem, along with the .LOG
3886 file. See the manual pages for contact information.
3887 Thank you!
3888 EOM
3889         }
3890         else {
3891             $self->warning(<<EOM);
3892
3893 Oops, you seem to have encountered a bug in perltidy.  Please check the
3894 BUGS file at http://perltidy.sourceforge.net.  If the problem is not
3895 listed there, please report it so that it can be corrected.  Include the
3896 smallest possible script which produces this message, along with the
3897 .LOG file if appropriate.  See the manual pages for contact information.
3898 Your efforts are appreciated.
3899 Thank you!
3900 EOM
3901             my $added_semicolon_count = 0;
3902             eval {
3903                 $added_semicolon_count =
3904                   $formatter->get_added_semicolon_count();
3905             };
3906             if ( $added_semicolon_count > 0 ) {
3907                 $self->warning(<<EOM);
3908
3909 The log file shows that perltidy added $added_semicolon_count semicolons.
3910 Please rerun with -nasc to see if that is the cause of the syntax error.  Even
3911 if that is the problem, please report it so that it can be fixed.
3912 EOM
3913
3914             }
3915         }
3916     }
3917 }
3918
3919 sub finish {
3920
3921     # called after all formatting to summarize errors
3922     my $self = shift;
3923     my ( $infile_syntax_ok, $formatter ) = @_;
3924
3925     my $rOpts         = $self->{_rOpts};
3926     my $warning_count = $self->{_warning_count};
3927     my $saw_code_bug  = $self->{_saw_code_bug};
3928
3929     my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3930       || $saw_code_bug == 1
3931       || $rOpts->{'logfile'};
3932     my $log_file = $self->{_log_file};
3933     if ($warning_count) {
3934         if ($save_logfile) {
3935             $self->block_log_output();    # avoid echoing this to the logfile
3936             $self->warning(
3937                 "The logfile $log_file may contain useful information\n");
3938             $self->unblock_log_output();
3939         }
3940
3941         if ( $self->{_complaint_count} > 0 ) {
3942             $self->warning(
3943 "To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3944             );
3945         }
3946
3947         if ( $self->{_saw_brace_error}
3948             && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3949         {
3950             $self->warning("To save a full .LOG file rerun with -g\n");
3951         }
3952     }
3953     $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
3954
3955     if ($save_logfile) {
3956         my $log_file = $self->{_log_file};
3957         my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
3958         if ($fh) {
3959             my $routput_array = $self->{_output_array};
3960             foreach ( @{$routput_array} ) { $fh->print($_) }
3961             eval                          { $fh->close() };
3962         }
3963     }
3964 }
3965
3966 #####################################################################
3967 #
3968 # The Perl::Tidy::DevNull class supplies a dummy print method
3969 #
3970 #####################################################################
3971
3972 package Perl::Tidy::DevNull;
3973 sub new { return bless {}, $_[0] }
3974 sub print { return }
3975 sub close { return }
3976
3977 #####################################################################
3978 #
3979 # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
3980 #
3981 #####################################################################
3982
3983 package Perl::Tidy::HtmlWriter;
3984
3985 use File::Basename;
3986
3987 # class variables
3988 use vars qw{
3989   %html_color
3990   %html_bold
3991   %html_italic
3992   %token_short_names
3993   %short_to_long_names
3994   $rOpts
3995   $css_filename
3996   $css_linkname
3997   $missing_html_entities
3998 };
3999
4000 # replace unsafe characters with HTML entity representation if HTML::Entities
4001 # is available
4002 { eval "use HTML::Entities"; $missing_html_entities = $@; }
4003
4004 sub new {
4005
4006     my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4007         $html_src_extension )
4008       = @_;
4009
4010     my $html_file_opened = 0;
4011     my $html_fh;
4012     ( $html_fh, my $html_filename ) =
4013       Perl::Tidy::streamhandle( $html_file, 'w' );
4014     unless ($html_fh) {
4015         warn("can't open $html_file: $!\n");
4016         return undef;
4017     }
4018     $html_file_opened = 1;
4019
4020     if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4021         $input_file = "NONAME";
4022     }
4023
4024     # write the table of contents to a string
4025     my $toc_string;
4026     my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4027
4028     my $html_pre_fh;
4029     my @pre_string_stack;
4030     if ( $rOpts->{'html-pre-only'} ) {
4031
4032         # pre section goes directly to the output stream
4033         $html_pre_fh = $html_fh;
4034         $html_pre_fh->print( <<"PRE_END");
4035 <pre>
4036 PRE_END
4037     }
4038     else {
4039
4040         # pre section go out to a temporary string
4041         my $pre_string;
4042         $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4043         push @pre_string_stack, \$pre_string;
4044     }
4045
4046     # pod text gets diverted if the 'pod2html' is used
4047     my $html_pod_fh;
4048     my $pod_string;
4049     if ( $rOpts->{'pod2html'} ) {
4050         if ( $rOpts->{'html-pre-only'} ) {
4051             undef $rOpts->{'pod2html'};
4052         }
4053         else {
4054             eval "use Pod::Html";
4055             if ($@) {
4056                 warn
4057 "unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4058                 undef $rOpts->{'pod2html'};
4059             }
4060             else {
4061                 $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4062             }
4063         }
4064     }
4065
4066     my $toc_filename;
4067     my $src_filename;
4068     if ( $rOpts->{'frames'} ) {
4069         unless ($extension) {
4070             warn
4071 "cannot use frames without a specified output extension; ignoring -frm\n";
4072             undef $rOpts->{'frames'};
4073         }
4074         else {
4075             $toc_filename = $input_file . $html_toc_extension . $extension;
4076             $src_filename = $input_file . $html_src_extension . $extension;
4077         }
4078     }
4079
4080     # ----------------------------------------------------------
4081     # Output is now directed as follows:
4082     # html_toc_fh <-- table of contents items
4083     # html_pre_fh <-- the <pre> section of formatted code, except:
4084     # html_pod_fh <-- pod goes here with the pod2html option
4085     # ----------------------------------------------------------
4086
4087     my $title = $rOpts->{'title'};
4088     unless ($title) {
4089         ( $title, my $path ) = fileparse($input_file);
4090     }
4091     my $toc_item_count = 0;
4092     my $in_toc_package = "";
4093     my $last_level     = 0;
4094     bless {
4095         _input_file        => $input_file,          # name of input file
4096         _title             => $title,               # title, unescaped
4097         _html_file         => $html_file,           # name of .html output file
4098         _toc_filename      => $toc_filename,        # for frames option
4099         _src_filename      => $src_filename,        # for frames option
4100         _html_file_opened  => $html_file_opened,    # a flag
4101         _html_fh           => $html_fh,             # the output stream
4102         _html_pre_fh       => $html_pre_fh,         # pre section goes here
4103         _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
4104         _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
4105         _rpod_string       => \$pod_string,         # string holding pod
4106         _pod_cut_count     => 0,                    # how many =cut's?
4107         _html_toc_fh       => $html_toc_fh,         # fh for table of contents
4108         _rtoc_string       => \$toc_string,         # string holding toc
4109         _rtoc_item_count   => \$toc_item_count,     # how many toc items
4110         _rin_toc_package   => \$in_toc_package,     # package name
4111         _rtoc_name_count   => {},                   # hash to track unique names
4112         _rpackage_stack    => [],                   # stack to check for package
4113                                                     # name changes
4114         _rlast_level       => \$last_level,         # brace indentation level
4115     }, $class;
4116 }
4117
4118 sub add_toc_item {
4119
4120     # Add an item to the html table of contents.
4121     # This is called even if no table of contents is written,
4122     # because we still want to put the anchors in the <pre> text.
4123     # We are given an anchor name and its type; types are:
4124     #      'package', 'sub', '__END__', '__DATA__', 'EOF'
4125     # There must be an 'EOF' call at the end to wrap things up.
4126     my $self = shift;
4127     my ( $name, $type ) = @_;
4128     my $html_toc_fh     = $self->{_html_toc_fh};
4129     my $html_pre_fh     = $self->{_html_pre_fh};
4130     my $rtoc_name_count = $self->{_rtoc_name_count};
4131     my $rtoc_item_count = $self->{_rtoc_item_count};
4132     my $rlast_level     = $self->{_rlast_level};
4133     my $rin_toc_package = $self->{_rin_toc_package};
4134     my $rpackage_stack  = $self->{_rpackage_stack};
4135
4136     # packages contain sublists of subs, so to avoid errors all package
4137     # items are written and finished with the following routines
4138     my $end_package_list = sub {
4139         if ($$rin_toc_package) {
4140             $html_toc_fh->print("</ul>\n</li>\n");
4141             $$rin_toc_package = "";
4142         }
4143     };
4144
4145     my $start_package_list = sub {
4146         my ( $unique_name, $package ) = @_;
4147         if ($$rin_toc_package) { $end_package_list->() }
4148         $html_toc_fh->print(<<EOM);
4149 <li><a href=\"#$unique_name\">package $package</a>
4150 <ul>
4151 EOM
4152         $$rin_toc_package = $package;
4153     };
4154
4155     # start the table of contents on the first item
4156     unless ($$rtoc_item_count) {
4157
4158         # but just quit if we hit EOF without any other entries
4159         # in this case, there will be no toc
4160         return if ( $type eq 'EOF' );
4161         $html_toc_fh->print( <<"TOC_END");
4162 <!-- BEGIN CODE INDEX --><a name="code-index"></a>
4163 <ul>
4164 TOC_END
4165     }
4166     $$rtoc_item_count++;
4167
4168     # make a unique anchor name for this location:
4169     #   - packages get a 'package-' prefix
4170     #   - subs use their names
4171     my $unique_name = $name;
4172     if ( $type eq 'package' ) { $unique_name = "package-$name" }
4173
4174     # append '-1', '-2', etc if necessary to make unique; this will
4175     # be unique because subs and packages cannot have a '-'
4176     if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4177         $unique_name .= "-$count";
4178     }
4179
4180     #   - all names get terminal '-' if pod2html is used, to avoid
4181     #     conflicts with anchor names created by pod2html
4182     if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4183
4184     # start/stop lists of subs
4185     if ( $type eq 'sub' ) {
4186         my $package = $rpackage_stack->[$$rlast_level];
4187         unless ($package) { $package = 'main' }
4188
4189         # if we're already in a package/sub list, be sure its the right
4190         # package or else close it
4191         if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4192             $end_package_list->();
4193         }
4194
4195         # start a package/sub list if necessary
4196         unless ($$rin_toc_package) {
4197             $start_package_list->( $unique_name, $package );
4198         }
4199     }
4200
4201     # now write an entry in the toc for this item
4202     if ( $type eq 'package' ) {
4203         $start_package_list->( $unique_name, $name );
4204     }
4205     elsif ( $type eq 'sub' ) {
4206         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4207     }
4208     else {
4209         $end_package_list->();
4210         $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4211     }
4212
4213     # write the anchor in the <pre> section
4214     $html_pre_fh->print("<a name=\"$unique_name\"></a>");
4215
4216     # end the table of contents, if any, on the end of file
4217     if ( $type eq 'EOF' ) {
4218         $html_toc_fh->print( <<"TOC_END");
4219 </ul>
4220 <!-- END CODE INDEX -->
4221 TOC_END
4222     }
4223 }
4224
4225 BEGIN {
4226
4227     # This is the official list of tokens which may be identified by the
4228     # user.  Long names are used as getopt keys.  Short names are
4229     # convenient short abbreviations for specifying input.  Short names
4230     # somewhat resemble token type characters, but are often different
4231     # because they may only be alphanumeric, to allow command line
4232     # input.  Also, note that because of case insensitivity of html,
4233     # this table must be in a single case only (I've chosen to use all
4234     # lower case).
4235     # When adding NEW_TOKENS: update this hash table
4236     # short names => long names
4237     %short_to_long_names = (
4238         'n'  => 'numeric',
4239         'p'  => 'paren',
4240         'q'  => 'quote',
4241         's'  => 'structure',
4242         'c'  => 'comment',
4243         'v'  => 'v-string',
4244         'cm' => 'comma',
4245         'w'  => 'bareword',
4246         'co' => 'colon',
4247         'pu' => 'punctuation',
4248         'i'  => 'identifier',
4249         'j'  => 'label',
4250         'h'  => 'here-doc-target',
4251         'hh' => 'here-doc-text',
4252         'k'  => 'keyword',
4253         'sc' => 'semicolon',
4254         'm'  => 'subroutine',
4255         'pd' => 'pod-text',
4256     );
4257
4258     # Now we have to map actual token types into one of the above short
4259     # names; any token types not mapped will get 'punctuation'
4260     # properties.
4261
4262     # The values of this hash table correspond to the keys of the
4263     # previous hash table.
4264     # The keys of this hash table are token types and can be seen
4265     # by running with --dump-token-types (-dtt).
4266
4267     # When adding NEW_TOKENS: update this hash table
4268     # $type => $short_name
4269     %token_short_names = (
4270         '#'  => 'c',
4271         'n'  => 'n',
4272         'v'  => 'v',
4273         'k'  => 'k',
4274         'F'  => 'k',
4275         'Q'  => 'q',
4276         'q'  => 'q',
4277         'J'  => 'j',
4278         'j'  => 'j',
4279         'h'  => 'h',
4280         'H'  => 'hh',
4281         'w'  => 'w',
4282         ','  => 'cm',
4283         '=>' => 'cm',
4284         ';'  => 'sc',
4285         ':'  => 'co',
4286         'f'  => 'sc',
4287         '('  => 'p',
4288         ')'  => 'p',
4289         'M'  => 'm',
4290         'P'  => 'pd',
4291         'A'  => 'co',
4292     );
4293
4294     # These token types will all be called identifiers for now
4295     # FIXME: need to separate user defined modules as separate type
4296     my @identifier = qw" i t U C Y Z G :: ";
4297     @token_short_names{@identifier} = ('i') x scalar(@identifier);
4298
4299     # These token types will be called 'structure'
4300     my @structure = qw" { } ";
4301     @token_short_names{@structure} = ('s') x scalar(@structure);
4302
4303     # OLD NOTES: save for reference
4304     # Any of these could be added later if it would be useful.
4305     # For now, they will by default become punctuation
4306     #    my @list = qw" L R [ ] ";
4307     #    @token_long_names{@list} = ('non-structure') x scalar(@list);
4308     #
4309     #    my @list = qw"
4310     #      / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4311     #      ";
4312     #    @token_long_names{@list} = ('math') x scalar(@list);
4313     #
4314     #    my @list = qw" & &= ~ ~= ^ ^= | |= ";
4315     #    @token_long_names{@list} = ('bit') x scalar(@list);
4316     #
4317     #    my @list = qw" == != < > <= <=> ";
4318     #    @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4319     #
4320     #    my @list = qw" && || ! &&= ||= //= ";
4321     #    @token_long_names{@list} = ('logical') x scalar(@list);
4322     #
4323     #    my @list = qw" . .= =~ !~ x x= ";
4324     #    @token_long_names{@list} = ('string-operators') x scalar(@list);
4325     #
4326     #    # Incomplete..
4327     #    my @list = qw" .. -> <> ... \ ? ";
4328     #    @token_long_names{@list} = ('misc-operators') x scalar(@list);
4329
4330 }
4331
4332 sub make_getopt_long_names {
4333     my $class = shift;
4334     my ($rgetopt_names) = @_;
4335     while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4336         push @$rgetopt_names, "html-color-$name=s";
4337         push @$rgetopt_names, "html-italic-$name!";
4338         push @$rgetopt_names, "html-bold-$name!";
4339     }
4340     push @$rgetopt_names, "html-color-background=s";
4341     push @$rgetopt_names, "html-linked-style-sheet=s";
4342     push @$rgetopt_names, "nohtml-style-sheets";
4343     push @$rgetopt_names, "html-pre-only";
4344     push @$rgetopt_names, "html-line-numbers";
4345     push @$rgetopt_names, "html-entities!";
4346     push @$rgetopt_names, "stylesheet";
4347     push @$rgetopt_names, "html-table-of-contents!";
4348     push @$rgetopt_names, "pod2html!";
4349     push @$rgetopt_names, "frames!";
4350     push @$rgetopt_names, "html-toc-extension=s";
4351     push @$rgetopt_names, "html-src-extension=s";
4352
4353     # Pod::Html parameters:
4354     push @$rgetopt_names, "backlink=s";
4355     push @$rgetopt_names, "cachedir=s";
4356     push @$rgetopt_names, "htmlroot=s";
4357     push @$rgetopt_names, "libpods=s";
4358     push @$rgetopt_names, "podpath=s";
4359     push @$rgetopt_names, "podroot=s";
4360     push @$rgetopt_names, "title=s";
4361
4362     # Pod::Html parameters with leading 'pod' which will be removed
4363     # before the call to Pod::Html
4364     push @$rgetopt_names, "podquiet!";
4365     push @$rgetopt_names, "podverbose!";
4366     push @$rgetopt_names, "podrecurse!";
4367     push @$rgetopt_names, "podflush";
4368     push @$rgetopt_names, "podheader!";
4369     push @$rgetopt_names, "podindex!";
4370 }
4371
4372 sub make_abbreviated_names {
4373
4374     # We're appending things like this to the expansion list:
4375     #      'hcc'    => [qw(html-color-comment)],
4376     #      'hck'    => [qw(html-color-keyword)],
4377     #  etc
4378     my $class = shift;
4379     my ($rexpansion) = @_;
4380
4381     # abbreviations for color/bold/italic properties
4382     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4383         ${$rexpansion}{"hc$short_name"}  = ["html-color-$long_name"];
4384         ${$rexpansion}{"hb$short_name"}  = ["html-bold-$long_name"];
4385         ${$rexpansion}{"hi$short_name"}  = ["html-italic-$long_name"];
4386         ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4387         ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4388     }
4389
4390     # abbreviations for all other html options
4391     ${$rexpansion}{"hcbg"}  = ["html-color-background"];
4392     ${$rexpansion}{"pre"}   = ["html-pre-only"];
4393     ${$rexpansion}{"toc"}   = ["html-table-of-contents"];
4394     ${$rexpansion}{"ntoc"}  = ["nohtml-table-of-contents"];
4395     ${$rexpansion}{"nnn"}   = ["html-line-numbers"];
4396     ${$rexpansion}{"hent"}  = ["html-entities"];
4397     ${$rexpansion}{"nhent"} = ["nohtml-entities"];
4398     ${$rexpansion}{"css"}   = ["html-linked-style-sheet"];
4399     ${$rexpansion}{"nss"}   = ["nohtml-style-sheets"];
4400     ${$rexpansion}{"ss"}    = ["stylesheet"];
4401     ${$rexpansion}{"pod"}   = ["pod2html"];
4402     ${$rexpansion}{"npod"}  = ["nopod2html"];
4403     ${$rexpansion}{"frm"}   = ["frames"];
4404     ${$rexpansion}{"nfrm"}  = ["noframes"];
4405     ${$rexpansion}{"text"}  = ["html-toc-extension"];
4406     ${$rexpansion}{"sext"}  = ["html-src-extension"];
4407 }
4408
4409 sub check_options {
4410
4411     # This will be called once after options have been parsed
4412     my $class = shift;
4413     $rOpts = shift;
4414
4415     # X11 color names for default settings that seemed to look ok
4416     # (these color names are only used for programming clarity; the hex
4417     # numbers are actually written)
4418     use constant ForestGreen   => "#228B22";
4419     use constant SaddleBrown   => "#8B4513";
4420     use constant magenta4      => "#8B008B";
4421     use constant IndianRed3    => "#CD5555";
4422     use constant DeepSkyBlue4  => "#00688B";
4423     use constant MediumOrchid3 => "#B452CD";
4424     use constant black         => "#000000";
4425     use constant white         => "#FFFFFF";
4426     use constant red           => "#FF0000";
4427
4428     # set default color, bold, italic properties
4429     # anything not listed here will be given the default (punctuation) color --
4430     # these types currently not listed and get default: ws pu s sc cm co p
4431     # When adding NEW_TOKENS: add an entry here if you don't want defaults
4432
4433     # set_default_properties( $short_name, default_color, bold?, italic? );
4434     set_default_properties( 'c',  ForestGreen,   0, 0 );
4435     set_default_properties( 'pd', ForestGreen,   0, 1 );
4436     set_default_properties( 'k',  magenta4,      1, 0 );    # was SaddleBrown
4437     set_default_properties( 'q',  IndianRed3,    0, 0 );
4438     set_default_properties( 'hh', IndianRed3,    0, 1 );
4439     set_default_properties( 'h',  IndianRed3,    1, 0 );
4440     set_default_properties( 'i',  DeepSkyBlue4,  0, 0 );
4441     set_default_properties( 'w',  black,         0, 0 );
4442     set_default_properties( 'n',  MediumOrchid3, 0, 0 );
4443     set_default_properties( 'v',  MediumOrchid3, 0, 0 );
4444     set_default_properties( 'j',  IndianRed3,    1, 0 );
4445     set_default_properties( 'm',  red,           1, 0 );
4446
4447     set_default_color( 'html-color-background',  white );
4448     set_default_color( 'html-color-punctuation', black );
4449
4450     # setup property lookup tables for tokens based on their short names
4451     # every token type has a short name, and will use these tables
4452     # to do the html markup
4453     while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4454         $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
4455         $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
4456         $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4457     }
4458
4459     # write style sheet to STDOUT and die if requested
4460     if ( defined( $rOpts->{'stylesheet'} ) ) {
4461         write_style_sheet_file('-');
4462         exit 1;
4463     }
4464
4465     # make sure user gives a file name after -css
4466     if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4467         $css_linkname = $rOpts->{'html-linked-style-sheet'};
4468         if ( $css_linkname =~ /^-/ ) {
4469             die "You must specify a valid filename after -css\n";
4470         }
4471     }
4472
4473     # check for conflict
4474     if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4475         $rOpts->{'nohtml-style-sheets'} = 0;
4476         warning("You can't specify both -css and -nss; -nss ignored\n");
4477     }
4478
4479     # write a style sheet file if necessary
4480     if ($css_linkname) {
4481
4482         # if the selected filename exists, don't write, because user may
4483         # have done some work by hand to create it; use backup name instead
4484         # Also, this will avoid a potential disaster in which the user
4485         # forgets to specify the style sheet, like this:
4486         #    perltidy -html -css myfile1.pl myfile2.pl
4487         # This would cause myfile1.pl to parsed as the style sheet by GetOpts
4488         my $css_filename = $css_linkname;
4489         unless ( -e $css_filename ) {
4490             write_style_sheet_file($css_filename);
4491         }
4492     }
4493     $missing_html_entities = 1 unless $rOpts->{'html-entities'};
4494 }
4495
4496 sub write_style_sheet_file {
4497
4498     my $css_filename = shift;
4499     my $fh;
4500     unless ( $fh = IO::File->new("> $css_filename") ) {
4501         die "can't open $css_filename: $!\n";
4502     }
4503     write_style_sheet_data($fh);
4504     eval { $fh->close };
4505 }
4506
4507 sub write_style_sheet_data {
4508
4509     # write the style sheet data to an open file handle
4510     my $fh = shift;
4511
4512     my $bg_color   = $rOpts->{'html-color-background'};
4513     my $text_color = $rOpts->{'html-color-punctuation'};
4514
4515     # pre-bgcolor is new, and may not be defined
4516     my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4517     $pre_bg_color = $bg_color unless $pre_bg_color;
4518
4519     $fh->print(<<"EOM");
4520 /* default style sheet generated by perltidy */
4521 body {background: $bg_color; color: $text_color}
4522 pre { color: $text_color;
4523       background: $pre_bg_color;
4524       font-family: courier;
4525     }
4526
4527 EOM
4528
4529     foreach my $short_name ( sort keys %short_to_long_names ) {
4530         my $long_name = $short_to_long_names{$short_name};
4531
4532         my $abbrev = '.' . $short_name;
4533         if ( length($short_name) == 1 ) { $abbrev .= ' ' }    # for alignment
4534         my $color = $html_color{$short_name};
4535         if ( !defined($color) ) { $color = $text_color }
4536         $fh->print("$abbrev \{ color: $color;");
4537
4538         if ( $html_bold{$short_name} ) {
4539             $fh->print(" font-weight:bold;");
4540         }
4541
4542         if ( $html_italic{$short_name} ) {
4543             $fh->print(" font-style:italic;");
4544         }
4545         $fh->print("} /* $long_name */\n");
4546     }
4547 }
4548
4549 sub set_default_color {
4550
4551     # make sure that options hash $rOpts->{$key} contains a valid color
4552     my ( $key, $color ) = @_;
4553     if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4554     $rOpts->{$key} = check_RGB($color);
4555 }
4556
4557 sub check_RGB {
4558
4559     # if color is a 6 digit hex RGB value, prepend a #, otherwise
4560     # assume that it is a valid ascii color name
4561     my ($color) = @_;
4562     if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4563     return $color;
4564 }
4565
4566 sub set_default_properties {
4567     my ( $short_name, $color, $bold, $italic ) = @_;
4568
4569     set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4570     my $key;
4571     $key = "html-bold-$short_to_long_names{$short_name}";
4572     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4573     $key = "html-italic-$short_to_long_names{$short_name}";
4574     $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4575 }
4576
4577 sub pod_to_html {
4578
4579     # Use Pod::Html to process the pod and make the page
4580     # then merge the perltidy code sections into it.
4581     # return 1 if success, 0 otherwise
4582     my $self = shift;
4583     my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4584     my $input_file   = $self->{_input_file};
4585     my $title        = $self->{_title};
4586     my $success_flag = 0;
4587
4588     # don't try to use pod2html if no pod
4589     unless ($pod_string) {
4590         return $success_flag;
4591     }
4592
4593     # Pod::Html requires a real temporary filename
4594     # If we are making a frame, we have a name available
4595     # Otherwise, we have to fine one
4596     my $tmpfile;
4597     if ( $rOpts->{'frames'} ) {
4598         $tmpfile = $self->{_toc_filename};
4599     }
4600     else {
4601         $tmpfile = Perl::Tidy::make_temporary_filename();
4602     }
4603     my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4604     unless ($fh_tmp) {
4605         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4606         return $success_flag;
4607     }
4608
4609     #------------------------------------------------------------------
4610     # Warning: a temporary file is open; we have to clean up if
4611     # things go bad.  From here on all returns should be by going to
4612     # RETURN so that the temporary file gets unlinked.
4613     #------------------------------------------------------------------
4614
4615     # write the pod text to the temporary file
4616     $fh_tmp->print($pod_string);
4617     $fh_tmp->close();
4618
4619     # Hand off the pod to pod2html.
4620     # Note that we can use the same temporary filename for input and output
4621     # because of the way pod2html works.
4622     {
4623
4624         my @args;
4625         push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4626         my $kw;
4627
4628         # Flags with string args:
4629         # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4630         # "podpath=s", "podroot=s"
4631         # Note: -css=s is handled by perltidy itself
4632         foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4633             if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4634         }
4635
4636         # Toggle switches; these have extra leading 'pod'
4637         # "header!", "index!", "recurse!", "quiet!", "verbose!"
4638         foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4639             my $kwd = $kw;    # allows us to strip 'pod'
4640             if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4641             elsif ( defined( $rOpts->{$kw} ) ) {
4642                 $kwd =~ s/^pod//;
4643                 push @args, "--no$kwd";
4644             }
4645         }
4646
4647         # "flush",
4648         $kw = 'podflush';
4649         if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4650
4651         # Must clean up if pod2html dies (it can);
4652         # Be careful not to overwrite callers __DIE__ routine
4653         local $SIG{__DIE__} = sub {
4654             print $_[0];
4655             unlink $tmpfile if -e $tmpfile;
4656             exit 1;
4657         };
4658
4659         pod2html(@args);
4660     }
4661     $fh_tmp = IO::File->new( $tmpfile, 'r' );
4662     unless ($fh_tmp) {
4663
4664         # this error shouldn't happen ... we just used this filename
4665         warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4666         goto RETURN;
4667     }
4668
4669     my $html_fh = $self->{_html_fh};
4670     my @toc;
4671     my $in_toc;
4672     my $no_print;
4673
4674     # This routine will write the html selectively and store the toc
4675     my $html_print = sub {
4676         foreach (@_) {
4677             $html_fh->print($_) unless ($no_print);
4678             if ($in_toc) { push @toc, $_ }
4679         }
4680     };
4681
4682     # loop over lines of html output from pod2html and merge in
4683     # the necessary perltidy html sections
4684     my ( $saw_body, $saw_index, $saw_body_end );
4685     while ( my $line = $fh_tmp->getline() ) {
4686
4687         if ( $line =~ /^\s*<html>\s*$/i ) {
4688             my $date = localtime;
4689             $html_print->("<!-- Generated by perltidy on $date -->\n");
4690             $html_print->($line);
4691         }
4692
4693         # Copy the perltidy css, if any, after <body> tag
4694         elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4695             $saw_body = 1;
4696             $html_print->($css_string) if $css_string;
4697             $html_print->($line);
4698
4699             # add a top anchor and heading
4700             $html_print->("<a name=\"-top-\"></a>\n");
4701             $title = escape_html($title);
4702             $html_print->("<h1>$title</h1>\n");
4703         }
4704         elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4705             $in_toc = 1;
4706
4707             # when frames are used, an extra table of contents in the
4708             # contents panel is confusing, so don't print it
4709             $no_print = $rOpts->{'frames'}
4710               || !$rOpts->{'html-table-of-contents'};
4711             $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4712             $html_print->($line);
4713         }
4714
4715         # Copy the perltidy toc, if any, after the Pod::Html toc
4716         elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4717             $saw_index = 1;
4718             $html_print->($line);
4719             if ($toc_string) {
4720                 $html_print->("<hr />\n") if $rOpts->{'frames'};
4721                 $html_print->("<h2>Code Index:</h2>\n");
4722                 my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4723                 $html_print->(@toc);
4724             }
4725             $in_toc   = 0;
4726             $no_print = 0;
4727         }
4728
4729         # Copy one perltidy section after each marker
4730         elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4731             $line = $2;
4732             $html_print->($1) if $1;
4733
4734             # Intermingle code and pod sections if we saw multiple =cut's.
4735             if ( $self->{_pod_cut_count} > 1 ) {
4736                 my $rpre_string = shift(@$rpre_string_stack);
4737                 if ($$rpre_string) {
4738                     $html_print->('<pre>');
4739                     $html_print->($$rpre_string);
4740                     $html_print->('</pre>');
4741                 }
4742                 else {
4743
4744                     # shouldn't happen: we stored a string before writing
4745                     # each marker.
4746                     warn
4747 "Problem merging html stream with pod2html; order may be wrong\n";
4748                 }
4749                 $html_print->($line);
4750             }
4751
4752             # If didn't see multiple =cut lines, we'll put the pod out first
4753             # and then the code, because it's less confusing.
4754             else {
4755
4756                 # since we are not intermixing code and pod, we don't need
4757                 # or want any <hr> lines which separated pod and code
4758                 $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4759             }
4760         }
4761
4762         # Copy any remaining code section before the </body> tag
4763         elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4764             $saw_body_end = 1;
4765             if (@$rpre_string_stack) {
4766                 unless ( $self->{_pod_cut_count} > 1 ) {
4767                     $html_print->('<hr />');
4768                 }
4769                 while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4770                     $html_print->('<pre>');
4771                     $html_print->($$rpre_string);
4772                     $html_print->('</pre>');
4773                 }
4774             }
4775             $html_print->($line);
4776         }
4777         else {
4778             $html_print->($line);
4779         }
4780     }
4781
4782     $success_flag = 1;
4783     unless ($saw_body) {
4784         warn "Did not see <body> in pod2html output\n";
4785         $success_flag = 0;
4786     }
4787     unless ($saw_body_end) {
4788         warn "Did not see </body> in pod2html output\n";
4789         $success_flag = 0;
4790     }
4791     unless ($saw_index) {
4792         warn "Did not find INDEX END in pod2html output\n";
4793         $success_flag = 0;
4794     }
4795
4796   RETURN:
4797     eval { $html_fh->close() };
4798
4799     # note that we have to unlink tmpfile before making frames
4800     # because the tmpfile may be one of the names used for frames
4801     unlink $tmpfile if -e $tmpfile;
4802     if ( $success_flag && $rOpts->{'frames'} ) {
4803         $self->make_frame( \@toc );
4804     }
4805     return $success_flag;
4806 }
4807
4808 sub make_frame {
4809
4810     # Make a frame with table of contents in the left panel
4811     # and the text in the right panel.
4812     # On entry:
4813     #  $html_filename contains the no-frames html output
4814     #  $rtoc is a reference to an array with the table of contents
4815     my $self          = shift;
4816     my ($rtoc)        = @_;
4817     my $input_file    = $self->{_input_file};
4818     my $html_filename = $self->{_html_file};
4819     my $toc_filename  = $self->{_toc_filename};
4820     my $src_filename  = $self->{_src_filename};
4821     my $title         = $self->{_title};
4822     $title = escape_html($title);
4823
4824     # FUTURE input parameter:
4825     my $top_basename = "";
4826
4827     # We need to produce 3 html files:
4828     # 1. - the table of contents
4829     # 2. - the contents (source code) itself
4830     # 3. - the frame which contains them
4831
4832     # get basenames for relative links
4833     my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4834     my ( $src_basename, $src_path ) = fileparse($src_filename);
4835
4836     # 1. Make the table of contents panel, with appropriate changes
4837     # to the anchor names
4838     my $src_frame_name = 'SRC';
4839     my $first_anchor =
4840       write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4841         $src_frame_name );
4842
4843     # 2. The current .html filename is renamed to be the contents panel
4844     rename( $html_filename, $src_filename )
4845       or die "Cannot rename $html_filename to $src_filename:$!\n";
4846
4847     # 3. Then use the original html filename for the frame
4848     write_frame_html(
4849         $title,        $html_filename, $top_basename,
4850         $toc_basename, $src_basename,  $src_frame_name
4851     );
4852 }
4853
4854 sub write_toc_html {
4855
4856     # write a separate html table of contents file for frames
4857     my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4858     my $fh = IO::File->new( $toc_filename, 'w' )
4859       or die "Cannot open $toc_filename:$!\n";
4860     $fh->print(<<EOM);
4861 <html>
4862 <head>
4863 <title>$title</title>
4864 </head>
4865 <body>
4866 <h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4867 EOM
4868
4869     my $first_anchor =
4870       change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4871     $fh->print( join "", @$rtoc );
4872
4873     $fh->print(<<EOM);
4874 </body>
4875 </html>
4876 EOM
4877
4878 }
4879
4880 sub write_frame_html {
4881
4882     # write an html file to be the table of contents frame
4883     my (
4884         $title,        $frame_filename, $top_basename,
4885         $toc_basename, $src_basename,   $src_frame_name
4886     ) = @_;
4887
4888     my $fh = IO::File->new( $frame_filename, 'w' )
4889       or die "Cannot open $toc_basename:$!\n";
4890
4891     $fh->print(<<EOM);
4892 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4893     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4894 <?xml version="1.0" encoding="iso-8859-1" ?>
4895 <html xmlns="http://www.w3.org/1999/xhtml">
4896 <head>
4897 <title>$title</title>
4898 </head>
4899 EOM
4900
4901     # two left panels, one right, if master index file
4902     if ($top_basename) {
4903         $fh->print(<<EOM);
4904 <frameset cols="20%,80%">
4905 <frameset rows="30%,70%">
4906 <frame src = "$top_basename" />
4907 <frame src = "$toc_basename" />
4908 </frameset>
4909 EOM
4910     }
4911
4912     # one left panels, one right, if no master index file
4913     else {
4914         $fh->print(<<EOM);
4915 <frameset cols="20%,*">
4916 <frame src = "$toc_basename" />
4917 EOM
4918     }
4919     $fh->print(<<EOM);
4920 <frame src = "$src_basename" name = "$src_frame_name" />
4921 <noframes>
4922 <body>
4923 <p>If you see this message, you are using a non-frame-capable web client.</p>
4924 <p>This document contains:</p>
4925 <ul>
4926 <li><a href="$toc_basename">A table of contents</a></li>
4927 <li><a href="$src_basename">The source code</a></li>
4928 </ul>
4929 </body>
4930 </noframes>
4931 </frameset>
4932 </html>
4933 EOM
4934 }
4935
4936 sub change_anchor_names {
4937
4938     # add a filename and target to anchors
4939     # also return the first anchor
4940     my ( $rlines, $filename, $target ) = @_;
4941     my $first_anchor;
4942     foreach my $line (@$rlines) {
4943
4944         #  We're looking for lines like this:
4945         #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4946         #  ----  -       --------  -----------------
4947         #  $1              $4            $5
4948         if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4949             my $pre  = $1;
4950             my $name = $4;
4951             my $post = $5;
4952             my $href = "$filename#$name";
4953             $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
4954             unless ($first_anchor) { $first_anchor = $href }
4955         }
4956     }
4957     return $first_anchor;
4958 }
4959
4960 sub close_html_file {
4961     my $self = shift;
4962     return unless $self->{_html_file_opened};
4963
4964     my $html_fh     = $self->{_html_fh};
4965     my $rtoc_string = $self->{_rtoc_string};
4966
4967     # There are 3 basic paths to html output...
4968
4969     # ---------------------------------
4970     # Path 1: finish up if in -pre mode
4971     # ---------------------------------
4972     if ( $rOpts->{'html-pre-only'} ) {
4973         $html_fh->print( <<"PRE_END");
4974 </pre>
4975 PRE_END
4976         eval { $html_fh->close() };
4977         return;
4978     }
4979
4980     # Finish the index
4981     $self->add_toc_item( 'EOF', 'EOF' );
4982
4983     my $rpre_string_stack = $self->{_rpre_string_stack};
4984
4985     # Patch to darken the <pre> background color in case of pod2html and
4986     # interleaved code/documentation.  Otherwise, the distinction
4987     # between code and documentation is blurred.
4988     if (   $rOpts->{pod2html}
4989         && $self->{_pod_cut_count} >= 1
4990         && $rOpts->{'html-color-background'} eq '#FFFFFF' )
4991     {
4992         $rOpts->{'html-pre-color-background'} = '#F0F0F0';
4993     }
4994
4995     # put the css or its link into a string, if used
4996     my $css_string;
4997     my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
4998
4999     # use css linked to another file
5000     if ( $rOpts->{'html-linked-style-sheet'} ) {
5001         $fh_css->print(
5002             qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5003         );
5004     }
5005
5006     # use css embedded in this file
5007     elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5008         $fh_css->print( <<'ENDCSS');
5009 <style type="text/css">
5010 <!--
5011 ENDCSS
5012         write_style_sheet_data($fh_css);
5013         $fh_css->print( <<"ENDCSS");
5014 -->
5015 </style>
5016 ENDCSS
5017     }
5018
5019     # -----------------------------------------------------------
5020     # path 2: use pod2html if requested
5021     #         If we fail for some reason, continue on to path 3
5022     # -----------------------------------------------------------
5023     if ( $rOpts->{'pod2html'} ) {
5024         my $rpod_string = $self->{_rpod_string};
5025         $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5026             $rpre_string_stack )
5027           && return;
5028     }
5029
5030     # --------------------------------------------------
5031     # path 3: write code in html, with pod only in italics
5032     # --------------------------------------------------
5033     my $input_file = $self->{_input_file};
5034     my $title      = escape_html($input_file);
5035     my $date       = localtime;
5036     $html_fh->print( <<"HTML_START");
5037 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5038    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5039 <!-- Generated by perltidy on $date -->
5040 <html xmlns="http://www.w3.org/1999/xhtml">
5041 <head>
5042 <title>$title</title>
5043 HTML_START
5044
5045     # output the css, if used
5046     if ($css_string) {
5047         $html_fh->print($css_string);
5048         $html_fh->print( <<"ENDCSS");
5049 </head>
5050 <body>
5051 ENDCSS
5052     }
5053     else {
5054
5055         $html_fh->print( <<"HTML_START");
5056 </head>
5057 <body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5058 HTML_START
5059     }
5060
5061     $html_fh->print("<a name=\"-top-\"></a>\n");
5062     $html_fh->print( <<"EOM");
5063 <h1>$title</h1>
5064 EOM
5065
5066     # copy the table of contents
5067     if (   $$rtoc_string
5068         && !$rOpts->{'frames'}
5069         && $rOpts->{'html-table-of-contents'} )
5070     {
5071         $html_fh->print($$rtoc_string);
5072     }
5073
5074     # copy the pre section(s)
5075     my $fname_comment = $input_file;
5076     $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
5077     $html_fh->print( <<"END_PRE");
5078 <hr />
5079 <!-- contents of filename: $fname_comment -->
5080 <pre>
5081 END_PRE
5082
5083     foreach my $rpre_string (@$rpre_string_stack) {
5084         $html_fh->print($$rpre_string);
5085     }
5086
5087     # and finish the html page
5088     $html_fh->print( <<"HTML_END");
5089 </pre>
5090 </body>
5091 </html>
5092 HTML_END
5093     eval { $html_fh->close() };    # could be object without close method
5094
5095     if ( $rOpts->{'frames'} ) {
5096         my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5097         $self->make_frame( \@toc );
5098     }
5099 }
5100
5101 sub markup_tokens {
5102     my $self = shift;
5103     my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5104     my ( @colored_tokens, $j, $string, $type, $token, $level );
5105     my $rlast_level    = $self->{_rlast_level};
5106     my $rpackage_stack = $self->{_rpackage_stack};
5107
5108     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5109         $type  = $$rtoken_type[$j];
5110         $token = $$rtokens[$j];
5111         $level = $$rlevels[$j];
5112         $level = 0 if ( $level < 0 );
5113
5114         #-------------------------------------------------------
5115         # Update the package stack.  The package stack is needed to keep
5116         # the toc correct because some packages may be declared within
5117         # blocks and go out of scope when we leave the block.
5118         #-------------------------------------------------------
5119         if ( $level > $$rlast_level ) {
5120             unless ( $rpackage_stack->[ $level - 1 ] ) {
5121                 $rpackage_stack->[ $level - 1 ] = 'main';
5122             }
5123             $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5124         }
5125         elsif ( $level < $$rlast_level ) {
5126             my $package = $rpackage_stack->[$level];
5127             unless ($package) { $package = 'main' }
5128
5129             # if we change packages due to a nesting change, we
5130             # have to make an entry in the toc
5131             if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5132                 $self->add_toc_item( $package, 'package' );
5133             }
5134         }
5135         $$rlast_level = $level;
5136
5137         #-------------------------------------------------------
5138         # Intercept a sub name here; split it
5139         # into keyword 'sub' and sub name; and add an
5140         # entry in the toc
5141         #-------------------------------------------------------
5142         if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5143             $token = $self->markup_html_element( $1, 'k' );
5144             push @colored_tokens, $token;
5145             $token = $2;
5146             $type  = 'M';
5147
5148             # but don't include sub declarations in the toc;
5149             # these wlll have leading token types 'i;'
5150             my $signature = join "", @$rtoken_type;
5151             unless ( $signature =~ /^i;/ ) {
5152                 my $subname = $token;
5153                 $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5154                 $self->add_toc_item( $subname, 'sub' );
5155             }
5156         }
5157
5158         #-------------------------------------------------------
5159         # Intercept a package name here; split it
5160         # into keyword 'package' and name; add to the toc,
5161         # and update the package stack
5162         #-------------------------------------------------------
5163         if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5164             $token = $self->markup_html_element( $1, 'k' );
5165             push @colored_tokens, $token;
5166             $token = $2;
5167             $type  = 'i';
5168             $self->add_toc_item( "$token", 'package' );
5169             $rpackage_stack->[$level] = $token;
5170         }
5171
5172         $token = $self->markup_html_element( $token, $type );
5173         push @colored_tokens, $token;
5174     }
5175     return ( \@colored_tokens );
5176 }
5177
5178 sub markup_html_element {
5179     my $self = shift;
5180     my ( $token, $type ) = @_;
5181
5182     return $token if ( $type eq 'b' );    # skip a blank token
5183     return $token if ( $token =~ /^\s*$/ );    # skip a blank line
5184     $token = escape_html($token);
5185
5186     # get the short abbreviation for this token type
5187     my $short_name = $token_short_names{$type};
5188     if ( !defined($short_name) ) {
5189         $short_name = "pu";                    # punctuation is default
5190     }
5191
5192     # handle style sheets..
5193     if ( !$rOpts->{'nohtml-style-sheets'} ) {
5194         if ( $short_name ne 'pu' ) {
5195             $token = qq(<span class="$short_name">) . $token . "</span>";
5196         }
5197     }
5198
5199     # handle no style sheets..
5200     else {
5201         my $color = $html_color{$short_name};
5202
5203         if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5204             $token = qq(<font color="$color">) . $token . "</font>";
5205         }
5206         if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5207         if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
5208     }
5209     return $token;
5210 }
5211
5212 sub escape_html {
5213
5214     my $token = shift;
5215     if ($missing_html_entities) {
5216         $token =~ s/\&/&amp;/g;
5217         $token =~ s/\</&lt;/g;
5218         $token =~ s/\>/&gt;/g;
5219         $token =~ s/\"/&quot;/g;
5220     }
5221     else {
5222         HTML::Entities::encode_entities($token);
5223     }
5224     return $token;
5225 }
5226
5227 sub finish_formatting {
5228
5229     # called after last line
5230     my $self = shift;
5231     $self->close_html_file();
5232     return;
5233 }
5234
5235 sub write_line {
5236
5237     my $self = shift;
5238     return unless $self->{_html_file_opened};
5239     my $html_pre_fh      = $self->{_html_pre_fh};
5240     my ($line_of_tokens) = @_;
5241     my $line_type        = $line_of_tokens->{_line_type};
5242     my $input_line       = $line_of_tokens->{_line_text};
5243     my $line_number      = $line_of_tokens->{_line_number};
5244     chomp $input_line;
5245
5246     # markup line of code..
5247     my $html_line;
5248     if ( $line_type eq 'CODE' ) {
5249         my $rtoken_type = $line_of_tokens->{_rtoken_type};
5250         my $rtokens     = $line_of_tokens->{_rtokens};
5251         my $rlevels     = $line_of_tokens->{_rlevels};
5252
5253         if ( $input_line =~ /(^\s*)/ ) {
5254             $html_line = $1;
5255         }
5256         else {
5257             $html_line = "";
5258         }
5259         my ($rcolored_tokens) =
5260           $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5261         $html_line .= join '', @$rcolored_tokens;
5262     }
5263
5264     # markup line of non-code..
5265     else {
5266         my $line_character;
5267         if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
5268         elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
5269         elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
5270         elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5271         elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
5272         elsif ( $line_type eq 'END_START' ) {
5273             $line_character = 'k';
5274             $self->add_toc_item( '__END__', '__END__' );
5275         }
5276         elsif ( $line_type eq 'DATA_START' ) {
5277             $line_character = 'k';
5278             $self->add_toc_item( '__DATA__', '__DATA__' );
5279         }
5280         elsif ( $line_type =~ /^POD/ ) {
5281             $line_character = 'P';
5282             if ( $rOpts->{'pod2html'} ) {
5283                 my $html_pod_fh = $self->{_html_pod_fh};
5284                 if ( $line_type eq 'POD_START' ) {
5285
5286                     my $rpre_string_stack = $self->{_rpre_string_stack};
5287                     my $rpre_string       = $rpre_string_stack->[-1];
5288
5289                     # if we have written any non-blank lines to the
5290                     # current pre section, start writing to a new output
5291                     # string
5292                     if ( $$rpre_string =~ /\S/ ) {
5293                         my $pre_string;
5294                         $html_pre_fh =
5295                           Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5296                         $self->{_html_pre_fh} = $html_pre_fh;
5297                         push @$rpre_string_stack, \$pre_string;
5298
5299                         # leave a marker in the pod stream so we know
5300                         # where to put the pre section we just
5301                         # finished.
5302                         my $for_html = '=for html';    # don't confuse pod utils
5303                         $html_pod_fh->print(<<EOM);
5304
5305 $for_html
5306 <!-- pERLTIDY sECTION -->
5307
5308 EOM
5309                     }
5310
5311                     # otherwise, just clear the current string and start
5312                     # over
5313                     else {
5314                         $$rpre_string = "";
5315                         $html_pod_fh->print("\n");
5316                     }
5317                 }
5318                 $html_pod_fh->print( $input_line . "\n" );
5319                 if ( $line_type eq 'POD_END' ) {
5320                     $self->{_pod_cut_count}++;
5321                     $html_pod_fh->print("\n");
5322                 }
5323                 return;
5324             }
5325         }
5326         else { $line_character = 'Q' }
5327         $html_line = $self->markup_html_element( $input_line, $line_character );
5328     }
5329
5330     # add the line number if requested
5331     if ( $rOpts->{'html-line-numbers'} ) {
5332         my $extra_space .=
5333             ( $line_number < 10 )   ? "   "
5334           : ( $line_number < 100 )  ? "  "
5335           : ( $line_number < 1000 ) ? " "
5336           :                           "";
5337         $html_line = $extra_space . $line_number . " " . $html_line;
5338     }
5339
5340     # write the line
5341     $html_pre_fh->print("$html_line\n");
5342 }
5343
5344 #####################################################################
5345 #
5346 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
5347 # line breaks to the token stream
5348 #
5349 # WARNING: This is not a real class for speed reasons.  Only one
5350 # Formatter may be used.
5351 #
5352 #####################################################################
5353
5354 package Perl::Tidy::Formatter;
5355
5356 BEGIN {
5357
5358     # Caution: these debug flags produce a lot of output
5359     # They should all be 0 except when debugging small scripts
5360     use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
5361     use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
5362     use constant FORMATTER_DEBUG_FLAG_CI      => 0;
5363     use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
5364     use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
5365     use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
5366     use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5367     use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
5368     use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
5369     use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
5370     use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
5371     use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;
5372
5373     my $debug_warning = sub {
5374         print "FORMATTER_DEBUGGING with key $_[0]\n";
5375     };
5376
5377     FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
5378     FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
5379     FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
5380     FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
5381     FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
5382     FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
5383     FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5384     FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
5385     FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
5386     FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
5387     FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
5388     FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
5389 }
5390
5391 use Carp;
5392 use vars qw{
5393
5394   @gnu_stack
5395   $max_gnu_stack_index
5396   $gnu_position_predictor
5397   $line_start_index_to_go
5398   $last_indentation_written
5399   $last_unadjusted_indentation
5400   $last_leading_token
5401
5402   $saw_VERSION_in_this_file
5403   $saw_END_or_DATA_
5404
5405   @gnu_item_list
5406   $max_gnu_item_index
5407   $gnu_sequence_number
5408   $last_output_indentation
5409   %last_gnu_equals
5410   %gnu_comma_count
5411   %gnu_arrow_count
5412
5413   @block_type_to_go
5414   @type_sequence_to_go
5415   @container_environment_to_go
5416   @bond_strength_to_go
5417   @forced_breakpoint_to_go
5418   @lengths_to_go
5419   @levels_to_go
5420   @leading_spaces_to_go
5421   @reduced_spaces_to_go
5422   @matching_token_to_go
5423   @mate_index_to_go
5424   @nesting_blocks_to_go
5425   @ci_levels_to_go
5426   @nesting_depth_to_go
5427   @nobreak_to_go
5428   @old_breakpoint_to_go
5429   @tokens_to_go
5430   @types_to_go
5431
5432   %saved_opening_indentation
5433
5434   $max_index_to_go
5435   $comma_count_in_batch
5436   $old_line_count_in_batch
5437   $last_nonblank_index_to_go
5438   $last_nonblank_type_to_go
5439   $last_nonblank_token_to_go
5440   $last_last_nonblank_index_to_go
5441   $last_last_nonblank_type_to_go
5442   $last_last_nonblank_token_to_go
5443   @nonblank_lines_at_depth
5444   $starting_in_quote
5445   $ending_in_quote
5446
5447   $in_format_skipping_section
5448   $format_skipping_pattern_begin
5449   $format_skipping_pattern_end
5450
5451   $forced_breakpoint_count
5452   $forced_breakpoint_undo_count
5453   @forced_breakpoint_undo_stack
5454   %postponed_breakpoint
5455
5456   $tabbing
5457   $embedded_tab_count
5458   $first_embedded_tab_at
5459   $last_embedded_tab_at
5460   $deleted_semicolon_count
5461   $first_deleted_semicolon_at
5462   $last_deleted_semicolon_at
5463   $added_semicolon_count
5464   $first_added_semicolon_at
5465   $last_added_semicolon_at
5466   $first_tabbing_disagreement
5467   $last_tabbing_disagreement
5468   $in_tabbing_disagreement
5469   $tabbing_disagreement_count
5470   $input_line_tabbing
5471
5472   $last_line_type
5473   $last_line_leading_type
5474   $last_line_leading_level
5475   $last_last_line_leading_level
5476
5477   %block_leading_text
5478   %block_opening_line_number
5479   $csc_new_statement_ok
5480   $accumulating_text_for_block
5481   $leading_block_text
5482   $rleading_block_if_elsif_text
5483   $leading_block_text_level
5484   $leading_block_text_length_exceeded
5485   $leading_block_text_line_length
5486   $leading_block_text_line_number
5487   $closing_side_comment_prefix_pattern
5488   $closing_side_comment_list_pattern
5489
5490   $last_nonblank_token
5491   $last_nonblank_type
5492   $last_last_nonblank_token
5493   $last_last_nonblank_type
5494   $last_nonblank_block_type
5495   $last_output_level
5496   %is_do_follower
5497   %is_if_brace_follower
5498   %space_after_keyword
5499   $rbrace_follower
5500   $looking_for_else
5501   %is_last_next_redo_return
5502   %is_other_brace_follower
5503   %is_else_brace_follower
5504   %is_anon_sub_brace_follower
5505   %is_anon_sub_1_brace_follower
5506   %is_sort_map_grep
5507   %is_sort_map_grep_eval
5508   %is_sort_map_grep_eval_do
5509   %is_block_without_semicolon
5510   %is_if_unless
5511   %is_and_or
5512   %is_assignment
5513   %is_chain_operator
5514   %is_if_unless_and_or_last_next_redo_return
5515   %is_until_while_for_if_elsif_else
5516
5517   @has_broken_sublist
5518   @dont_align
5519   @want_comma_break
5520
5521   $is_static_block_comment
5522   $index_start_one_line_block
5523   $semicolons_before_block_self_destruct
5524   $index_max_forced_break
5525   $input_line_number
5526   $diagnostics_object
5527   $vertical_aligner_object
5528   $logger_object
5529   $file_writer_object
5530   $formatter_self
5531   @ci_stack
5532   $last_line_had_side_comment
5533   %want_break_before
5534   %outdent_keyword
5535   $static_block_comment_pattern
5536   $static_side_comment_pattern
5537   %opening_vertical_tightness
5538   %closing_vertical_tightness
5539   %closing_token_indentation
5540
5541   %opening_token_right
5542   %stack_opening_token
5543   %stack_closing_token
5544
5545   $block_brace_vertical_tightness_pattern
5546
5547   $rOpts_add_newlines
5548   $rOpts_add_whitespace
5549   $rOpts_block_brace_tightness
5550   $rOpts_block_brace_vertical_tightness
5551   $rOpts_brace_left_and_indent
5552   $rOpts_comma_arrow_breakpoints
5553   $rOpts_break_at_old_keyword_breakpoints
5554   $rOpts_break_at_old_comma_breakpoints
5555   $rOpts_break_at_old_logical_breakpoints
5556   $rOpts_break_at_old_ternary_breakpoints
5557   $rOpts_closing_side_comment_else_flag
5558   $rOpts_closing_side_comment_maximum_text
5559   $rOpts_continuation_indentation
5560   $rOpts_cuddled_else
5561   $rOpts_delete_old_whitespace
5562   $rOpts_fuzzy_line_length
5563   $rOpts_indent_columns
5564   $rOpts_line_up_parentheses
5565   $rOpts_maximum_fields_per_table
5566   $rOpts_maximum_line_length
5567   $rOpts_short_concatenation_item_length
5568   $rOpts_swallow_optional_blank_lines
5569   $rOpts_ignore_old_breakpoints
5570   $rOpts_format_skipping
5571   $rOpts_space_function_paren
5572   $rOpts_space_keyword_paren
5573
5574   $half_maximum_line_length
5575
5576   %is_opening_type
5577   %is_closing_type
5578   %is_keyword_returning_list
5579   %tightness
5580   %matching_token
5581   $rOpts
5582   %right_bond_strength
5583   %left_bond_strength
5584   %binary_ws_rules
5585   %want_left_space
5586   %want_right_space
5587   %is_digraph
5588   %is_trigraph
5589   $bli_pattern
5590   $bli_list_string
5591   %is_closing_type
5592   %is_opening_type
5593   %is_closing_token
5594   %is_opening_token
5595 };
5596
5597 BEGIN {
5598
5599     # default list of block types for which -bli would apply
5600     $bli_list_string = 'if else elsif unless while for foreach do : sub';
5601
5602     @_ = qw(
5603       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5604       <= >= == =~ !~ != ++ -- /= x=
5605     );
5606     @is_digraph{@_} = (1) x scalar(@_);
5607
5608     @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5609     @is_trigraph{@_} = (1) x scalar(@_);
5610
5611     @_ = qw(
5612       = **= += *= &= <<= &&=
5613       -= /= |= >>= ||= //=
5614       .= %= ^=
5615       x=
5616     );
5617     @is_assignment{@_} = (1) x scalar(@_);
5618
5619     @_ = qw(
5620       grep
5621       keys
5622       map
5623       reverse
5624       sort
5625       split
5626     );
5627     @is_keyword_returning_list{@_} = (1) x scalar(@_);
5628
5629     @_ = qw(is if unless and or err last next redo return);
5630     @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5631
5632     # always break after a closing curly of these block types:
5633     @_ = qw(until while for if elsif else);
5634     @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
5635
5636     @_ = qw(last next redo return);
5637     @is_last_next_redo_return{@_} = (1) x scalar(@_);
5638
5639     @_ = qw(sort map grep);
5640     @is_sort_map_grep{@_} = (1) x scalar(@_);
5641
5642     @_ = qw(sort map grep eval);
5643     @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5644
5645     @_ = qw(sort map grep eval do);
5646     @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5647
5648     @_ = qw(if unless);
5649     @is_if_unless{@_} = (1) x scalar(@_);
5650
5651     @_ = qw(and or err);
5652     @is_and_or{@_} = (1) x scalar(@_);
5653
5654     # Identify certain operators which often occur in chains
5655     @_ = qw(&& || and or : ? .);
5656     @is_chain_operator{@_} = (1) x scalar(@_);
5657
5658     # We can remove semicolons after blocks preceded by these keywords
5659     @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
5660       unless while until for foreach);
5661     @is_block_without_semicolon{@_} = (1) x scalar(@_);
5662
5663     # 'L' is token for opening { at hash key
5664     @_ = qw" L { ( [ ";
5665     @is_opening_type{@_} = (1) x scalar(@_);
5666
5667     # 'R' is token for closing } at hash key
5668     @_ = qw" R } ) ] ";
5669     @is_closing_type{@_} = (1) x scalar(@_);
5670
5671     @_ = qw" { ( [ ";
5672     @is_opening_token{@_} = (1) x scalar(@_);
5673
5674     @_ = qw" } ) ] ";
5675     @is_closing_token{@_} = (1) x scalar(@_);
5676 }
5677
5678 # whitespace codes
5679 use constant WS_YES      => 1;
5680 use constant WS_OPTIONAL => 0;
5681 use constant WS_NO       => -1;
5682
5683 # Token bond strengths.
5684 use constant NO_BREAK    => 10000;
5685 use constant VERY_STRONG => 100;
5686 use constant STRONG      => 2.1;
5687 use constant NOMINAL     => 1.1;
5688 use constant WEAK        => 0.8;
5689 use constant VERY_WEAK   => 0.55;
5690
5691 # values for testing indexes in output array
5692 use constant UNDEFINED_INDEX => -1;
5693
5694 # Maximum number of little messages; probably need not be changed.
5695 use constant MAX_NAG_MESSAGES => 6;
5696
5697 # increment between sequence numbers for each type
5698 # For example, ?: pairs might have numbers 7,11,15,...
5699 use constant TYPE_SEQUENCE_INCREMENT => 4;
5700
5701 {
5702
5703     # methods to count instances
5704     my $_count = 0;
5705     sub get_count        { $_count; }
5706     sub _increment_count { ++$_count }
5707     sub _decrement_count { --$_count }
5708 }
5709
5710 # interface to Perl::Tidy::Logger routines
5711 sub warning {
5712     if ($logger_object) {
5713         $logger_object->warning(@_);
5714     }
5715 }
5716
5717 sub complain {
5718     if ($logger_object) {
5719         $logger_object->complain(@_);
5720     }
5721 }
5722
5723 sub write_logfile_entry {
5724     if ($logger_object) {
5725         $logger_object->write_logfile_entry(@_);
5726     }
5727 }
5728
5729 sub black_box {
5730     if ($logger_object) {
5731         $logger_object->black_box(@_);
5732     }
5733 }
5734
5735 sub report_definite_bug {
5736     if ($logger_object) {
5737         $logger_object->report_definite_bug();
5738     }
5739 }
5740
5741 sub get_saw_brace_error {
5742     if ($logger_object) {
5743         $logger_object->get_saw_brace_error();
5744     }
5745 }
5746
5747 sub we_are_at_the_last_line {
5748     if ($logger_object) {
5749         $logger_object->we_are_at_the_last_line();
5750     }
5751 }
5752
5753 # interface to Perl::Tidy::Diagnostics routine
5754 sub write_diagnostics {
5755
5756     if ($diagnostics_object) {
5757         $diagnostics_object->write_diagnostics(@_);
5758     }
5759 }
5760
5761 sub get_added_semicolon_count {
5762     my $self = shift;
5763     return $added_semicolon_count;
5764 }
5765
5766 sub DESTROY {
5767     $_[0]->_decrement_count();
5768 }
5769
5770 sub new {
5771
5772     my $class = shift;
5773
5774     # we are given an object with a write_line() method to take lines
5775     my %defaults = (
5776         sink_object        => undef,
5777         diagnostics_object => undef,
5778         logger_object      => undef,
5779     );
5780     my %args = ( %defaults, @_ );
5781
5782     $logger_object      = $args{logger_object};
5783     $diagnostics_object = $args{diagnostics_object};
5784
5785     # we create another object with a get_line() and peek_ahead() method
5786     my $sink_object = $args{sink_object};
5787     $file_writer_object =
5788       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5789
5790     # initialize the leading whitespace stack to negative levels
5791     # so that we can never run off the end of the stack
5792     $gnu_position_predictor = 0;    # where the current token is predicted to be
5793     $max_gnu_stack_index    = 0;
5794     $max_gnu_item_index     = -1;
5795     $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5796     @gnu_item_list               = ();
5797     $last_output_indentation     = 0;
5798     $last_indentation_written    = 0;
5799     $last_unadjusted_indentation = 0;
5800     $last_leading_token          = "";
5801
5802     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5803     $saw_END_or_DATA_         = 0;
5804
5805     @block_type_to_go            = ();
5806     @type_sequence_to_go         = ();
5807     @container_environment_to_go = ();
5808     @bond_strength_to_go         = ();
5809     @forced_breakpoint_to_go     = ();
5810     @lengths_to_go               = ();    # line length to start of ith token
5811     @levels_to_go                = ();
5812     @matching_token_to_go        = ();
5813     @mate_index_to_go            = ();
5814     @nesting_blocks_to_go        = ();
5815     @ci_levels_to_go             = ();
5816     @nesting_depth_to_go         = (0);
5817     @nobreak_to_go               = ();
5818     @old_breakpoint_to_go        = ();
5819     @tokens_to_go                = ();
5820     @types_to_go                 = ();
5821     @leading_spaces_to_go        = ();
5822     @reduced_spaces_to_go        = ();
5823
5824     @dont_align         = ();
5825     @has_broken_sublist = ();
5826     @want_comma_break   = ();
5827
5828     @ci_stack                   = ("");
5829     $first_tabbing_disagreement = 0;
5830     $last_tabbing_disagreement  = 0;
5831     $tabbing_disagreement_count = 0;
5832     $in_tabbing_disagreement    = 0;
5833     $input_line_tabbing         = undef;
5834
5835     $last_line_type               = "";
5836     $last_last_line_leading_level = 0;
5837     $last_line_leading_level      = 0;
5838     $last_line_leading_type       = '#';
5839
5840     $last_nonblank_token        = ';';
5841     $last_nonblank_type         = ';';
5842     $last_last_nonblank_token   = ';';
5843     $last_last_nonblank_type    = ';';
5844     $last_nonblank_block_type   = "";
5845     $last_output_level          = 0;
5846     $looking_for_else           = 0;
5847     $embedded_tab_count         = 0;
5848     $first_embedded_tab_at      = 0;
5849     $last_embedded_tab_at       = 0;
5850     $deleted_semicolon_count    = 0;
5851     $first_deleted_semicolon_at = 0;
5852     $last_deleted_semicolon_at  = 0;
5853     $added_semicolon_count      = 0;
5854     $first_added_semicolon_at   = 0;
5855     $last_added_semicolon_at    = 0;
5856     $last_line_had_side_comment = 0;
5857     $is_static_block_comment    = 0;
5858     %postponed_breakpoint       = ();
5859
5860     # variables for adding side comments
5861     %block_leading_text        = ();
5862     %block_opening_line_number = ();
5863     $csc_new_statement_ok      = 1;
5864
5865     %saved_opening_indentation  = ();
5866     $in_format_skipping_section = 0;
5867
5868     reset_block_text_accumulator();
5869
5870     prepare_for_new_input_lines();
5871
5872     $vertical_aligner_object =
5873       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5874         $logger_object, $diagnostics_object );
5875
5876     if ( $rOpts->{'entab-leading-whitespace'} ) {
5877         write_logfile_entry(
5878 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5879         );
5880     }
5881     elsif ( $rOpts->{'tabs'} ) {
5882         write_logfile_entry("Indentation will be with a tab character\n");
5883     }
5884     else {
5885         write_logfile_entry(
5886             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5887     }
5888
5889     # This was the start of a formatter referent, but object-oriented
5890     # coding has turned out to be too slow here.
5891     $formatter_self = {};
5892
5893     bless $formatter_self, $class;
5894
5895     # Safety check..this is not a class yet
5896     if ( _increment_count() > 1 ) {
5897         confess
5898 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
5899     }
5900     return $formatter_self;
5901 }
5902
5903 sub prepare_for_new_input_lines {
5904
5905     $gnu_sequence_number++;    # increment output batch counter
5906     %last_gnu_equals                = ();
5907     %gnu_comma_count                = ();
5908     %gnu_arrow_count                = ();
5909     $line_start_index_to_go         = 0;
5910     $max_gnu_item_index             = UNDEFINED_INDEX;
5911     $index_max_forced_break         = UNDEFINED_INDEX;
5912     $max_index_to_go                = UNDEFINED_INDEX;
5913     $last_nonblank_index_to_go      = UNDEFINED_INDEX;
5914     $last_nonblank_type_to_go       = '';
5915     $last_nonblank_token_to_go      = '';
5916     $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5917     $last_last_nonblank_type_to_go  = '';
5918     $last_last_nonblank_token_to_go = '';
5919     $forced_breakpoint_count        = 0;
5920     $forced_breakpoint_undo_count   = 0;
5921     $rbrace_follower                = undef;
5922     $lengths_to_go[0]               = 0;
5923     $old_line_count_in_batch        = 1;
5924     $comma_count_in_batch           = 0;
5925     $starting_in_quote              = 0;
5926
5927     destroy_one_line_block();
5928 }
5929
5930 sub write_line {
5931
5932     my $self = shift;
5933     my ($line_of_tokens) = @_;
5934
5935     my $line_type  = $line_of_tokens->{_line_type};
5936     my $input_line = $line_of_tokens->{_line_text};
5937
5938     my $want_blank_line_next = 0;
5939
5940     # _line_type codes are:
5941     #   SYSTEM         - system-specific code before hash-bang line
5942     #   CODE           - line of perl code (including comments)
5943     #   POD_START      - line starting pod, such as '=head'
5944     #   POD            - pod documentation text
5945     #   POD_END        - last line of pod section, '=cut'
5946     #   HERE           - text of here-document
5947     #   HERE_END       - last line of here-doc (target word)
5948     #   FORMAT         - format section
5949     #   FORMAT_END     - last line of format section, '.'
5950     #   DATA_START     - __DATA__ line
5951     #   DATA           - unidentified text following __DATA__
5952     #   END_START      - __END__ line
5953     #   END            - unidentified text following __END__
5954     #   ERROR          - we are in big trouble, probably not a perl script
5955     #
5956     # handle line of code..
5957     if ( $line_type eq 'CODE' ) {
5958
5959         # let logger see all non-blank lines of code
5960         if ( $input_line !~ /^\s*$/ ) {
5961             my $output_line_number =
5962               $vertical_aligner_object->get_output_line_number();
5963             black_box( $line_of_tokens, $output_line_number );
5964         }
5965         print_line_of_tokens($line_of_tokens);
5966     }
5967
5968     # handle line of non-code..
5969     else {
5970
5971         # set special flags
5972         my $skip_line = 0;
5973         my $tee_line  = 0;
5974         if ( $line_type =~ /^POD/ ) {
5975
5976             # Pod docs should have a preceding blank line.  But be
5977             # very careful in __END__ and __DATA__ sections, because:
5978             #   1. the user may be using this section for any purpose whatsoever
5979             #   2. the blank counters are not active there
5980             # It should be safe to request a blank line between an
5981             # __END__ or __DATA__ and an immediately following '=head'
5982             # type line, (types END_START and DATA_START), but not for
5983             # any other lines of type END or DATA.
5984             if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
5985             if ( $rOpts->{'tee-pod'} )    { $tee_line  = 1; }
5986             if (   !$skip_line
5987                 && $line_type eq 'POD_START'
5988                 && $last_line_type !~ /^(END|DATA)$/ )
5989             {
5990                 want_blank_line();
5991             }
5992
5993             # patch to put a blank line after =cut
5994             # (required by podchecker)
5995             if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
5996                 $file_writer_object->reset_consecutive_blank_lines();
5997                 $want_blank_line_next = 1;
5998             }
5999         }
6000
6001         # leave the blank counters in a predictable state
6002         # after __END__ or __DATA__
6003         elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6004             $file_writer_object->reset_consecutive_blank_lines();
6005             $saw_END_or_DATA_ = 1;
6006         }
6007
6008         # write unindented non-code line
6009         if ( !$skip_line ) {
6010             if ($tee_line) { $file_writer_object->tee_on() }
6011             write_unindented_line($input_line);
6012             if ($tee_line)             { $file_writer_object->tee_off() }
6013             if ($want_blank_line_next) { want_blank_line(); }
6014         }
6015     }
6016     $last_line_type = $line_type;
6017 }
6018
6019 sub create_one_line_block {
6020     $index_start_one_line_block            = $_[0];
6021     $semicolons_before_block_self_destruct = $_[1];
6022 }
6023
6024 sub destroy_one_line_block {
6025     $index_start_one_line_block            = UNDEFINED_INDEX;
6026     $semicolons_before_block_self_destruct = 0;
6027 }
6028
6029 sub leading_spaces_to_go {
6030
6031     # return the number of indentation spaces for a token in the output stream;
6032     # these were previously stored by 'set_leading_whitespace'.
6033
6034     return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6035
6036 }
6037
6038 sub get_SPACES {
6039
6040     # return the number of leading spaces associated with an indentation
6041     # variable $indentation is either a constant number of spaces or an object
6042     # with a get_SPACES method.
6043     my $indentation = shift;
6044     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6045 }
6046
6047 sub get_RECOVERABLE_SPACES {
6048
6049     # return the number of spaces (+ means shift right, - means shift left)
6050     # that we would like to shift a group of lines with the same indentation
6051     # to get them to line up with their opening parens
6052     my $indentation = shift;
6053     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6054 }
6055
6056 sub get_AVAILABLE_SPACES_to_go {
6057
6058     my $item = $leading_spaces_to_go[ $_[0] ];
6059
6060     # return the number of available leading spaces associated with an
6061     # indentation variable.  $indentation is either a constant number of
6062     # spaces or an object with a get_AVAILABLE_SPACES method.
6063     return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6064 }
6065
6066 sub new_lp_indentation_item {
6067
6068     # this is an interface to the IndentationItem class
6069     my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6070
6071     # A negative level implies not to store the item in the item_list
6072     my $index = 0;
6073     if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6074
6075     my $item = Perl::Tidy::IndentationItem->new(
6076         $spaces,      $level,
6077         $ci_level,    $available_spaces,
6078         $index,       $gnu_sequence_number,
6079         $align_paren, $max_gnu_stack_index,
6080         $line_start_index_to_go,
6081     );
6082
6083     if ( $level >= 0 ) {
6084         $gnu_item_list[$max_gnu_item_index] = $item;
6085     }
6086
6087     return $item;
6088 }
6089
6090 sub set_leading_whitespace {
6091
6092     # This routine defines leading whitespace
6093     # given: the level and continuation_level of a token,
6094     # define: space count of leading string which would apply if it
6095     # were the first token of a new line.
6096
6097     my ( $level, $ci_level, $in_continued_quote ) = @_;
6098
6099     # modify for -bli, which adds one continuation indentation for
6100     # opening braces
6101     if (   $rOpts_brace_left_and_indent
6102         && $max_index_to_go == 0
6103         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6104     {
6105         $ci_level++;
6106     }
6107
6108     # patch to avoid trouble when input file has negative indentation.
6109     # other logic should catch this error.
6110     if ( $level < 0 ) { $level = 0 }
6111
6112     #-------------------------------------------
6113     # handle the standard indentation scheme
6114     #-------------------------------------------
6115     unless ($rOpts_line_up_parentheses) {
6116         my $space_count = $ci_level * $rOpts_continuation_indentation + $level *
6117           $rOpts_indent_columns;
6118         my $ci_spaces =
6119           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6120
6121         if ($in_continued_quote) {
6122             $space_count = 0;
6123             $ci_spaces   = 0;
6124         }
6125         $leading_spaces_to_go[$max_index_to_go] = $space_count;
6126         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6127         return;
6128     }
6129
6130     #-------------------------------------------------------------
6131     # handle case of -lp indentation..
6132     #-------------------------------------------------------------
6133
6134     # The continued_quote flag means that this is the first token of a
6135     # line, and it is the continuation of some kind of multi-line quote
6136     # or pattern.  It requires special treatment because it must have no
6137     # added leading whitespace. So we create a special indentation item
6138     # which is not in the stack.
6139     if ($in_continued_quote) {
6140         my $space_count     = 0;
6141         my $available_space = 0;
6142         $level = -1;    # flag to prevent storing in item_list
6143         $leading_spaces_to_go[$max_index_to_go] =
6144           $reduced_spaces_to_go[$max_index_to_go] =
6145           new_lp_indentation_item( $space_count, $level, $ci_level,
6146             $available_space, 0 );
6147         return;
6148     }
6149
6150     # get the top state from the stack
6151     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6152     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6153     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6154
6155     my $type        = $types_to_go[$max_index_to_go];
6156     my $token       = $tokens_to_go[$max_index_to_go];
6157     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6158
6159     if ( $type eq '{' || $type eq '(' ) {
6160
6161         $gnu_comma_count{ $total_depth + 1 } = 0;
6162         $gnu_arrow_count{ $total_depth + 1 } = 0;
6163
6164         # If we come to an opening token after an '=' token of some type,
6165         # see if it would be helpful to 'break' after the '=' to save space
6166         my $last_equals = $last_gnu_equals{$total_depth};
6167         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6168
6169             # find the position if we break at the '='
6170             my $i_test = $last_equals;
6171             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6172
6173             # TESTING
6174             ##my $too_close = ($i_test==$max_index_to_go-1);
6175
6176             my $test_position = total_line_length( $i_test, $max_index_to_go );
6177
6178             if (
6179
6180                 # the equals is not just before an open paren (testing)
6181                 ##!$too_close &&
6182
6183                 # if we are beyond the midpoint
6184                 $gnu_position_predictor > $half_maximum_line_length
6185
6186                 # or we are beyont the 1/4 point and there was an old
6187                 # break at the equals
6188                 || (
6189                     $gnu_position_predictor > $half_maximum_line_length / 2
6190                     && (
6191                         $old_breakpoint_to_go[$last_equals]
6192                         || (   $last_equals > 0
6193                             && $old_breakpoint_to_go[ $last_equals - 1 ] )
6194                         || (   $last_equals > 1
6195                             && $types_to_go[ $last_equals - 1 ] eq 'b'
6196                             && $old_breakpoint_to_go[ $last_equals - 2 ] )
6197                     )
6198                 )
6199               )
6200             {
6201
6202                 # then make the switch -- note that we do not set a real
6203                 # breakpoint here because we may not really need one; sub
6204                 # scan_list will do that if necessary
6205                 $line_start_index_to_go = $i_test + 1;
6206                 $gnu_position_predictor = $test_position;
6207             }
6208         }
6209     }
6210
6211     # Check for decreasing depth ..
6212     # Note that one token may have both decreasing and then increasing
6213     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
6214     # in this example we would first go back to (1,0) then up to (2,0)
6215     # in a single call.
6216     if ( $level < $current_level || $ci_level < $current_ci_level ) {
6217
6218         # loop to find the first entry at or completely below this level
6219         my ( $lev, $ci_lev );
6220         while (1) {
6221             if ($max_gnu_stack_index) {
6222
6223                 # save index of token which closes this level
6224                 $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6225
6226                 # Undo any extra indentation if we saw no commas
6227                 my $available_spaces =
6228                   $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6229
6230                 my $comma_count = 0;
6231                 my $arrow_count = 0;
6232                 if ( $type eq '}' || $type eq ')' ) {
6233                     $comma_count = $gnu_comma_count{$total_depth};
6234                     $arrow_count = $gnu_arrow_count{$total_depth};
6235                     $comma_count = 0 unless $comma_count;
6236                     $arrow_count = 0 unless $arrow_count;
6237                 }
6238                 $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6239                 $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6240
6241                 if ( $available_spaces > 0 ) {
6242
6243                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
6244
6245                         my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6246                         my $seqno =
6247                           $gnu_stack[$max_gnu_stack_index]
6248                           ->get_SEQUENCE_NUMBER();
6249
6250                         # Be sure this item was created in this batch.  This
6251                         # should be true because we delete any available
6252                         # space from open items at the end of each batch.
6253                         if (   $gnu_sequence_number != $seqno
6254                             || $i > $max_gnu_item_index )
6255                         {
6256                             warning(
6257 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6258                             );
6259                             report_definite_bug();
6260                         }
6261
6262                         else {
6263                             if ( $arrow_count == 0 ) {
6264                                 $gnu_item_list[$i]
6265                                   ->permanently_decrease_AVAILABLE_SPACES(
6266                                     $available_spaces);
6267                             }
6268                             else {
6269                                 $gnu_item_list[$i]
6270                                   ->tentatively_decrease_AVAILABLE_SPACES(
6271                                     $available_spaces);
6272                             }
6273
6274                             my $j;
6275                             for (
6276                                 $j = $i + 1 ;
6277                                 $j <= $max_gnu_item_index ;
6278                                 $j++
6279                               )
6280                             {
6281                                 $gnu_item_list[$j]
6282                                   ->decrease_SPACES($available_spaces);
6283                             }
6284                         }
6285                     }
6286                 }
6287
6288                 # go down one level
6289                 --$max_gnu_stack_index;
6290                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6291                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6292
6293                 # stop when we reach a level at or below the current level
6294                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
6295                     $space_count =
6296                       $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6297                     $current_level    = $lev;
6298                     $current_ci_level = $ci_lev;
6299                     last;
6300                 }
6301             }
6302
6303             # reached bottom of stack .. should never happen because
6304             # only negative levels can get here, and $level was forced
6305             # to be positive above.
6306             else {
6307                 warning(
6308 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6309                 );
6310                 report_definite_bug();
6311                 last;
6312             }
6313         }
6314     }
6315
6316     # handle increasing depth
6317     if ( $level > $current_level || $ci_level > $current_ci_level ) {
6318
6319         # Compute the standard incremental whitespace.  This will be
6320         # the minimum incremental whitespace that will be used.  This
6321         # choice results in a smooth transition between the gnu-style
6322         # and the standard style.
6323         my $standard_increment =
6324           ( $level - $current_level ) * $rOpts_indent_columns +
6325           ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6326
6327         # Now we have to define how much extra incremental space
6328         # ("$available_space") we want.  This extra space will be
6329         # reduced as necessary when long lines are encountered or when
6330         # it becomes clear that we do not have a good list.
6331         my $available_space = 0;
6332         my $align_paren     = 0;
6333         my $excess          = 0;
6334
6335         # initialization on empty stack..
6336         if ( $max_gnu_stack_index == 0 ) {
6337             $space_count = $level * $rOpts_indent_columns;
6338         }
6339
6340         # if this is a BLOCK, add the standard increment
6341         elsif ($last_nonblank_block_type) {
6342             $space_count += $standard_increment;
6343         }
6344
6345         # if last nonblank token was not structural indentation,
6346         # just use standard increment
6347         elsif ( $last_nonblank_type ne '{' ) {
6348             $space_count += $standard_increment;
6349         }
6350
6351         # otherwise use the space to the first non-blank level change token
6352         else {
6353
6354             $space_count = $gnu_position_predictor;
6355
6356             my $min_gnu_indentation =
6357               $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6358
6359             $available_space = $space_count - $min_gnu_indentation;
6360             if ( $available_space >= $standard_increment ) {
6361                 $min_gnu_indentation += $standard_increment;
6362             }
6363             elsif ( $available_space > 1 ) {
6364                 $min_gnu_indentation += $available_space + 1;
6365             }
6366             elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6367                 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6368                     $min_gnu_indentation += 2;
6369                 }
6370                 else {
6371                     $min_gnu_indentation += 1;
6372                 }
6373             }
6374             else {
6375                 $min_gnu_indentation += $standard_increment;
6376             }
6377             $available_space = $space_count - $min_gnu_indentation;
6378
6379             if ( $available_space < 0 ) {
6380                 $space_count     = $min_gnu_indentation;
6381                 $available_space = 0;
6382             }
6383             $align_paren = 1;
6384         }
6385
6386         # update state, but not on a blank token
6387         if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6388
6389             $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6390
6391             ++$max_gnu_stack_index;
6392             $gnu_stack[$max_gnu_stack_index] =
6393               new_lp_indentation_item( $space_count, $level, $ci_level,
6394                 $available_space, $align_paren );
6395
6396             # If the opening paren is beyond the half-line length, then
6397             # we will use the minimum (standard) indentation.  This will
6398             # help avoid problems associated with running out of space
6399             # near the end of a line.  As a result, in deeply nested
6400             # lists, there will be some indentations which are limited
6401             # to this minimum standard indentation. But the most deeply
6402             # nested container will still probably be able to shift its
6403             # parameters to the right for proper alignment, so in most
6404             # cases this will not be noticable.
6405             if (   $available_space > 0
6406                 && $space_count > $half_maximum_line_length )
6407             {
6408                 $gnu_stack[$max_gnu_stack_index]
6409                   ->tentatively_decrease_AVAILABLE_SPACES($available_space);
6410             }
6411         }
6412     }
6413
6414     # Count commas and look for non-list characters.  Once we see a
6415     # non-list character, we give up and don't look for any more commas.
6416     if ( $type eq '=>' ) {
6417         $gnu_arrow_count{$total_depth}++;
6418
6419         # tentatively treating '=>' like '=' for estimating breaks
6420         # TODO: this could use some experimentation
6421         $last_gnu_equals{$total_depth} = $max_index_to_go;
6422     }
6423
6424     elsif ( $type eq ',' ) {
6425         $gnu_comma_count{$total_depth}++;
6426     }
6427
6428     elsif ( $is_assignment{$type} ) {
6429         $last_gnu_equals{$total_depth} = $max_index_to_go;
6430     }
6431
6432     # this token might start a new line
6433     # if this is a non-blank..
6434     if ( $type ne 'b' ) {
6435
6436         # and if ..
6437         if (
6438
6439             # this is the first nonblank token of the line
6440             $max_index_to_go == 1 && $types_to_go[0] eq 'b'
6441
6442             # or previous character was one of these:
6443             || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6444
6445             # or previous character was opening and this does not close it
6446             || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6447             || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6448
6449             # or this token is one of these:
6450             || $type =~ /^([\.]|\|\||\&\&)$/
6451
6452             # or this is a closing structure
6453             || (   $last_nonblank_type_to_go eq '}'
6454                 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6455
6456             # or previous token was keyword 'return'
6457             || ( $last_nonblank_type_to_go eq 'k'
6458                 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6459
6460             # or starting a new line at certain keywords is fine
6461             || (   $type eq 'k'
6462                 && $is_if_unless_and_or_last_next_redo_return{$token} )
6463
6464             # or this is after an assignment after a closing structure
6465             || (
6466                 $is_assignment{$last_nonblank_type_to_go}
6467                 && (
6468                     $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6469
6470                     # and it is significantly to the right
6471                     || $gnu_position_predictor > $half_maximum_line_length
6472                 )
6473             )
6474           )
6475         {
6476             check_for_long_gnu_style_lines();
6477             $line_start_index_to_go = $max_index_to_go;
6478
6479             # back up 1 token if we want to break before that type
6480             # otherwise, we may strand tokens like '?' or ':' on a line
6481             if ( $line_start_index_to_go > 0 ) {
6482                 if ( $last_nonblank_type_to_go eq 'k' ) {
6483
6484                     if ( $want_break_before{$last_nonblank_token_to_go} ) {
6485                         $line_start_index_to_go--;
6486                     }
6487                 }
6488                 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6489                     $line_start_index_to_go--;
6490                 }
6491             }
6492         }
6493     }
6494
6495     # remember the predicted position of this token on the output line
6496     if ( $max_index_to_go > $line_start_index_to_go ) {
6497         $gnu_position_predictor =
6498           total_line_length( $line_start_index_to_go, $max_index_to_go );
6499     }
6500     else {
6501         $gnu_position_predictor = $space_count +
6502           token_sequence_length( $max_index_to_go, $max_index_to_go );
6503     }
6504
6505     # store the indentation object for this token
6506     # this allows us to manipulate the leading whitespace
6507     # (in case we have to reduce indentation to fit a line) without
6508     # having to change any token values
6509     $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6510     $reduced_spaces_to_go[$max_index_to_go] =
6511       ( $max_gnu_stack_index > 0 && $ci_level )
6512       ? $gnu_stack[ $max_gnu_stack_index - 1 ]
6513       : $gnu_stack[$max_gnu_stack_index];
6514     return;
6515 }
6516
6517 sub check_for_long_gnu_style_lines {
6518
6519     # look at the current estimated maximum line length, and
6520     # remove some whitespace if it exceeds the desired maximum
6521
6522     # this is only for the '-lp' style
6523     return unless ($rOpts_line_up_parentheses);
6524
6525     # nothing can be done if no stack items defined for this line
6526     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6527
6528     # see if we have exceeded the maximum desired line length
6529     # keep 2 extra free because they are needed in some cases
6530     # (result of trial-and-error testing)
6531     my $spaces_needed =
6532       $gnu_position_predictor - $rOpts_maximum_line_length + 2;
6533
6534     return if ( $spaces_needed < 0 );
6535
6536     # We are over the limit, so try to remove a requested number of
6537     # spaces from leading whitespace.  We are only allowed to remove
6538     # from whitespace items created on this batch, since others have
6539     # already been used and cannot be undone.
6540     my @candidates = ();
6541     my $i;
6542
6543     # loop over all whitespace items created for the current batch
6544     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6545         my $item = $gnu_item_list[$i];
6546
6547         # item must still be open to be a candidate (otherwise it
6548         # cannot influence the current token)
6549         next if ( $item->get_CLOSED() >= 0 );
6550
6551         my $available_spaces = $item->get_AVAILABLE_SPACES();
6552
6553         if ( $available_spaces > 0 ) {
6554             push( @candidates, [ $i, $available_spaces ] );
6555         }
6556     }
6557
6558     return unless (@candidates);
6559
6560     # sort by available whitespace so that we can remove whitespace
6561     # from the maximum available first
6562     @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
6563
6564     # keep removing whitespace until we are done or have no more
6565     my $candidate;
6566     foreach $candidate (@candidates) {
6567         my ( $i, $available_spaces ) = @{$candidate};
6568         my $deleted_spaces =
6569           ( $available_spaces > $spaces_needed )
6570           ? $spaces_needed
6571           : $available_spaces;
6572
6573         # remove the incremental space from this item
6574         $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
6575
6576         my $i_debug = $i;
6577
6578         # update the leading whitespace of this item and all items
6579         # that came after it
6580         for ( ; $i <= $max_gnu_item_index ; $i++ ) {
6581
6582             my $old_spaces = $gnu_item_list[$i]->get_SPACES();
6583             if ( $old_spaces > $deleted_spaces ) {
6584                 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
6585             }
6586
6587             # shouldn't happen except for code bug:
6588             else {
6589                 my $level        = $gnu_item_list[$i_debug]->get_LEVEL();
6590                 my $ci_level     = $gnu_item_list[$i_debug]->get_CI_LEVEL();
6591                 my $old_level    = $gnu_item_list[$i]->get_LEVEL();
6592                 my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
6593                 warning(
6594 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level  deleted: level=$old_level ci=$ci_level\n"
6595                 );
6596                 report_definite_bug();
6597             }
6598         }
6599         $gnu_position_predictor -= $deleted_spaces;
6600         $spaces_needed          -= $deleted_spaces;
6601         last unless ( $spaces_needed > 0 );
6602     }
6603 }
6604
6605 sub finish_lp_batch {
6606
6607     # This routine is called once after each each output stream batch is
6608     # finished to undo indentation for all incomplete -lp
6609     # indentation levels.  It is too risky to leave a level open,
6610     # because then we can't backtrack in case of a long line to follow.
6611     # This means that comments and blank lines will disrupt this
6612     # indentation style.  But the vertical aligner may be able to
6613     # get the space back if there are side comments.
6614
6615     # this is only for the 'lp' style
6616     return unless ($rOpts_line_up_parentheses);
6617
6618     # nothing can be done if no stack items defined for this line
6619     return if ( $max_gnu_item_index == UNDEFINED_INDEX );
6620
6621     # loop over all whitespace items created for the current batch
6622     my $i;
6623     for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
6624         my $item = $gnu_item_list[$i];
6625
6626         # only look for open items
6627         next if ( $item->get_CLOSED() >= 0 );
6628
6629         # Tentatively remove all of the available space
6630         # (The vertical aligner will try to get it back later)
6631         my $available_spaces = $item->get_AVAILABLE_SPACES();
6632         if ( $available_spaces > 0 ) {
6633
6634             # delete incremental space for this item
6635             $gnu_item_list[$i]
6636               ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
6637
6638             # Reduce the total indentation space of any nodes that follow
6639             # Note that any such nodes must necessarily be dependents
6640             # of this node.
6641             foreach ( $i + 1 .. $max_gnu_item_index ) {
6642                 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
6643             }
6644         }
6645     }
6646     return;
6647 }
6648
6649 sub reduce_lp_indentation {
6650
6651     # reduce the leading whitespace at token $i if possible by $spaces_needed
6652     # (a large value of $spaces_needed will remove all excess space)
6653     # NOTE: to be called from scan_list only for a sequence of tokens
6654     # contained between opening and closing parens/braces/brackets
6655
6656     my ( $i, $spaces_wanted ) = @_;
6657     my $deleted_spaces = 0;
6658
6659     my $item             = $leading_spaces_to_go[$i];
6660     my $available_spaces = $item->get_AVAILABLE_SPACES();
6661
6662     if (
6663         $available_spaces > 0
6664         && ( ( $spaces_wanted <= $available_spaces )
6665             || !$item->get_HAVE_CHILD() )
6666       )
6667     {
6668
6669         # we'll remove these spaces, but mark them as recoverable
6670         $deleted_spaces =
6671           $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
6672     }
6673
6674     return $deleted_spaces;
6675 }
6676
6677 sub token_sequence_length {
6678
6679     # return length of tokens ($ifirst .. $ilast) including first & last
6680     # returns 0 if $ifirst > $ilast
6681     my $ifirst = shift;
6682     my $ilast  = shift;
6683     return 0 if ( $ilast < 0 || $ifirst > $ilast );
6684     return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
6685     return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
6686 }
6687
6688 sub total_line_length {
6689
6690     # return length of a line of tokens ($ifirst .. $ilast)
6691     my $ifirst = shift;
6692     my $ilast  = shift;
6693     if ( $ifirst < 0 ) { $ifirst = 0 }
6694
6695     return leading_spaces_to_go($ifirst) +
6696       token_sequence_length( $ifirst, $ilast );
6697 }
6698
6699 sub excess_line_length {
6700
6701     # return number of characters by which a line of tokens ($ifirst..$ilast)
6702     # exceeds the allowable line length.
6703     my $ifirst = shift;
6704     my $ilast  = shift;
6705     if ( $ifirst < 0 ) { $ifirst = 0 }
6706     return leading_spaces_to_go($ifirst) +
6707       token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
6708 }
6709
6710 sub finish_formatting {
6711
6712     # flush buffer and write any informative messages
6713     my $self = shift;
6714
6715     flush();
6716     $file_writer_object->decrement_output_line_number()
6717       ;    # fix up line number since it was incremented
6718     we_are_at_the_last_line();
6719     if ( $added_semicolon_count > 0 ) {
6720         my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
6721         my $what =
6722           ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
6723         write_logfile_entry("$added_semicolon_count $what added:\n");
6724         write_logfile_entry(
6725             "  $first at input line $first_added_semicolon_at\n");
6726
6727         if ( $added_semicolon_count > 1 ) {
6728             write_logfile_entry(
6729                 "   Last at input line $last_added_semicolon_at\n");
6730         }
6731         write_logfile_entry("  (Use -nasc to prevent semicolon addition)\n");
6732         write_logfile_entry("\n");
6733     }
6734
6735     if ( $deleted_semicolon_count > 0 ) {
6736         my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
6737         my $what =
6738           ( $deleted_semicolon_count > 1 )
6739           ? "semicolons were"
6740           : "semicolon was";
6741         write_logfile_entry(
6742             "$deleted_semicolon_count unnecessary $what deleted:\n");
6743         write_logfile_entry(
6744             "  $first at input line $first_deleted_semicolon_at\n");
6745
6746         if ( $deleted_semicolon_count > 1 ) {
6747             write_logfile_entry(
6748                 "   Last at input line $last_deleted_semicolon_at\n");
6749         }
6750         write_logfile_entry("  (Use -ndsc to prevent semicolon deletion)\n");
6751         write_logfile_entry("\n");
6752     }
6753
6754     if ( $embedded_tab_count > 0 ) {
6755         my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
6756         my $what =
6757           ( $embedded_tab_count > 1 )
6758           ? "quotes or patterns"
6759           : "quote or pattern";
6760         write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
6761         write_logfile_entry(
6762 "This means the display of this script could vary with device or software\n"
6763         );
6764         write_logfile_entry("  $first at input line $first_embedded_tab_at\n");
6765
6766         if ( $embedded_tab_count > 1 ) {
6767             write_logfile_entry(
6768                 "   Last at input line $last_embedded_tab_at\n");
6769         }
6770         write_logfile_entry("\n");
6771     }
6772
6773     if ($first_tabbing_disagreement) {
6774         write_logfile_entry(
6775 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
6776         );
6777     }
6778
6779     if ($in_tabbing_disagreement) {
6780         write_logfile_entry(
6781 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
6782         );
6783     }
6784     else {
6785
6786         if ($last_tabbing_disagreement) {
6787
6788             write_logfile_entry(
6789 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
6790             );
6791         }
6792         else {
6793             write_logfile_entry("No indentation disagreement seen\n");
6794         }
6795     }
6796     write_logfile_entry("\n");
6797
6798     $vertical_aligner_object->report_anything_unusual();
6799
6800     $file_writer_object->report_line_length_errors();
6801 }
6802
6803 sub check_options {
6804
6805     # This routine is called to check the Opts hash after it is defined
6806
6807     ($rOpts) = @_;
6808     my ( $tabbing_string, $tab_msg );
6809
6810     make_static_block_comment_pattern();
6811     make_static_side_comment_pattern();
6812     make_closing_side_comment_prefix();
6813     make_closing_side_comment_list_pattern();
6814     $format_skipping_pattern_begin =
6815       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
6816     $format_skipping_pattern_end =
6817       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
6818
6819     # If closing side comments ARE selected, then we can safely
6820     # delete old closing side comments unless closing side comment
6821     # warnings are requested.  This is a good idea because it will
6822     # eliminate any old csc's which fall below the line count threshold.
6823     # We cannot do this if warnings are turned on, though, because we
6824     # might delete some text which has been added.  So that must
6825     # be handled when comments are created.
6826     if ( $rOpts->{'closing-side-comments'} ) {
6827         if ( !$rOpts->{'closing-side-comment-warnings'} ) {
6828             $rOpts->{'delete-closing-side-comments'} = 1;
6829         }
6830     }
6831
6832     # If closing side comments ARE NOT selected, but warnings ARE
6833     # selected and we ARE DELETING csc's, then we will pretend to be
6834     # adding with a huge interval.  This will force the comments to be
6835     # generated for comparison with the old comments, but not added.
6836     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
6837         if ( $rOpts->{'delete-closing-side-comments'} ) {
6838             $rOpts->{'delete-closing-side-comments'}  = 0;
6839             $rOpts->{'closing-side-comments'}         = 1;
6840             $rOpts->{'closing-side-comment-interval'} = 100000000;
6841         }
6842     }
6843
6844     make_bli_pattern();
6845     make_block_brace_vertical_tightness_pattern();
6846
6847     if ( $rOpts->{'line-up-parentheses'} ) {
6848
6849         if (   $rOpts->{'indent-only'}
6850             || !$rOpts->{'add-newlines'}
6851             || !$rOpts->{'delete-old-newlines'} )
6852         {
6853             warn <<EOM;
6854 -----------------------------------------------------------------------
6855 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
6856
6857 The -lp indentation logic requires that perltidy be able to coordinate
6858 arbitrarily large numbers of line breakpoints.  This isn't possible
6859 with these flags. Sometimes an acceptable workaround is to use -wocb=3
6860 -----------------------------------------------------------------------
6861 EOM
6862             $rOpts->{'line-up-parentheses'} = 0;
6863         }
6864     }
6865
6866     # At present, tabs are not compatable with the line-up-parentheses style
6867     # (it would be possible to entab the total leading whitespace
6868     # just prior to writing the line, if desired).
6869     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
6870         warn <<EOM;
6871 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
6872 EOM
6873         $rOpts->{'tabs'} = 0;
6874     }
6875
6876     # Likewise, tabs are not compatable with outdenting..
6877     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
6878         warn <<EOM;
6879 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
6880 EOM
6881         $rOpts->{'tabs'} = 0;
6882     }
6883
6884     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
6885         warn <<EOM;
6886 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
6887 EOM
6888         $rOpts->{'tabs'} = 0;
6889     }
6890
6891     if ( !$rOpts->{'space-for-semicolon'} ) {
6892         $want_left_space{'f'} = -1;
6893     }
6894
6895     if ( $rOpts->{'space-terminal-semicolon'} ) {
6896         $want_left_space{';'} = 1;
6897     }
6898
6899     # implement outdenting preferences for keywords
6900     %outdent_keyword = ();
6901
6902     # load defaults
6903     @_ = qw(next last redo goto return);
6904
6905     # override defaults if requested
6906     if ( $_ = $rOpts->{'outdent-keyword-list'} ) {
6907         s/^\s+//;
6908         s/\s+$//;
6909         @_ = split /\s+/;
6910     }
6911
6912     # FUTURE: if not a keyword, assume that it is an identifier
6913     foreach (@_) {
6914         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
6915             $outdent_keyword{$_} = 1;
6916         }
6917         else {
6918             warn "ignoring '$_' in -okwl list; not a perl keyword";
6919         }
6920     }
6921
6922     # implement user whitespace preferences
6923     if ( $_ = $rOpts->{'want-left-space'} ) {
6924         s/^\s+//;
6925         s/\s+$//;
6926         @_ = split /\s+/;
6927         @want_left_space{@_} = (1) x scalar(@_);
6928     }
6929
6930     if ( $_ = $rOpts->{'want-right-space'} ) {
6931         s/^\s+//;
6932         s/\s+$//;
6933         @_ = split /\s+/;
6934         @want_right_space{@_} = (1) x scalar(@_);
6935     }
6936     if ( $_ = $rOpts->{'nowant-left-space'} ) {
6937         s/^\s+//;
6938         s/\s+$//;
6939         @_ = split /\s+/;
6940         @want_left_space{@_} = (-1) x scalar(@_);
6941     }
6942
6943     if ( $_ = $rOpts->{'nowant-right-space'} ) {
6944         s/^\s+//;
6945         s/\s+$//;
6946         @_ = split /\s+/;
6947         @want_right_space{@_} = (-1) x scalar(@_);
6948     }
6949     if ( $rOpts->{'dump-want-left-space'} ) {
6950         dump_want_left_space(*STDOUT);
6951         exit 1;
6952     }
6953
6954     if ( $rOpts->{'dump-want-right-space'} ) {
6955         dump_want_right_space(*STDOUT);
6956         exit 1;
6957     }
6958
6959     # default keywords for which space is introduced before an opening paren
6960     # (at present, including them messes up vertical alignment)
6961     @_ = qw(my local our and or err eq ne if else elsif until
6962       unless while for foreach return switch case given when);
6963     @space_after_keyword{@_} = (1) x scalar(@_);
6964
6965     # allow user to modify these defaults
6966     if ( $_ = $rOpts->{'space-after-keyword'} ) {
6967         s/^\s+//;
6968         s/\s+$//;
6969         @_ = split /\s+/;
6970         @space_after_keyword{@_} = (1) x scalar(@_);
6971     }
6972
6973     if ( $_ = $rOpts->{'nospace-after-keyword'} ) {
6974         s/^\s+//;
6975         s/\s+$//;
6976         @_ = split /\s+/;
6977         @space_after_keyword{@_} = (0) x scalar(@_);
6978     }
6979
6980     # implement user break preferences
6981     if ( $_ = $rOpts->{'want-break-after'} ) {
6982         @_ = split /\s+/;
6983         foreach my $tok (@_) {
6984             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
6985             my $lbs = $left_bond_strength{$tok};
6986             my $rbs = $right_bond_strength{$tok};
6987             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
6988                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
6989                   ( $lbs, $rbs );
6990             }
6991         }
6992     }
6993
6994     if ( $_ = $rOpts->{'want-break-before'} ) {
6995         s/^\s+//;
6996         s/\s+$//;
6997         @_ = split /\s+/;
6998         foreach my $tok (@_) {
6999             my $lbs = $left_bond_strength{$tok};
7000             my $rbs = $right_bond_strength{$tok};
7001             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
7002                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
7003                   ( $lbs, $rbs );
7004             }
7005         }
7006     }
7007
7008     # make note if breaks are before certain key types
7009     %want_break_before = ();
7010
7011     foreach
7012       my $tok ( '.', ',', ':', '?', '&&', '||', 'and', 'or', 'err', 'xor' )
7013     {
7014         $want_break_before{$tok} =
7015           $left_bond_strength{$tok} < $right_bond_strength{$tok};
7016     }
7017
7018     # Coordinate ?/: breaks, which must be similar
7019     if ( !$want_break_before{':'} ) {
7020         $want_break_before{'?'}   = $want_break_before{':'};
7021         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
7022         $left_bond_strength{'?'}  = NO_BREAK;
7023     }
7024
7025     # Define here tokens which may follow the closing brace of a do statement
7026     # on the same line, as in:
7027     #   } while ( $something);
7028     @_ = qw(until while unless if ; : );
7029     push @_, ',';
7030     @is_do_follower{@_} = (1) x scalar(@_);
7031
7032     # These tokens may follow the closing brace of an if or elsif block.
7033     # In other words, for cuddled else we want code to look like:
7034     #   } elsif ( $something) {
7035     #   } else {
7036     if ( $rOpts->{'cuddled-else'} ) {
7037         @_ = qw(else elsif);
7038         @is_if_brace_follower{@_} = (1) x scalar(@_);
7039     }
7040     else {
7041         %is_if_brace_follower = ();
7042     }
7043
7044     # nothing can follow the closing curly of an else { } block:
7045     %is_else_brace_follower = ();
7046
7047     # what can follow a multi-line anonymous sub definition closing curly:
7048     @_ = qw# ; : => or and  && || ~~ ) #;
7049     push @_, ',';
7050     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
7051
7052     # what can follow a one-line anonynomous sub closing curly:
7053     # one-line anonumous subs also have ']' here...
7054     # see tk3.t and PP.pm
7055     @_ = qw#  ; : => or and  && || ) ] ~~ #;
7056     push @_, ',';
7057     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
7058
7059     # What can follow a closing curly of a block
7060     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
7061     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
7062     @_ = qw#  ; : => or and  && || ) #;
7063     push @_, ',';
7064
7065     # allow cuddled continue if cuddled else is specified
7066     if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
7067
7068     @is_other_brace_follower{@_} = (1) x scalar(@_);
7069
7070     $right_bond_strength{'{'} = WEAK;
7071     $left_bond_strength{'{'}  = VERY_STRONG;
7072
7073     # make -l=0  equal to -l=infinite
7074     if ( !$rOpts->{'maximum-line-length'} ) {
7075         $rOpts->{'maximum-line-length'} = 1000000;
7076     }
7077
7078     # make -lbl=0  equal to -lbl=infinite
7079     if ( !$rOpts->{'long-block-line-count'} ) {
7080         $rOpts->{'long-block-line-count'} = 1000000;
7081     }
7082
7083     my $ole = $rOpts->{'output-line-ending'};
7084     if ($ole) {
7085         my %endings = (
7086             dos  => "\015\012",
7087             win  => "\015\012",
7088             mac  => "\015",
7089             unix => "\012",
7090         );
7091         $ole = lc $ole;
7092         unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
7093             my $str = join " ", keys %endings;
7094             die <<EOM;
7095 Unrecognized line ending '$ole'; expecting one of: $str
7096 EOM
7097         }
7098         if ( $rOpts->{'preserve-line-endings'} ) {
7099             warn "Ignoring -ple; conflicts with -ole\n";
7100             $rOpts->{'preserve-line-endings'} = undef;
7101         }
7102     }
7103
7104     # hashes used to simplify setting whitespace
7105     %tightness = (
7106         '{' => $rOpts->{'brace-tightness'},
7107         '}' => $rOpts->{'brace-tightness'},
7108         '(' => $rOpts->{'paren-tightness'},
7109         ')' => $rOpts->{'paren-tightness'},
7110         '[' => $rOpts->{'square-bracket-tightness'},
7111         ']' => $rOpts->{'square-bracket-tightness'},
7112     );
7113     %matching_token = (
7114         '{' => '}',
7115         '(' => ')',
7116         '[' => ']',
7117         '?' => ':',
7118     );
7119
7120     # frequently used parameters
7121     $rOpts_add_newlines          = $rOpts->{'add-newlines'};
7122     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
7123     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
7124     $rOpts_block_brace_vertical_tightness =
7125       $rOpts->{'block-brace-vertical-tightness'};
7126     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
7127     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
7128     $rOpts_break_at_old_ternary_breakpoints =
7129       $rOpts->{'break-at-old-ternary-breakpoints'};
7130     $rOpts_break_at_old_comma_breakpoints =
7131       $rOpts->{'break-at-old-comma-breakpoints'};
7132     $rOpts_break_at_old_keyword_breakpoints =
7133       $rOpts->{'break-at-old-keyword-breakpoints'};
7134     $rOpts_break_at_old_logical_breakpoints =
7135       $rOpts->{'break-at-old-logical-breakpoints'};
7136     $rOpts_closing_side_comment_else_flag =
7137       $rOpts->{'closing-side-comment-else-flag'};
7138     $rOpts_closing_side_comment_maximum_text =
7139       $rOpts->{'closing-side-comment-maximum-text'};
7140     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
7141     $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
7142     $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
7143     $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
7144     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
7145     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
7146     $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
7147     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
7148     $rOpts_short_concatenation_item_length =
7149       $rOpts->{'short-concatenation-item-length'};
7150     $rOpts_swallow_optional_blank_lines =
7151       $rOpts->{'swallow-optional-blank-lines'};
7152     $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
7153     $rOpts_format_skipping        = $rOpts->{'format-skipping'};
7154     $rOpts_space_function_paren   = $rOpts->{'space-function-paren'};
7155     $rOpts_space_keyword_paren    = $rOpts->{'space-keyword-paren'};
7156     $half_maximum_line_length     = $rOpts_maximum_line_length / 2;
7157
7158     # Note that both opening and closing tokens can access the opening
7159     # and closing flags of their container types.
7160     %opening_vertical_tightness = (
7161         '(' => $rOpts->{'paren-vertical-tightness'},
7162         '{' => $rOpts->{'brace-vertical-tightness'},
7163         '[' => $rOpts->{'square-bracket-vertical-tightness'},
7164         ')' => $rOpts->{'paren-vertical-tightness'},
7165         '}' => $rOpts->{'brace-vertical-tightness'},
7166         ']' => $rOpts->{'square-bracket-vertical-tightness'},
7167     );
7168
7169     %closing_vertical_tightness = (
7170         '(' => $rOpts->{'paren-vertical-tightness-closing'},
7171         '{' => $rOpts->{'brace-vertical-tightness-closing'},
7172         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7173         ')' => $rOpts->{'paren-vertical-tightness-closing'},
7174         '}' => $rOpts->{'brace-vertical-tightness-closing'},
7175         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
7176     );
7177
7178     # assume flag for '>' same as ')' for closing qw quotes
7179     %closing_token_indentation = (
7180         ')' => $rOpts->{'closing-paren-indentation'},
7181         '}' => $rOpts->{'closing-brace-indentation'},
7182         ']' => $rOpts->{'closing-square-bracket-indentation'},
7183         '>' => $rOpts->{'closing-paren-indentation'},
7184     );
7185
7186     %opening_token_right = (
7187         '(' => $rOpts->{'opening-paren-right'},
7188         '{' => $rOpts->{'opening-hash-brace-right'},
7189         '[' => $rOpts->{'opening-square-bracket-right'},
7190     );
7191
7192     %stack_opening_token = (
7193         '(' => $rOpts->{'stack-opening-paren'},
7194         '{' => $rOpts->{'stack-opening-hash-brace'},
7195         '[' => $rOpts->{'stack-opening-square-bracket'},
7196     );
7197
7198     %stack_closing_token = (
7199         ')' => $rOpts->{'stack-closing-paren'},
7200         '}' => $rOpts->{'stack-closing-hash-brace'},
7201         ']' => $rOpts->{'stack-closing-square-bracket'},
7202     );
7203 }
7204
7205 sub make_static_block_comment_pattern {
7206
7207     # create the pattern used to identify static block comments
7208     $static_block_comment_pattern = '^\s*##';
7209
7210     # allow the user to change it
7211     if ( $rOpts->{'static-block-comment-prefix'} ) {
7212         my $prefix = $rOpts->{'static-block-comment-prefix'};
7213         $prefix =~ s/^\s*//;
7214         my $pattern = $prefix;
7215
7216         # user may give leading caret to force matching left comments only
7217         if ( $prefix !~ /^\^#/ ) {
7218             if ( $prefix !~ /^#/ ) {
7219                 die
7220 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
7221             }
7222             $pattern = '^\s*' . $prefix;
7223         }
7224         eval "'##'=~/$pattern/";
7225         if ($@) {
7226             die
7227 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
7228         }
7229         $static_block_comment_pattern = $pattern;
7230     }
7231 }
7232
7233 sub make_format_skipping_pattern {
7234     my ( $opt_name, $default ) = @_;
7235     my $param = $rOpts->{$opt_name};
7236     unless ($param) { $param = $default }
7237     $param =~ s/^\s*//;
7238     if ( $param !~ /^#/ ) {
7239         die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
7240     }
7241     my $pattern = '^' . $param . '\s';
7242     eval "'#'=~/$pattern/";
7243     if ($@) {
7244         die
7245 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
7246     }
7247     return $pattern;
7248 }
7249
7250 sub make_closing_side_comment_list_pattern {
7251
7252     # turn any input list into a regex for recognizing selected block types
7253     $closing_side_comment_list_pattern = '^\w+';
7254     if ( defined( $rOpts->{'closing-side-comment-list'} )
7255         && $rOpts->{'closing-side-comment-list'} )
7256     {
7257         $closing_side_comment_list_pattern =
7258           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
7259     }
7260 }
7261
7262 sub make_bli_pattern {
7263
7264     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
7265         && $rOpts->{'brace-left-and-indent-list'} )
7266     {
7267         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
7268     }
7269
7270     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
7271 }
7272
7273 sub make_block_brace_vertical_tightness_pattern {
7274
7275     # turn any input list into a regex for recognizing selected block types
7276     $block_brace_vertical_tightness_pattern =
7277       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7278
7279     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
7280         && $rOpts->{'block-brace-vertical-tightness-list'} )
7281     {
7282         $block_brace_vertical_tightness_pattern =
7283           make_block_pattern( '-bbvtl',
7284             $rOpts->{'block-brace-vertical-tightness-list'} );
7285     }
7286 }
7287
7288 sub make_block_pattern {
7289
7290     #  given a string of block-type keywords, return a regex to match them
7291     #  The only tricky part is that labels are indicated with a single ':'
7292     #  and the 'sub' token text may have additional text after it (name of
7293     #  sub).
7294     #
7295     #  Example:
7296     #
7297     #   input string: "if else elsif unless while for foreach do : sub";
7298     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
7299
7300     my ( $abbrev, $string ) = @_;
7301     $string =~ s/^\s+//;
7302     $string =~ s/\s+$//;
7303     my @list = split /\s+/, $string;
7304     my @words = ();
7305     my %seen;
7306     for my $i (@list) {
7307         next if $seen{$i};
7308         $seen{$i} = 1;
7309         if ( $i eq 'sub' ) {
7310         }
7311         elsif ( $i eq ':' ) {
7312             push @words, '\w+:';
7313         }
7314         elsif ( $i =~ /^\w/ ) {
7315             push @words, $i;
7316         }
7317         else {
7318             warn "unrecognized block type $i after $abbrev, ignoring\n";
7319         }
7320     }
7321     my $pattern = '(' . join( '|', @words ) . ')$';
7322     if ( $seen{'sub'} ) {
7323         $pattern = '(' . $pattern . '|sub)';
7324     }
7325     $pattern = '^' . $pattern;
7326     return $pattern;
7327 }
7328
7329 sub make_static_side_comment_pattern {
7330
7331     # create the pattern used to identify static side comments
7332     $static_side_comment_pattern = '^##';
7333
7334     # allow the user to change it
7335     if ( $rOpts->{'static-side-comment-prefix'} ) {
7336         my $prefix = $rOpts->{'static-side-comment-prefix'};
7337         $prefix =~ s/^\s*//;
7338         my $pattern = '^' . $prefix;
7339         eval "'##'=~/$pattern/";
7340         if ($@) {
7341             die
7342 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
7343         }
7344         $static_side_comment_pattern = $pattern;
7345     }
7346 }
7347
7348 sub make_closing_side_comment_prefix {
7349
7350     # Be sure we have a valid closing side comment prefix
7351     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
7352     my $csc_prefix_pattern;
7353     if ( !defined($csc_prefix) ) {
7354         $csc_prefix         = '## end';
7355         $csc_prefix_pattern = '^##\s+end';
7356     }
7357     else {
7358         my $test_csc_prefix = $csc_prefix;
7359         if ( $test_csc_prefix !~ /^#/ ) {
7360             $test_csc_prefix = '#' . $test_csc_prefix;
7361         }
7362
7363         # make a regex to recognize the prefix
7364         my $test_csc_prefix_pattern = $test_csc_prefix;
7365
7366         # escape any special characters
7367         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
7368
7369         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
7370
7371         # allow exact number of intermediate spaces to vary
7372         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
7373
7374         # make sure we have a good pattern
7375         # if we fail this we probably have an error in escaping
7376         # characters.
7377         eval "'##'=~/$test_csc_prefix_pattern/";
7378         if ($@) {
7379
7380             # shouldn't happen..must have screwed up escaping, above
7381             report_definite_bug();
7382             warn
7383 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
7384
7385             # just warn and keep going with defaults
7386             warn "Please consider using a simpler -cscp prefix\n";
7387             warn "Using default -cscp instead; please check output\n";
7388         }
7389         else {
7390             $csc_prefix         = $test_csc_prefix;
7391             $csc_prefix_pattern = $test_csc_prefix_pattern;
7392         }
7393     }
7394     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
7395     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
7396 }
7397
7398 sub dump_want_left_space {
7399     my $fh = shift;
7400     local $" = "\n";
7401     print $fh <<EOM;
7402 These values are the main control of whitespace to the left of a token type;
7403 They may be altered with the -wls parameter.
7404 For a list of token types, use perltidy --dump-token-types (-dtt)
7405  1 means the token wants a space to its left
7406 -1 means the token does not want a space to its left
7407 ------------------------------------------------------------------------
7408 EOM
7409     foreach ( sort keys %want_left_space ) {
7410         print $fh "$_\t$want_left_space{$_}\n";
7411     }
7412 }
7413
7414 sub dump_want_right_space {
7415     my $fh = shift;
7416     local $" = "\n";
7417     print $fh <<EOM;
7418 These values are the main control of whitespace to the right of a token type;
7419 They may be altered with the -wrs parameter.
7420 For a list of token types, use perltidy --dump-token-types (-dtt)
7421  1 means the token wants a space to its right
7422 -1 means the token does not want a space to its right
7423 ------------------------------------------------------------------------
7424 EOM
7425     foreach ( sort keys %want_right_space ) {
7426         print $fh "$_\t$want_right_space{$_}\n";
7427     }
7428 }
7429
7430 {    # begin is_essential_whitespace
7431
7432     my %is_sort_grep_map;
7433     my %is_for_foreach;
7434
7435     BEGIN {
7436
7437         @_ = qw(sort grep map);
7438         @is_sort_grep_map{@_} = (1) x scalar(@_);
7439
7440         @_ = qw(for foreach);
7441         @is_for_foreach{@_} = (1) x scalar(@_);
7442
7443     }
7444
7445     sub is_essential_whitespace {
7446
7447         # Essential whitespace means whitespace which cannot be safely deleted
7448         # without risking the introduction of a syntax error.
7449         # We are given three tokens and their types:
7450         # ($tokenl, $typel) is the token to the left of the space in question
7451         # ($tokenr, $typer) is the token to the right of the space in question
7452         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
7453         #
7454         # This is a slow routine but is not needed too often except when -mangle
7455         # is used.
7456         #
7457         # Note: This routine should almost never need to be changed.  It is
7458         # for avoiding syntax problems rather than for formatting.
7459         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
7460
7461         my $result =
7462
7463           # never combine two bare words or numbers
7464           # examples:  and ::ok(1)
7465           #            return ::spw(...)
7466           #            for bla::bla:: abc
7467           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7468           #            $input eq"quit" to make $inputeq"quit"
7469           #            my $size=-s::SINK if $file;  <==OK but we won't do it
7470           # don't join something like: for bla::bla:: abc
7471           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
7472           ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
7473
7474           # do not combine a number with a concatination dot
7475           # example: pom.caputo:
7476           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
7477           || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
7478           || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
7479
7480           # do not join a minus with a bare word, because you might form
7481           # a file test operator.  Example from Complex.pm:
7482           # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
7483           || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
7484
7485           # and something like this could become ambiguous without space
7486           # after the '-':
7487           #   use constant III=>1;
7488           #   $a = $b - III;
7489           # and even this:
7490           #   $a = - III;
7491           || ( ( $tokenl eq '-' )
7492             && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
7493
7494           # '= -' should not become =- or you will get a warning
7495           # about reversed -=
7496           # || ($tokenr eq '-')
7497
7498           # keep a space between a quote and a bareword to prevent the
7499           # bareword from becomming a quote modifier.
7500           || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7501
7502           # keep a space between a token ending in '$' and any word;
7503           # this caused trouble:  "die @$ if $@"
7504           || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
7505             && ( $tokenr =~ /^[a-zA-Z_]/ ) )
7506
7507           # perl is very fussy about spaces before <<
7508           || ( $tokenr =~ /^\<\</ )
7509
7510           # avoid combining tokens to create new meanings. Example:
7511           #     $a+ +$b must not become $a++$b
7512           || ( $is_digraph{ $tokenl . $tokenr } )
7513           || ( $is_trigraph{ $tokenl . $tokenr } )
7514
7515           # another example: do not combine these two &'s:
7516           #     allow_options & &OPT_EXECCGI
7517           || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
7518
7519           # don't combine $$ or $# with any alphanumeric
7520           # (testfile mangle.t with --mangle)
7521           || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
7522
7523           # retain any space after possible filehandle
7524           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
7525           || ( $typel eq 'Z' )
7526
7527           # Perl is sensitive to whitespace after the + here:
7528           #  $b = xvals $a + 0.1 * yvals $a;
7529           || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
7530
7531           # keep paren separate in 'use Foo::Bar ()'
7532           || ( $tokenr eq '('
7533             && $typel   eq 'w'
7534             && $typell  eq 'k'
7535             && $tokenll eq 'use' )
7536
7537           # keep any space between filehandle and paren:
7538           # file mangle.t with --mangle:
7539           || ( $typel eq 'Y' && $tokenr eq '(' )
7540
7541           # retain any space after here doc operator ( hereerr.t)
7542           || ( $typel eq 'h' )
7543
7544           # FIXME: this needs some further work; extrude.t has test cases
7545           # it is safest to retain any space after start of ? : operator
7546           # because of perl's quirky parser.
7547           # ie, this line will fail if you remove the space after the '?':
7548           #    $b=join $comma ? ',' : ':', @_;   # ok
7549           #    $b=join $comma ?',' : ':', @_;   # error!
7550           # but this is ok :)
7551           #    $b=join $comma?',' : ':', @_;   # not a problem!
7552           ## || ($typel eq '?')
7553
7554           # be careful with a space around ++ and --, to avoid ambiguity as to
7555           # which token it applies
7556           || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
7557           || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
7558
7559           # need space after foreach my; for example, this will fail in
7560           # older versions of Perl:
7561           # foreach my$ft(@filetypes)...
7562           || (
7563             $tokenl eq 'my'
7564
7565             #  /^(for|foreach)$/
7566             && $is_for_foreach{$tokenll} && $tokenr =~ /^\$/
7567           )
7568
7569           # must have space between grep and left paren; "grep(" will fail
7570           || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
7571
7572           # don't stick numbers next to left parens, as in:
7573           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
7574           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
7575
7576           ;    # the value of this long logic sequence is the result we want
7577         return $result;
7578     }
7579 }
7580
7581 sub set_white_space_flag {
7582
7583     #    This routine examines each pair of nonblank tokens and
7584     #    sets values for array @white_space_flag.
7585     #
7586     #    $white_space_flag[$j] is a flag indicating whether a white space
7587     #    BEFORE token $j is needed, with the following values:
7588     #
7589     #            -1 do not want a space before token $j
7590     #             0 optional space or $j is a whitespace
7591     #             1 want a space before token $j
7592     #
7593     #
7594     #   The values for the first token will be defined based
7595     #   upon the contents of the "to_go" output array.
7596     #
7597     #   Note: retain debug print statements because they are usually
7598     #   required after adding new token types.
7599
7600     BEGIN {
7601
7602         # initialize these global hashes, which control the use of
7603         # whitespace around tokens:
7604         #
7605         # %binary_ws_rules
7606         # %want_left_space
7607         # %want_right_space
7608         # %space_after_keyword
7609         #
7610         # Many token types are identical to the tokens themselves.
7611         # See the tokenizer for a complete list. Here are some special types:
7612         #   k = perl keyword
7613         #   f = semicolon in for statement
7614         #   m = unary minus
7615         #   p = unary plus
7616         # Note that :: is excluded since it should be contained in an identifier
7617         # Note that '->' is excluded because it never gets space
7618         # parentheses and brackets are excluded since they are handled specially
7619         # curly braces are included but may be overridden by logic, such as
7620         # newline logic.
7621
7622         # NEW_TOKENS: create a whitespace rule here.  This can be as
7623         # simple as adding your new letter to @spaces_both_sides, for
7624         # example.
7625
7626         @_ = qw" L { ( [ ";
7627         @is_opening_type{@_} = (1) x scalar(@_);
7628
7629         @_ = qw" R } ) ] ";
7630         @is_closing_type{@_} = (1) x scalar(@_);
7631
7632         my @spaces_both_sides = qw"
7633           + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7634           .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~
7635           &&= ||= //= <=> A k f w F n C Y U G v
7636           ";
7637
7638         my @spaces_left_side = qw"
7639           t ! ~ m p { \ h pp mm Z j
7640           ";
7641         push( @spaces_left_side, '#' );    # avoids warning message
7642
7643         my @spaces_right_side = qw"
7644           ; } ) ] R J ++ -- **=
7645           ";
7646         push( @spaces_right_side, ',' );    # avoids warning message
7647         @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7648         @want_right_space{@spaces_both_sides} =
7649           (1) x scalar(@spaces_both_sides);
7650         @want_left_space{@spaces_left_side}  = (1) x scalar(@spaces_left_side);
7651         @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7652         @want_left_space{@spaces_right_side} =
7653           (-1) x scalar(@spaces_right_side);
7654         @want_right_space{@spaces_right_side} =
7655           (1) x scalar(@spaces_right_side);
7656         $want_left_space{'L'}   = WS_NO;
7657         $want_left_space{'->'}  = WS_NO;
7658         $want_right_space{'->'} = WS_NO;
7659         $want_left_space{'**'}  = WS_NO;
7660         $want_right_space{'**'} = WS_NO;
7661
7662         # hash type information must stay tightly bound
7663         # as in :  ${xxxx}
7664         $binary_ws_rules{'i'}{'L'} = WS_NO;
7665         $binary_ws_rules{'i'}{'{'} = WS_YES;
7666         $binary_ws_rules{'k'}{'{'} = WS_YES;
7667         $binary_ws_rules{'U'}{'{'} = WS_YES;
7668         $binary_ws_rules{'i'}{'['} = WS_NO;
7669         $binary_ws_rules{'R'}{'L'} = WS_NO;
7670         $binary_ws_rules{'R'}{'{'} = WS_NO;
7671         $binary_ws_rules{'t'}{'L'} = WS_NO;
7672         $binary_ws_rules{'t'}{'{'} = WS_NO;
7673         $binary_ws_rules{'}'}{'L'} = WS_NO;
7674         $binary_ws_rules{'}'}{'{'} = WS_NO;
7675         $binary_ws_rules{'$'}{'L'} = WS_NO;
7676         $binary_ws_rules{'$'}{'{'} = WS_NO;
7677         $binary_ws_rules{'@'}{'L'} = WS_NO;
7678         $binary_ws_rules{'@'}{'{'} = WS_NO;
7679         $binary_ws_rules{'='}{'L'} = WS_YES;
7680
7681         # the following includes ') {'
7682         # as in :    if ( xxx ) { yyy }
7683         $binary_ws_rules{']'}{'L'} = WS_NO;
7684         $binary_ws_rules{']'}{'{'} = WS_NO;
7685         $binary_ws_rules{')'}{'{'} = WS_YES;
7686         $binary_ws_rules{')'}{'['} = WS_NO;
7687         $binary_ws_rules{']'}{'['} = WS_NO;
7688         $binary_ws_rules{']'}{'{'} = WS_NO;
7689         $binary_ws_rules{'}'}{'['} = WS_NO;
7690         $binary_ws_rules{'R'}{'['} = WS_NO;
7691
7692         $binary_ws_rules{']'}{'++'} = WS_NO;
7693         $binary_ws_rules{']'}{'--'} = WS_NO;
7694         $binary_ws_rules{')'}{'++'} = WS_NO;
7695         $binary_ws_rules{')'}{'--'} = WS_NO;
7696
7697         $binary_ws_rules{'R'}{'++'} = WS_NO;
7698         $binary_ws_rules{'R'}{'--'} = WS_NO;
7699
7700         $binary_ws_rules{'k'}{':'} = WS_NO;     # keep colon with label
7701         $binary_ws_rules{'w'}{':'} = WS_NO;
7702         $binary_ws_rules{'i'}{'Q'} = WS_YES;
7703         $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
7704
7705         # FIXME: we need to split 'i' into variables and functions
7706         # and have no space for functions but space for variables.  For now,
7707         # I have a special patch in the special rules below
7708         $binary_ws_rules{'i'}{'('} = WS_NO;
7709
7710         $binary_ws_rules{'w'}{'('} = WS_NO;
7711         $binary_ws_rules{'w'}{'{'} = WS_YES;
7712     }
7713     my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7714     my ( $last_token, $last_type, $last_block_type, $token, $type,
7715         $block_type );
7716     my (@white_space_flag);
7717     my $j_tight_closing_paren = -1;
7718
7719     if ( $max_index_to_go >= 0 ) {
7720         $token      = $tokens_to_go[$max_index_to_go];
7721         $type       = $types_to_go[$max_index_to_go];
7722         $block_type = $block_type_to_go[$max_index_to_go];
7723     }
7724     else {
7725         $token      = ' ';
7726         $type       = 'b';
7727         $block_type = '';
7728     }
7729
7730     # loop over all tokens
7731     my ( $j, $ws );
7732
7733     for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7734
7735         if ( $$rtoken_type[$j] eq 'b' ) {
7736             $white_space_flag[$j] = WS_OPTIONAL;
7737             next;
7738         }
7739
7740         # set a default value, to be changed as needed
7741         $ws              = undef;
7742         $last_token      = $token;
7743         $last_type       = $type;
7744         $last_block_type = $block_type;
7745         $token           = $$rtokens[$j];
7746         $type            = $$rtoken_type[$j];
7747         $block_type      = $$rblock_type[$j];
7748
7749         #---------------------------------------------------------------
7750         # section 1:
7751         # handle space on the inside of opening braces
7752         #---------------------------------------------------------------
7753
7754         #    /^[L\{\(\[]$/
7755         if ( $is_opening_type{$last_type} ) {
7756
7757             $j_tight_closing_paren = -1;
7758
7759             # let's keep empty matched braces together: () {} []
7760             # except for BLOCKS
7761             if ( $token eq $matching_token{$last_token} ) {
7762                 if ($block_type) {
7763                     $ws = WS_YES;
7764                 }
7765                 else {
7766                     $ws = WS_NO;
7767                 }
7768             }
7769             else {
7770
7771                 # we're considering the right of an opening brace
7772                 # tightness = 0 means always pad inside with space
7773                 # tightness = 1 means pad inside if "complex"
7774                 # tightness = 2 means never pad inside with space
7775
7776                 my $tightness;
7777                 if (   $last_type eq '{'
7778                     && $last_token eq '{'
7779                     && $last_block_type )
7780                 {
7781                     $tightness = $rOpts_block_brace_tightness;
7782                 }
7783                 else { $tightness = $tightness{$last_token} }
7784
7785                 if ( $tightness <= 0 ) {
7786                     $ws = WS_YES;
7787                 }
7788                 elsif ( $tightness > 1 ) {
7789                     $ws = WS_NO;
7790                 }
7791                 else {
7792
7793                     # Patch to count '-foo' as single token so that
7794                     # each of  $a{-foo} and $a{foo} and $a{'foo'} do
7795                     # not get spaces with default formatting.
7796                     my $j_here = $j;
7797                     ++$j_here
7798                       if ( $token eq '-'
7799                         && $last_token             eq '{'
7800                         && $$rtoken_type[ $j + 1 ] eq 'w' );
7801
7802                     # $j_next is where a closing token should be if
7803                     # the container has a single token
7804                     my $j_next =
7805                       ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7806                       ? $j_here + 2
7807                       : $j_here + 1;
7808                     my $tok_next  = $$rtokens[$j_next];
7809                     my $type_next = $$rtoken_type[$j_next];
7810
7811                     # for tightness = 1, if there is just one token
7812                     # within the matching pair, we will keep it tight
7813                     if (
7814                         $tok_next eq $matching_token{$last_token}
7815
7816                         # but watch out for this: [ [ ]    (misc.t)
7817                         && $last_token ne $token
7818                       )
7819                     {
7820
7821                         # remember where to put the space for the closing paren
7822                         $j_tight_closing_paren = $j_next;
7823                         $ws                    = WS_NO;
7824                     }
7825                     else {
7826                         $ws = WS_YES;
7827                     }
7828                 }
7829             }
7830         }    # done with opening braces and brackets
7831         my $ws_1 = $ws
7832           if FORMATTER_DEBUG_FLAG_WHITE;
7833
7834         #---------------------------------------------------------------
7835         # section 2:
7836         # handle space on inside of closing brace pairs
7837         #---------------------------------------------------------------
7838
7839         #   /[\}\)\]R]/
7840         if ( $is_closing_type{$type} ) {
7841
7842             if ( $j == $j_tight_closing_paren ) {
7843
7844                 $j_tight_closing_paren = -1;
7845                 $ws                    = WS_NO;
7846             }
7847             else {
7848
7849                 if ( !defined($ws) ) {
7850
7851                     my $tightness;
7852                     if ( $type eq '}' && $token eq '}' && $block_type ) {
7853                         $tightness = $rOpts_block_brace_tightness;
7854                     }
7855                     else { $tightness = $tightness{$token} }
7856
7857                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7858                 }
7859             }
7860         }
7861
7862         my $ws_2 = $ws
7863           if FORMATTER_DEBUG_FLAG_WHITE;
7864
7865         #---------------------------------------------------------------
7866         # section 3:
7867         # use the binary table
7868         #---------------------------------------------------------------
7869         if ( !defined($ws) ) {
7870             $ws = $binary_ws_rules{$last_type}{$type};
7871         }
7872         my $ws_3 = $ws
7873           if FORMATTER_DEBUG_FLAG_WHITE;
7874
7875         #---------------------------------------------------------------
7876         # section 4:
7877         # some special cases
7878         #---------------------------------------------------------------
7879         if ( $token eq '(' ) {
7880
7881             # This will have to be tweaked as tokenization changes.
7882             # We usually want a space at '} (', for example:
7883             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7884             #
7885             # But not others:
7886             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7887             # At present, the above & block is marked as type L/R so this case
7888             # won't go through here.
7889             if ( $last_type eq '}' ) { $ws = WS_YES }
7890
7891             # NOTE: some older versions of Perl had occasional problems if
7892             # spaces are introduced between keywords or functions and opening
7893             # parens.  So the default is not to do this except is certain
7894             # cases.  The current Perl seems to tolerate spaces.
7895
7896             # Space between keyword and '('
7897             elsif ( $last_type eq 'k' ) {
7898                 $ws = WS_NO
7899                   unless ( $rOpts_space_keyword_paren
7900                     || $space_after_keyword{$last_token} );
7901             }
7902
7903             # Space between function and '('
7904             # -----------------------------------------------------
7905             # 'w' and 'i' checks for something like:
7906             #   myfun(    &myfun(   ->myfun(
7907             # -----------------------------------------------------
7908             elsif (( $last_type =~ /^[wU]$/ )
7909                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7910             {
7911                 $ws = WS_NO unless ($rOpts_space_function_paren);
7912             }
7913
7914             # space between something like $i and ( in
7915             # for $i ( 0 .. 20 ) {
7916             # FIXME: eventually, type 'i' needs to be split into multiple
7917             # token types so this can be a hardwired rule.
7918             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7919                 $ws = WS_YES;
7920             }
7921
7922             # allow constant function followed by '()' to retain no space
7923             elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7924                 $ws = WS_NO;
7925             }
7926         }
7927
7928         # patch for SWITCH/CASE: make space at ']{' optional
7929         # since the '{' might begin a case or when block
7930         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
7931             $ws = WS_OPTIONAL;
7932         }
7933
7934         # keep space between 'sub' and '{' for anonymous sub definition
7935         if ( $type eq '{' ) {
7936             if ( $last_token eq 'sub' ) {
7937                 $ws = WS_YES;
7938             }
7939
7940             # this is needed to avoid no space in '){'
7941             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7942
7943             # avoid any space before the brace or bracket in something like
7944             #  @opts{'a','b',...}
7945             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7946                 $ws = WS_NO;
7947             }
7948         }
7949
7950         elsif ( $type eq 'i' ) {
7951
7952             # never a space before ->
7953             if ( $token =~ /^\-\>/ ) {
7954                 $ws = WS_NO;
7955             }
7956         }
7957
7958         # retain any space between '-' and bare word
7959         elsif ( $type eq 'w' || $type eq 'C' ) {
7960             $ws = WS_OPTIONAL if $last_type eq '-';
7961
7962             # never a space before ->
7963             if ( $token =~ /^\-\>/ ) {
7964                 $ws = WS_NO;
7965             }
7966         }
7967
7968         # retain any space between '-' and bare word
7969         # example: avoid space between 'USER' and '-' here:
7970         #   $myhash{USER-NAME}='steve';
7971         elsif ( $type eq 'm' || $type eq '-' ) {
7972             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
7973         }
7974
7975         # always space before side comment
7976         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
7977
7978         # always preserver whatever space was used after a possible
7979         # filehandle (except _) or here doc operator
7980         if (
7981             $type ne '#'
7982             && ( ( $last_type eq 'Z' && $last_token ne '_' )
7983                 || $last_type eq 'h' )
7984           )
7985         {
7986             $ws = WS_OPTIONAL;
7987         }
7988
7989         my $ws_4 = $ws
7990           if FORMATTER_DEBUG_FLAG_WHITE;
7991
7992         #---------------------------------------------------------------
7993         # section 5:
7994         # default rules not covered above
7995         #---------------------------------------------------------------
7996         # if we fall through to here,
7997         # look at the pre-defined hash tables for the two tokens, and
7998         # if (they are equal) use the common value
7999         # if (either is zero or undef) use the other
8000         # if (either is -1) use it
8001         # That is,
8002         # left  vs right
8003         #  1    vs    1     -->  1
8004         #  0    vs    0     -->  0
8005         # -1    vs   -1     --> -1
8006         #
8007         #  0    vs   -1     --> -1
8008         #  0    vs    1     -->  1
8009         #  1    vs    0     -->  1
8010         # -1    vs    0     --> -1
8011         #
8012         # -1    vs    1     --> -1
8013         #  1    vs   -1     --> -1
8014         if ( !defined($ws) ) {
8015             my $wl = $want_left_space{$type};
8016             my $wr = $want_right_space{$last_type};
8017             if ( !defined($wl) ) { $wl = 0 }
8018             if ( !defined($wr) ) { $wr = 0 }
8019             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8020         }
8021
8022         if ( !defined($ws) ) {
8023             $ws = 0;
8024             write_diagnostics(
8025                 "WS flag is undefined for tokens $last_token $token\n");
8026         }
8027
8028         # Treat newline as a whitespace. Otherwise, we might combine
8029         # 'Send' and '-recipients' here according to the above rules:
8030         #    my $msg = new Fax::Send
8031         #      -recipients => $to,
8032         #      -data => $data;
8033         if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8034
8035         if (   ( $ws == 0 )
8036             && $j > 0
8037             && $j < $jmax
8038             && ( $last_type !~ /^[Zh]$/ ) )
8039         {
8040
8041             # If this happens, we have a non-fatal but undesirable
8042             # hole in the above rules which should be patched.
8043             write_diagnostics(
8044                 "WS flag is zero for tokens $last_token $token\n");
8045         }
8046         $white_space_flag[$j] = $ws;
8047
8048         FORMATTER_DEBUG_FLAG_WHITE && do {
8049             my $str = substr( $last_token, 0, 15 );
8050             $str .= ' ' x ( 16 - length($str) );
8051             if ( !defined($ws_1) ) { $ws_1 = "*" }
8052             if ( !defined($ws_2) ) { $ws_2 = "*" }
8053             if ( !defined($ws_3) ) { $ws_3 = "*" }
8054             if ( !defined($ws_4) ) { $ws_4 = "*" }
8055             print
8056 "WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8057         };
8058     }
8059     return \@white_space_flag;
8060 }
8061
8062 {    # begin print_line_of_tokens
8063
8064     my $rtoken_type;
8065     my $rtokens;
8066     my $rlevels;
8067     my $rslevels;
8068     my $rblock_type;
8069     my $rcontainer_type;
8070     my $rcontainer_environment;
8071     my $rtype_sequence;
8072     my $input_line;
8073     my $rnesting_tokens;
8074     my $rci_levels;
8075     my $rnesting_blocks;
8076
8077     my $in_quote;
8078     my $python_indentation_level;
8079
8080     # These local token variables are stored by store_token_to_go:
8081     my $block_type;
8082     my $ci_level;
8083     my $container_environment;
8084     my $container_type;
8085     my $in_continued_quote;
8086     my $level;
8087     my $nesting_blocks;
8088     my $no_internal_newlines;
8089     my $slevel;
8090     my $token;
8091     my $type;
8092     my $type_sequence;
8093
8094     # routine to pull the jth token from the line of tokens
8095     sub extract_token {
8096         my $j = shift;
8097         $token                 = $$rtokens[$j];
8098         $type                  = $$rtoken_type[$j];
8099         $block_type            = $$rblock_type[$j];
8100         $container_type        = $$rcontainer_type[$j];
8101         $container_environment = $$rcontainer_environment[$j];
8102         $type_sequence         = $$rtype_sequence[$j];
8103         $level                 = $$rlevels[$j];
8104         $slevel                = $$rslevels[$j];
8105         $nesting_blocks        = $$rnesting_blocks[$j];
8106         $ci_level              = $$rci_levels[$j];
8107     }
8108
8109     {
8110         my @saved_token;
8111
8112         sub save_current_token {
8113
8114             @saved_token = (
8115                 $block_type,            $ci_level,
8116                 $container_environment, $container_type,
8117                 $in_continued_quote,    $level,
8118                 $nesting_blocks,        $no_internal_newlines,
8119                 $slevel,                $token,
8120                 $type,                  $type_sequence,
8121             );
8122         }
8123
8124         sub restore_current_token {
8125             (
8126                 $block_type,            $ci_level,
8127                 $container_environment, $container_type,
8128                 $in_continued_quote,    $level,
8129                 $nesting_blocks,        $no_internal_newlines,
8130                 $slevel,                $token,
8131                 $type,                  $type_sequence,
8132             ) = @saved_token;
8133         }
8134     }
8135
8136     # Routine to place the current token into the output stream.
8137     # Called once per output token.
8138     sub store_token_to_go {
8139
8140         my $flag = $no_internal_newlines;
8141         if ( $_[0] ) { $flag = 1 }
8142
8143         $tokens_to_go[ ++$max_index_to_go ]            = $token;
8144         $types_to_go[$max_index_to_go]                 = $type;
8145         $nobreak_to_go[$max_index_to_go]               = $flag;
8146         $old_breakpoint_to_go[$max_index_to_go]        = 0;
8147         $forced_breakpoint_to_go[$max_index_to_go]     = 0;
8148         $block_type_to_go[$max_index_to_go]            = $block_type;
8149         $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
8150         $container_environment_to_go[$max_index_to_go] = $container_environment;
8151         $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
8152         $ci_levels_to_go[$max_index_to_go]             = $ci_level;
8153         $mate_index_to_go[$max_index_to_go]            = -1;
8154         $matching_token_to_go[$max_index_to_go]        = '';
8155
8156         # Note: negative levels are currently retained as a diagnostic so that
8157         # the 'final indentation level' is correctly reported for bad scripts.
8158         # But this means that every use of $level as an index must be checked.
8159         # If this becomes too much of a problem, we might give up and just clip
8160         # them at zero.
8161         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8162         $levels_to_go[$max_index_to_go] = $level;
8163         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8164         $lengths_to_go[ $max_index_to_go + 1 ] =
8165           $lengths_to_go[$max_index_to_go] + length($token);
8166
8167         # Define the indentation that this token would have if it started
8168         # a new line.  We have to do this now because we need to know this
8169         # when considering one-line blocks.
8170         set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8171
8172         if ( $type ne 'b' ) {
8173             $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8174             $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
8175             $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8176             $last_nonblank_index_to_go      = $max_index_to_go;
8177             $last_nonblank_type_to_go       = $type;
8178             $last_nonblank_token_to_go      = $token;
8179             if ( $type eq ',' ) {
8180                 $comma_count_in_batch++;
8181             }
8182         }
8183
8184         FORMATTER_DEBUG_FLAG_STORE && do {
8185             my ( $a, $b, $c ) = caller();
8186             print
8187 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8188         };
8189     }
8190
8191     sub insert_new_token_to_go {
8192
8193         # insert a new token into the output stream.  use same level as
8194         # previous token; assumes a character at max_index_to_go.
8195         save_current_token();
8196         ( $token, $type, $slevel, $no_internal_newlines ) = @_;
8197
8198         if ( $max_index_to_go == UNDEFINED_INDEX ) {
8199             warning("code bug: bad call to insert_new_token_to_go\n");
8200         }
8201         $level = $levels_to_go[$max_index_to_go];
8202
8203         # FIXME: it seems to be necessary to use the next, rather than
8204         # previous, value of this variable when creating a new blank (align.t)
8205         #my $slevel         = $nesting_depth_to_go[$max_index_to_go];
8206         $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
8207         $ci_level              = $ci_levels_to_go[$max_index_to_go];
8208         $container_environment = $container_environment_to_go[$max_index_to_go];
8209         $in_continued_quote    = 0;
8210         $block_type            = "";
8211         $type_sequence         = "";
8212         store_token_to_go();
8213         restore_current_token();
8214         return;
8215     }
8216
8217     sub print_line_of_tokens {
8218
8219         my $line_of_tokens = shift;
8220
8221         # This routine is called once per input line to process all of
8222         # the tokens on that line.  This is the first stage of
8223         # beautification.
8224         #
8225         # Full-line comments and blank lines may be processed immediately.
8226         #
8227         # For normal lines of code, the tokens are stored one-by-one,
8228         # via calls to 'sub store_token_to_go', until a known line break
8229         # point is reached.  Then, the batch of collected tokens is
8230         # passed along to 'sub output_line_to_go' for further
8231         # processing.  This routine decides if there should be
8232         # whitespace between each pair of non-white tokens, so later
8233         # routines only need to decide on any additional line breaks.
8234         # Any whitespace is initally a single space character.  Later,
8235         # the vertical aligner may expand that to be multiple space
8236         # characters if necessary for alignment.
8237
8238         # extract input line number for error messages
8239         $input_line_number = $line_of_tokens->{_line_number};
8240
8241         $rtoken_type            = $line_of_tokens->{_rtoken_type};
8242         $rtokens                = $line_of_tokens->{_rtokens};
8243         $rlevels                = $line_of_tokens->{_rlevels};
8244         $rslevels               = $line_of_tokens->{_rslevels};
8245         $rblock_type            = $line_of_tokens->{_rblock_type};
8246         $rcontainer_type        = $line_of_tokens->{_rcontainer_type};
8247         $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8248         $rtype_sequence         = $line_of_tokens->{_rtype_sequence};
8249         $input_line             = $line_of_tokens->{_line_text};
8250         $rnesting_tokens        = $line_of_tokens->{_rnesting_tokens};
8251         $rci_levels             = $line_of_tokens->{_rci_levels};
8252         $rnesting_blocks        = $line_of_tokens->{_rnesting_blocks};
8253
8254         $in_continued_quote = $starting_in_quote =
8255           $line_of_tokens->{_starting_in_quote};
8256         $in_quote        = $line_of_tokens->{_ending_in_quote};
8257         $ending_in_quote = $in_quote;
8258         $python_indentation_level =
8259           $line_of_tokens->{_python_indentation_level};
8260
8261         my $j;
8262         my $j_next;
8263         my $jmax;
8264         my $next_nonblank_token;
8265         my $next_nonblank_token_type;
8266         my $rwhite_space_flag;
8267
8268         $jmax                    = @$rtokens - 1;
8269         $block_type              = "";
8270         $container_type          = "";
8271         $container_environment   = "";
8272         $type_sequence           = "";
8273         $no_internal_newlines    = 1 - $rOpts_add_newlines;
8274         $is_static_block_comment = 0;
8275
8276         # Handle a continued quote..
8277         if ($in_continued_quote) {
8278
8279             # A line which is entirely a quote or pattern must go out
8280             # verbatim.  Note: the \n is contained in $input_line.
8281             if ( $jmax <= 0 ) {
8282                 if ( ( $input_line =~ "\t" ) ) {
8283                     note_embedded_tab();
8284                 }
8285                 write_unindented_line("$input_line");
8286                 $last_line_had_side_comment = 0;
8287                 return;
8288             }
8289
8290             # prior to version 20010406, perltidy had a bug which placed
8291             # continuation indentation before the last line of some multiline
8292             # quotes and patterns -- exactly the lines passing this way.
8293             # To help find affected lines in scripts run with these
8294             # versions, run with '-chk', and it will warn of any quotes or
8295             # patterns which might have been modified by these early
8296             # versions.
8297             if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8298                 warning(
8299 "-chk: please check this line for extra leading whitespace\n"
8300                 );
8301             }
8302         }
8303
8304         # Write line verbatim if we are in a formatting skip section
8305         if ($in_format_skipping_section) {
8306             write_unindented_line("$input_line");
8307             $last_line_had_side_comment = 0;
8308
8309             # Note: extra space appended to comment simplifies pattern matching
8310             if (   $jmax == 0
8311                 && $$rtoken_type[0] eq '#'
8312                 && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8313             {
8314                 $in_format_skipping_section = 0;
8315                 write_logfile_entry("Exiting formatting skip section\n");
8316             }
8317             return;
8318         }
8319
8320         # See if we are entering a formatting skip section
8321         if (   $rOpts_format_skipping
8322             && $jmax == 0
8323             && $$rtoken_type[0] eq '#'
8324             && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8325         {
8326             flush();
8327             $in_format_skipping_section = 1;
8328             write_logfile_entry("Entering formatting skip section\n");
8329             write_unindented_line("$input_line");
8330             $last_line_had_side_comment = 0;
8331             return;
8332         }
8333
8334         # delete trailing blank tokens
8335         if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8336
8337         # Handle a blank line..
8338         if ( $jmax < 0 ) {
8339
8340             # For the 'swallow-optional-blank-lines' option, we delete all
8341             # old blank lines and let the blank line rules generate any
8342             # needed blanks.
8343             if ( !$rOpts_swallow_optional_blank_lines ) {
8344                 flush();
8345                 $file_writer_object->write_blank_code_line();
8346                 $last_line_leading_type = 'b';
8347             }
8348             $last_line_had_side_comment = 0;
8349             return;
8350         }
8351
8352         # see if this is a static block comment (starts with ## by default)
8353         my $is_static_block_comment_without_leading_space = 0;
8354         if (   $jmax == 0
8355             && $$rtoken_type[0] eq '#'
8356             && $rOpts->{'static-block-comments'}
8357             && $input_line =~ /$static_block_comment_pattern/o )
8358         {
8359             $is_static_block_comment = 1;
8360             $is_static_block_comment_without_leading_space =
8361               substr( $input_line, 0, 1 ) eq '#';
8362         }
8363
8364         # create a hanging side comment if appropriate
8365         if (
8366                $jmax == 0
8367             && $$rtoken_type[0] eq '#'    # only token is a comment
8368             && $last_line_had_side_comment    # last line had side comment
8369             && $input_line =~ /^\s/           # there is some leading space
8370             && !$is_static_block_comment    # do not make static comment hanging
8371             && $rOpts->{'hanging-side-comments'}    # user is allowing this
8372           )
8373         {
8374
8375             # We will insert an empty qw string at the start of the token list
8376             # to force this comment to be a side comment. The vertical aligner
8377             # should then line it up with the previous side comment.
8378             unshift @$rtoken_type,            'q';
8379             unshift @$rtokens,                '';
8380             unshift @$rlevels,                $$rlevels[0];
8381             unshift @$rslevels,               $$rslevels[0];
8382             unshift @$rblock_type,            '';
8383             unshift @$rcontainer_type,        '';
8384             unshift @$rcontainer_environment, '';
8385             unshift @$rtype_sequence,         '';
8386             unshift @$rnesting_tokens,        $$rnesting_tokens[0];
8387             unshift @$rci_levels,             $$rci_levels[0];
8388             unshift @$rnesting_blocks,        $$rnesting_blocks[0];
8389             $jmax = 1;
8390         }
8391
8392         # remember if this line has a side comment
8393         $last_line_had_side_comment =
8394           ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8395
8396         # Handle a block (full-line) comment..
8397         if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8398
8399             if ( $rOpts->{'delete-block-comments'} ) { return }
8400
8401             if ( $rOpts->{'tee-block-comments'} ) {
8402                 $file_writer_object->tee_on();
8403             }
8404
8405             destroy_one_line_block();
8406             output_line_to_go();
8407
8408             # output a blank line before block comments
8409             if (
8410                    $last_line_leading_type !~ /^[#b]$/
8411                 && $rOpts->{'blanks-before-comments'}    # only if allowed
8412                 && !
8413                 $is_static_block_comment    # never before static block comments
8414               )
8415             {
8416                 flush();                    # switching to new output stream
8417                 $file_writer_object->write_blank_code_line();
8418                 $last_line_leading_type = 'b';
8419             }
8420
8421             # TRIM COMMENTS -- This could be turned off as a option
8422             $$rtokens[0] =~ s/\s*$//;       # trim right end
8423
8424             if (
8425                 $rOpts->{'indent-block-comments'}
8426                 && ( !$rOpts->{'indent-spaced-block-comments'}
8427                     || $input_line =~ /^\s+/ )
8428                 && !$is_static_block_comment_without_leading_space
8429               )
8430             {
8431                 extract_token(0);
8432                 store_token_to_go();
8433                 output_line_to_go();
8434             }
8435             else {
8436                 flush();    # switching to new output stream
8437                 $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8438                 $last_line_leading_type = '#';
8439             }
8440             if ( $rOpts->{'tee-block-comments'} ) {
8441                 $file_writer_object->tee_off();
8442             }
8443             return;
8444         }
8445
8446         # compare input/output indentation except for continuation lines
8447         # (because they have an unknown amount of initial blank space)
8448         # and lines which are quotes (because they may have been outdented)
8449         # Note: this test is placed here because we know the continuation flag
8450         # at this point, which allows us to avoid non-meaningful checks.
8451         my $structural_indentation_level = $$rlevels[0];
8452         compare_indentation_levels( $python_indentation_level,
8453             $structural_indentation_level )
8454           unless ( $python_indentation_level < 0
8455             || ( $$rci_levels[0] > 0 )
8456             || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8457           );
8458
8459         #   Patch needed for MakeMaker.  Do not break a statement
8460         #   in which $VERSION may be calculated.  See MakeMaker.pm;
8461         #   this is based on the coding in it.
8462         #   The first line of a file that matches this will be eval'd:
8463         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8464         #   Examples:
8465         #     *VERSION = \'1.01';
8466         #     ( $VERSION ) = '$Revision: 1.56 $ ' =~ /\$Revision:\s+([^\s]+)/;
8467         #   We will pass such a line straight through without breaking
8468         #   it unless -npvl is used
8469
8470         my $is_VERSION_statement = 0;
8471
8472         if (
8473             !$saw_VERSION_in_this_file
8474             && $input_line =~ /VERSION/    # quick check to reject most lines
8475             && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8476           )
8477         {
8478             $saw_VERSION_in_this_file = 1;
8479             $is_VERSION_statement     = 1;
8480             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8481             $no_internal_newlines = 1;
8482         }
8483
8484         # take care of indentation-only
8485         # NOTE: In previous versions we sent all qw lines out immediately here.
8486         # No longer doing this: also write a line which is entirely a 'qw' list
8487         # to allow stacking of opening and closing tokens.  Note that interior
8488         # qw lines will still go out at the end of this routine.
8489         if ( $rOpts->{'indent-only'} ) {
8490             flush();
8491             $input_line =~ s/^\s*//;    # trim left end
8492             $input_line =~ s/\s*$//;    # trim right end
8493
8494             extract_token(0);
8495             $token                 = $input_line;
8496             $type                  = 'q';
8497             $block_type            = "";
8498             $container_type        = "";
8499             $container_environment = "";
8500             $type_sequence         = "";
8501             store_token_to_go();
8502             output_line_to_go();
8503             return;
8504         }
8505
8506         push( @$rtokens,     ' ', ' ' );   # making $j+2 valid simplifies coding
8507         push( @$rtoken_type, 'b', 'b' );
8508         ($rwhite_space_flag) =
8509           set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8510
8511         # find input tabbing to allow checks for tabbing disagreement
8512         ## not used for now
8513         ##$input_line_tabbing = "";
8514         ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8515
8516         # if the buffer hasn't been flushed, add a leading space if
8517         # necessary to keep essential whitespace. This is really only
8518         # necessary if we are squeezing out all ws.
8519         if ( $max_index_to_go >= 0 ) {
8520
8521             $old_line_count_in_batch++;
8522
8523             if (
8524                 is_essential_whitespace(
8525                     $last_last_nonblank_token,
8526                     $last_last_nonblank_type,
8527                     $tokens_to_go[$max_index_to_go],
8528                     $types_to_go[$max_index_to_go],
8529                     $$rtokens[0],
8530                     $$rtoken_type[0]
8531                 )
8532               )
8533             {
8534                 my $slevel = $$rslevels[0];
8535                 insert_new_token_to_go( ' ', 'b', $slevel,
8536                     $no_internal_newlines );
8537             }
8538         }
8539
8540         # If we just saw the end of an elsif block, write nag message
8541         # if we do not see another elseif or an else.
8542         if ($looking_for_else) {
8543
8544             unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8545                 write_logfile_entry("(No else block)\n");
8546             }
8547             $looking_for_else = 0;
8548         }
8549
8550         # This is a good place to kill incomplete one-line blocks
8551         if (   ( $semicolons_before_block_self_destruct == 0 )
8552             && ( $max_index_to_go >= 0 )
8553             && ( $types_to_go[$max_index_to_go] eq ';' )
8554             && ( $$rtokens[0] ne '}' ) )
8555         {
8556             destroy_one_line_block();
8557             output_line_to_go();
8558         }
8559
8560         # loop to process the tokens one-by-one
8561         $type  = 'b';
8562         $token = "";
8563
8564         foreach $j ( 0 .. $jmax ) {
8565
8566             # pull out the local values for this token
8567             extract_token($j);
8568
8569             if ( $type eq '#' ) {
8570
8571                 # trim trailing whitespace
8572                 # (there is no option at present to prevent this)
8573                 $token =~ s/\s*$//;
8574
8575                 if (
8576                     $rOpts->{'delete-side-comments'}
8577
8578                     # delete closing side comments if necessary
8579                     || (   $rOpts->{'delete-closing-side-comments'}
8580                         && $token =~ /$closing_side_comment_prefix_pattern/o
8581                         && $last_nonblank_block_type =~
8582                         /$closing_side_comment_list_pattern/o )
8583                   )
8584                 {
8585                     if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8586                         unstore_token_to_go();
8587                     }
8588                     last;
8589                 }
8590             }
8591
8592             # If we are continuing after seeing a right curly brace, flush
8593             # buffer unless we see what we are looking for, as in
8594             #   } else ...
8595             if ( $rbrace_follower && $type ne 'b' ) {
8596
8597                 unless ( $rbrace_follower->{$token} ) {
8598                     output_line_to_go();
8599                 }
8600                 $rbrace_follower = undef;
8601             }
8602
8603             $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8604             $next_nonblank_token      = $$rtokens[$j_next];
8605             $next_nonblank_token_type = $$rtoken_type[$j_next];
8606
8607             #--------------------------------------------------------
8608             # Start of section to patch token text
8609             #--------------------------------------------------------
8610
8611             # Modify certain tokens here for whitespace
8612             # The following is not yet done, but could be:
8613             #   sub (x x x)
8614             if ( $type =~ /^[wit]$/ ) {
8615
8616                 # Examples:
8617                 # change '$  var'  to '$var' etc
8618                 #        '-> new'  to '->new'
8619                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8620                     $token =~ s/\s*//g;
8621                 }
8622
8623                 if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8624             }
8625
8626             # change 'LABEL   :'   to 'LABEL:'
8627             elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8628
8629             # patch to add space to something like "x10"
8630             # This avoids having to split this token in the pre-tokenizer
8631             elsif ( $type eq 'n' ) {
8632                 if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8633             }
8634
8635             elsif ( $type eq 'Q' ) {
8636                 note_embedded_tab() if ( $token =~ "\t" );
8637
8638                 # make note of something like '$var = s/xxx/yyy/;'
8639                 # in case it should have been '$var =~ s/xxx/yyy/;'
8640                 if (
8641                        $token               =~ /^(s|tr|y|m|\/)/
8642                     && $last_nonblank_token =~ /^(=|==|!=)$/
8643
8644                     # precededed by simple scalar
8645                     && $last_last_nonblank_type eq 'i'
8646                     && $last_last_nonblank_token =~ /^\$/
8647
8648                     # followed by some kind of termination
8649                     # (but give complaint if we can's see far enough ahead)
8650                     && $next_nonblank_token =~ /^[; \)\}]$/
8651
8652                     # scalar is not decleared
8653                     && !(
8654                            $types_to_go[0] eq 'k'
8655                         && $tokens_to_go[0] =~ /^(my|our|local)$/
8656                     )
8657                   )
8658                 {
8659                     my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8660                     complain(
8661 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8662                     );
8663                 }
8664             }
8665
8666            # trim blanks from right of qw quotes
8667            # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8668             elsif ( $type eq 'q' ) {
8669                 $token =~ s/\s*$//;
8670                 note_embedded_tab() if ( $token =~ "\t" );
8671             }
8672
8673             #--------------------------------------------------------
8674             # End of section to patch token text
8675             #--------------------------------------------------------
8676
8677             # insert any needed whitespace
8678             if (   ( $type ne 'b' )
8679                 && ( $max_index_to_go >= 0 )
8680                 && ( $types_to_go[$max_index_to_go] ne 'b' )
8681                 && $rOpts_add_whitespace )
8682             {
8683                 my $ws = $$rwhite_space_flag[$j];
8684
8685                 if ( $ws == 1 ) {
8686                     insert_new_token_to_go( ' ', 'b', $slevel,
8687                         $no_internal_newlines );
8688                 }
8689             }
8690
8691             # Do not allow breaks which would promote a side comment to a
8692             # block comment.  In order to allow a break before an opening
8693             # or closing BLOCK, followed by a side comment, those sections
8694             # of code will handle this flag separately.
8695             my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8696             my $is_opening_BLOCK =
8697               (      $type eq '{'
8698                   && $token eq '{'
8699                   && $block_type
8700                   && $block_type ne 't' );
8701             my $is_closing_BLOCK =
8702               (      $type eq '}'
8703                   && $token eq '}'
8704                   && $block_type
8705                   && $block_type ne 't' );
8706
8707             if (   $side_comment_follows
8708                 && !$is_opening_BLOCK
8709                 && !$is_closing_BLOCK )
8710             {
8711                 $no_internal_newlines = 1;
8712             }
8713
8714             # We're only going to handle breaking for code BLOCKS at this
8715             # (top) level.  Other indentation breaks will be handled by
8716             # sub scan_list, which is better suited to dealing with them.
8717             if ($is_opening_BLOCK) {
8718
8719                 # Tentatively output this token.  This is required before
8720                 # calling starting_one_line_block.  We may have to unstore
8721                 # it, though, if we have to break before it.
8722                 store_token_to_go($side_comment_follows);
8723
8724                 # Look ahead to see if we might form a one-line block
8725                 my $too_long =
8726                   starting_one_line_block( $j, $jmax, $level, $slevel,
8727                     $ci_level, $rtokens, $rtoken_type, $rblock_type );
8728                 clear_breakpoint_undo_stack();
8729
8730                 # to simplify the logic below, set a flag to indicate if
8731                 # this opening brace is far from the keyword which introduces it
8732                 my $keyword_on_same_line = 1;
8733                 if (   ( $max_index_to_go >= 0 )
8734                     && ( $last_nonblank_type eq ')' ) )
8735                 {
8736                     if (   $block_type =~ /^(if|else|elsif)$/
8737                         && ( $tokens_to_go[0] eq '}' )
8738                         && $rOpts_cuddled_else )
8739                     {
8740                         $keyword_on_same_line = 1;
8741                     }
8742                     elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8743                     {
8744                         $keyword_on_same_line = 0;
8745                     }
8746                 }
8747
8748                 # decide if user requested break before '{'
8749                 my $want_break =
8750
8751                   # use -bl flag if not a sub block of any type
8752                   $block_type !~ /^sub/
8753                   ? $rOpts->{'opening-brace-on-new-line'}
8754
8755                   # use -sbl flag unless this is an anonymous sub block
8756                   : $block_type !~ /^sub\W*$/
8757                   ? $rOpts->{'opening-sub-brace-on-new-line'}
8758
8759                   # do not break for anonymous subs
8760                   : 0;
8761
8762                 # Break before an opening '{' ...
8763                 if (
8764
8765                     # if requested
8766                     $want_break
8767
8768                     # and we were unable to start looking for a block,
8769                     && $index_start_one_line_block == UNDEFINED_INDEX
8770
8771                     # or if it will not be on same line as its keyword, so that
8772                     # it will be outdented (eval.t, overload.t), and the user
8773                     # has not insisted on keeping it on the right
8774                     || (   !$keyword_on_same_line
8775                         && !$rOpts->{'opening-brace-always-on-right'} )
8776
8777                   )
8778                 {
8779
8780                     # but only if allowed
8781                     unless ($no_internal_newlines) {
8782
8783                         # since we already stored this token, we must unstore it
8784                         unstore_token_to_go();
8785
8786                         # then output the line
8787                         output_line_to_go();
8788
8789                         # and now store this token at the start of a new line
8790                         store_token_to_go($side_comment_follows);
8791                     }
8792                 }
8793
8794                 # Now update for side comment
8795                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8796
8797                 # now output this line
8798                 unless ($no_internal_newlines) {
8799                     output_line_to_go();
8800                 }
8801             }
8802
8803             elsif ($is_closing_BLOCK) {
8804
8805                 # If there is a pending one-line block ..
8806                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8807
8808                     # we have to terminate it if..
8809                     if (
8810
8811                     # it is too long (final length may be different from
8812                     # initial estimate). note: must allow 1 space for this token
8813                         excess_line_length( $index_start_one_line_block,
8814                             $max_index_to_go ) >= 0
8815
8816                         # or if it has too many semicolons
8817                         || (   $semicolons_before_block_self_destruct == 0
8818                             && $last_nonblank_type ne ';' )
8819                       )
8820                     {
8821                         destroy_one_line_block();
8822                     }
8823                 }
8824
8825                 # put a break before this closing curly brace if appropriate
8826                 unless ( $no_internal_newlines
8827                     || $index_start_one_line_block != UNDEFINED_INDEX )
8828                 {
8829
8830                     # add missing semicolon if ...
8831                     # there are some tokens
8832                     if (
8833                         ( $max_index_to_go > 0 )
8834
8835                         # and we don't have one
8836                         && ( $last_nonblank_type ne ';' )
8837
8838                         # patch until some block type issues are fixed:
8839                         # Do not add semi-colon for block types '{',
8840                         # '}', and ';' because we cannot be sure yet
8841                         # that this is a block and not an anonomyous
8842                         # hash (blktype.t, blktype1.t)
8843                         && ( $block_type !~ /^[\{\};]$/ )
8844
8845                         # it seems best not to add semicolons in these
8846                         # special block types: sort|map|grep
8847                         && ( !$is_sort_map_grep{$block_type} )
8848
8849                         # and we are allowed to do so.
8850                         && $rOpts->{'add-semicolons'}
8851                       )
8852                     {
8853
8854                         save_current_token();
8855                         $token  = ';';
8856                         $type   = ';';
8857                         $level  = $levels_to_go[$max_index_to_go];
8858                         $slevel = $nesting_depth_to_go[$max_index_to_go];
8859                         $nesting_blocks =
8860                           $nesting_blocks_to_go[$max_index_to_go];
8861                         $ci_level       = $ci_levels_to_go[$max_index_to_go];
8862                         $block_type     = "";
8863                         $container_type = "";
8864                         $container_environment = "";
8865                         $type_sequence         = "";
8866
8867                         # Note - we remove any blank AFTER extracting its
8868                         # parameters such as level, etc, above
8869                         if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8870                             unstore_token_to_go();
8871                         }
8872                         store_token_to_go();
8873
8874                         note_added_semicolon();
8875                         restore_current_token();
8876                     }
8877
8878                     # then write out everything before this closing curly brace
8879                     output_line_to_go();
8880
8881                 }
8882
8883                 # Now update for side comment
8884                 if ($side_comment_follows) { $no_internal_newlines = 1 }
8885
8886                 # store the closing curly brace
8887                 store_token_to_go();
8888
8889                 # ok, we just stored a closing curly brace.  Often, but
8890                 # not always, we want to end the line immediately.
8891                 # So now we have to check for special cases.
8892
8893                 # if this '}' successfully ends a one-line block..
8894                 my $is_one_line_block = 0;
8895                 my $keep_going        = 0;
8896                 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8897
8898                     # Remember the type of token just before the
8899                     # opening brace.  It would be more general to use
8900                     # a stack, but this will work for one-line blocks.
8901                     $is_one_line_block =
8902                       $types_to_go[$index_start_one_line_block];
8903
8904                     # we have to actually make it by removing tentative
8905                     # breaks that were set within it
8906                     undo_forced_breakpoint_stack(0);
8907                     set_nobreaks( $index_start_one_line_block,
8908                         $max_index_to_go - 1 );
8909
8910                     # then re-initialize for the next one-line block
8911                     destroy_one_line_block();
8912
8913                     # then decide if we want to break after the '}' ..
8914                     # We will keep going to allow certain brace followers as in:
8915                     #   do { $ifclosed = 1; last } unless $losing;
8916                     #
8917                     # But make a line break if the curly ends a
8918                     # significant block:
8919                     ##if ( $is_until_while_for_if_elsif_else{$block_type} ) {
8920                     if (
8921                         $is_block_without_semicolon{$block_type}
8922
8923                         # if needless semicolon follows we handle it later
8924                         && $next_nonblank_token ne ';'
8925                       )
8926                     {
8927                         output_line_to_go() unless ($no_internal_newlines);
8928                     }
8929                 }
8930
8931                 # set string indicating what we need to look for brace follower
8932                 # tokens
8933                 if ( $block_type eq 'do' ) {
8934                     $rbrace_follower = \%is_do_follower;
8935                 }
8936                 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
8937                     $rbrace_follower = \%is_if_brace_follower;
8938                 }
8939                 elsif ( $block_type eq 'else' ) {
8940                     $rbrace_follower = \%is_else_brace_follower;
8941                 }
8942
8943                 # added eval for borris.t
8944                 elsif ($is_sort_map_grep_eval{$block_type}
8945                     || $is_one_line_block eq 'G' )
8946                 {
8947                     $rbrace_follower = undef;
8948                     $keep_going      = 1;
8949                 }
8950
8951                 # anonymous sub
8952                 elsif ( $block_type =~ /^sub\W*$/ ) {
8953
8954                     if ($is_one_line_block) {
8955                         $rbrace_follower = \%is_anon_sub_1_brace_follower;
8956                     }
8957                     else {
8958                         $rbrace_follower = \%is_anon_sub_brace_follower;
8959                     }
8960                 }
8961
8962                 # None of the above: specify what can follow a closing
8963                 # brace of a block which is not an
8964                 # if/elsif/else/do/sort/map/grep/eval
8965                 # Testfiles:
8966                 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
8967                 else {
8968                     $rbrace_follower = \%is_other_brace_follower;
8969                 }
8970
8971                 # See if an elsif block is followed by another elsif or else;
8972                 # complain if not.
8973                 if ( $block_type eq 'elsif' ) {
8974
8975                     if ( $next_nonblank_token_type eq 'b' ) {    # end of line?
8976                         $looking_for_else = 1;    # ok, check on next line
8977                     }
8978                     else {
8979
8980                         unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
8981                             write_logfile_entry("No else block :(\n");
8982                         }
8983                     }
8984                 }
8985
8986                 # keep going after certain block types (map,sort,grep,eval)
8987                 # added eval for borris.t
8988                 if ($keep_going) {
8989
8990                     # keep going
8991                 }
8992
8993                 # if no more tokens, postpone decision until re-entring
8994                 elsif ( ( $next_nonblank_token_type eq 'b' )
8995                     && $rOpts_add_newlines )
8996                 {
8997                     unless ($rbrace_follower) {
8998                         output_line_to_go() unless ($no_internal_newlines);
8999                     }
9000                 }
9001
9002                 elsif ($rbrace_follower) {
9003
9004                     unless ( $rbrace_follower->{$next_nonblank_token} ) {
9005                         output_line_to_go() unless ($no_internal_newlines);
9006                     }
9007                     $rbrace_follower = undef;
9008                 }
9009
9010                 else {
9011                     output_line_to_go() unless ($no_internal_newlines);
9012                 }
9013
9014             }    # end treatment of closing block token
9015
9016             # handle semicolon
9017             elsif ( $type eq ';' ) {
9018
9019                 # kill one-line blocks with too many semicolons
9020                 $semicolons_before_block_self_destruct--;
9021                 if (
9022                     ( $semicolons_before_block_self_destruct < 0 )
9023                     || (   $semicolons_before_block_self_destruct == 0
9024                         && $next_nonblank_token_type !~ /^[b\}]$/ )
9025                   )
9026                 {
9027                     destroy_one_line_block();
9028                 }
9029
9030                 # Remove unnecessary semicolons, but not after bare
9031                 # blocks, where it could be unsafe if the brace is
9032                 # mistokenized.
9033                 if (
9034                     (
9035                         $last_nonblank_token eq '}'
9036                         && (
9037                             $is_block_without_semicolon{
9038                                 $last_nonblank_block_type}
9039                             || $last_nonblank_block_type =~ /^sub\s+\w/
9040                             || $last_nonblank_block_type =~ /^\w+:$/ )
9041                     )
9042                     || $last_nonblank_type eq ';'
9043                   )
9044                 {
9045
9046                     if (
9047                         $rOpts->{'delete-semicolons'}
9048
9049                         # don't delete ; before a # because it would promote it
9050                         # to a block comment
9051                         && ( $next_nonblank_token_type ne '#' )
9052                       )
9053                     {
9054                         note_deleted_semicolon();
9055                         output_line_to_go()
9056                           unless ( $no_internal_newlines
9057                             || $index_start_one_line_block != UNDEFINED_INDEX );
9058                         next;
9059                     }
9060                     else {
9061                         write_logfile_entry("Extra ';'\n");
9062                     }
9063                 }
9064                 store_token_to_go();
9065
9066                 output_line_to_go()
9067                   unless ( $no_internal_newlines
9068                     || ( $next_nonblank_token eq '}' ) );
9069
9070             }
9071
9072             # handle here_doc target string
9073             elsif ( $type eq 'h' ) {
9074                 $no_internal_newlines =
9075                   1;    # no newlines after seeing here-target
9076                 destroy_one_line_block();
9077                 store_token_to_go();
9078             }
9079
9080             # handle all other token types
9081             else {
9082
9083                 # if this is a blank...
9084                 if ( $type eq 'b' ) {
9085
9086                     # make it just one character
9087                     $token = ' ' if $rOpts_add_whitespace;
9088
9089                     # delete it if unwanted by whitespace rules
9090                     # or we are deleting all whitespace
9091                     my $ws = $$rwhite_space_flag[ $j + 1 ];
9092                     if ( ( defined($ws) && $ws == -1 )
9093                         || $rOpts_delete_old_whitespace )
9094                     {
9095
9096                         # unless it might make a syntax error
9097                         next
9098                           unless is_essential_whitespace(
9099                             $last_last_nonblank_token,
9100                             $last_last_nonblank_type,
9101                             $tokens_to_go[$max_index_to_go],
9102                             $types_to_go[$max_index_to_go],
9103                             $$rtokens[ $j + 1 ],
9104                             $$rtoken_type[ $j + 1 ]
9105                           );
9106                     }
9107                 }
9108                 store_token_to_go();
9109             }
9110
9111             # remember two previous nonblank OUTPUT tokens
9112             if ( $type ne '#' && $type ne 'b' ) {
9113                 $last_last_nonblank_token = $last_nonblank_token;
9114                 $last_last_nonblank_type  = $last_nonblank_type;
9115                 $last_nonblank_token      = $token;
9116                 $last_nonblank_type       = $type;
9117                 $last_nonblank_block_type = $block_type;
9118             }
9119
9120             # unset the continued-quote flag since it only applies to the
9121             # first token, and we want to resume normal formatting if
9122             # there are additional tokens on the line
9123             $in_continued_quote = 0;
9124
9125         }    # end of loop over all tokens in this 'line_of_tokens'
9126
9127         # we have to flush ..
9128         if (
9129
9130             # if there is a side comment
9131             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9132
9133             # if this line ends in a quote
9134             # NOTE: This is critically important for insuring that quoted lines
9135             # do not get processed by things like -sot and -sct
9136             || $in_quote
9137
9138             # if this is a VERSION statement
9139             || $is_VERSION_statement
9140
9141             # to keep a label on one line if that is how it is now
9142             || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9143
9144             # if we are instructed to keep all old line breaks
9145             || !$rOpts->{'delete-old-newlines'}
9146           )
9147         {
9148             destroy_one_line_block();
9149             output_line_to_go();
9150         }
9151
9152         # mark old line breakpoints in current output stream
9153         if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9154             $old_breakpoint_to_go[$max_index_to_go] = 1;
9155         }
9156     }
9157 }    # end print_line_of_tokens
9158
9159 sub note_added_semicolon {
9160     $last_added_semicolon_at = $input_line_number;
9161     if ( $added_semicolon_count == 0 ) {
9162         $first_added_semicolon_at = $last_added_semicolon_at;
9163     }
9164     $added_semicolon_count++;
9165     write_logfile_entry("Added ';' here\n");
9166 }
9167
9168 sub note_deleted_semicolon {
9169     $last_deleted_semicolon_at = $input_line_number;
9170     if ( $deleted_semicolon_count == 0 ) {
9171         $first_deleted_semicolon_at = $last_deleted_semicolon_at;
9172     }
9173     $deleted_semicolon_count++;
9174     write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
9175 }
9176
9177 sub note_embedded_tab {
9178     $embedded_tab_count++;
9179     $last_embedded_tab_at = $input_line_number;
9180     if ( !$first_embedded_tab_at ) {
9181         $first_embedded_tab_at = $last_embedded_tab_at;
9182     }
9183
9184     if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
9185         write_logfile_entry("Embedded tabs in quote or pattern\n");
9186     }
9187 }
9188
9189 sub starting_one_line_block {
9190
9191     # after seeing an opening curly brace, look for the closing brace
9192     # and see if the entire block will fit on a line.  This routine is
9193     # not always right because it uses the old whitespace, so a check
9194     # is made later (at the closing brace) to make sure we really
9195     # have a one-line block.  We have to do this preliminary check,
9196     # though, because otherwise we would always break at a semicolon
9197     # within a one-line block if the block contains multiple statements.
9198
9199     my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9200         $rblock_type )
9201       = @_;
9202
9203     # kill any current block - we can only go 1 deep
9204     destroy_one_line_block();
9205
9206     # return value:
9207     #  1=distance from start of block to opening brace exceeds line length
9208     #  0=otherwise
9209
9210     my $i_start = 0;
9211
9212     # shouldn't happen: there must have been a prior call to
9213     # store_token_to_go to put the opening brace in the output stream
9214     if ( $max_index_to_go < 0 ) {
9215         warning("program bug: store_token_to_go called incorrectly\n");
9216         report_definite_bug();
9217     }
9218     else {
9219
9220         # cannot use one-line blocks with cuddled else else/elsif lines
9221         if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9222             return 0;
9223         }
9224     }
9225
9226     my $block_type = $$rblock_type[$j];
9227
9228     # find the starting keyword for this block (such as 'if', 'else', ...)
9229
9230     if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9231         $i_start = $max_index_to_go;
9232     }
9233
9234     elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9235
9236         # For something like "if (xxx) {", the keyword "if" will be
9237         # just after the most recent break. This will be 0 unless
9238         # we have just killed a one-line block and are starting another.
9239         # (doif.t)
9240         $i_start = $index_max_forced_break + 1;
9241         if ( $types_to_go[$i_start] eq 'b' ) {
9242             $i_start++;
9243         }
9244
9245         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9246             return 0;
9247         }
9248     }
9249
9250     # the previous nonblank token should start these block types
9251     elsif (
9252         ( $last_last_nonblank_token_to_go eq $block_type )
9253         || (   $block_type =~ /^sub/
9254             && $last_last_nonblank_token_to_go =~ /^sub/ )
9255       )
9256     {
9257         $i_start = $last_last_nonblank_index_to_go;
9258     }
9259
9260     # patch for SWITCH/CASE to retain one-line case/when blocks
9261     elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9262         $i_start = $index_max_forced_break + 1;
9263         if ( $types_to_go[$i_start] eq 'b' ) {
9264             $i_start++;
9265         }
9266         unless ( $tokens_to_go[$i_start] eq $block_type ) {
9267             return 0;
9268         }
9269     }
9270
9271     else {
9272         return 1;
9273     }
9274
9275     my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9276
9277     my $i;
9278
9279     # see if length is too long to even start
9280     if ( $pos > $rOpts_maximum_line_length ) {
9281         return 1;
9282     }
9283
9284     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9285
9286         # old whitespace could be arbitrarily large, so don't use it
9287         if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9288         else                              { $pos += length( $$rtokens[$i] ) }
9289
9290         # Return false result if we exceed the maximum line length,
9291         if ( $pos > $rOpts_maximum_line_length ) {
9292             return 0;
9293         }
9294
9295         # or encounter another opening brace before finding the closing brace.
9296         elsif ($$rtokens[$i] eq '{'
9297             && $$rtoken_type[$i] eq '{'
9298             && $$rblock_type[$i] )
9299         {
9300             return 0;
9301         }
9302
9303         # if we find our closing brace..
9304         elsif ($$rtokens[$i] eq '}'
9305             && $$rtoken_type[$i] eq '}'
9306             && $$rblock_type[$i] )
9307         {
9308
9309             # be sure any trailing comment also fits on the line
9310             my $i_nonblank =
9311               ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9312
9313             if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9314                 $pos += length( $$rtokens[$i_nonblank] );
9315
9316                 if ( $i_nonblank > $i + 1 ) {
9317                     $pos += length( $$rtokens[ $i + 1 ] );
9318                 }
9319
9320                 if ( $pos > $rOpts_maximum_line_length ) {
9321                     return 0;
9322                 }
9323             }
9324
9325             # ok, it's a one-line block
9326             create_one_line_block( $i_start, 20 );
9327             return 0;
9328         }
9329
9330         # just keep going for other characters
9331         else {
9332         }
9333     }
9334
9335     # Allow certain types of new one-line blocks to form by joining
9336     # input lines.  These can be safely done, but for other block types,
9337     # we keep old one-line blocks but do not form new ones. It is not
9338     # always a good idea to make as many one-line blocks as possible,
9339     # so other types are not done.  The user can always use -mangle.
9340     if ( $is_sort_map_grep_eval{$block_type} ) {
9341         create_one_line_block( $i_start, 1 );
9342     }
9343
9344     return 0;
9345 }
9346
9347 sub unstore_token_to_go {
9348
9349     # remove most recent token from output stream
9350     if ( $max_index_to_go > 0 ) {
9351         $max_index_to_go--;
9352     }
9353     else {
9354         $max_index_to_go = UNDEFINED_INDEX;
9355     }
9356
9357 }
9358
9359 sub want_blank_line {
9360     flush();
9361     $file_writer_object->want_blank_line();
9362 }
9363
9364 sub write_unindented_line {
9365     flush();
9366     $file_writer_object->write_line( $_[0] );
9367 }
9368
9369 sub undo_lp_ci {
9370
9371     # If there is a single, long parameter within parens, like this:
9372     #
9373     #  $self->command( "/msg "
9374     #        . $infoline->chan
9375     #        . " You said $1, but did you know that it's square was "
9376     #        . $1 * $1 . " ?" );
9377     #
9378     # we can remove the continuation indentation of the 2nd and higher lines
9379     # to achieve this effect, which is more pleasing:
9380     #
9381     #  $self->command("/msg "
9382     #                 . $infoline->chan
9383     #                 . " You said $1, but did you know that it's square was "
9384     #                 . $1 * $1 . " ?");
9385
9386     my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9387     my $max_line = @$ri_first - 1;
9388
9389     # must be multiple lines
9390     return unless $max_line > $line_open;
9391
9392     my $lev_start     = $levels_to_go[$i_start];
9393     my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9394
9395     # see if all additional lines in this container have continuation
9396     # indentation
9397     my $n;
9398     my $line_1 = 1 + $line_open;
9399     for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9400         my $ibeg = $$ri_first[$n];
9401         my $iend = $$ri_last[$n];
9402         if ( $ibeg eq $closing_index ) { $n--; last }
9403         return if ( $lev_start != $levels_to_go[$ibeg] );
9404         return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
9405         last   if ( $closing_index <= $iend );
9406     }
9407
9408     # we can reduce the indentation of all continuation lines
9409     my $continuation_line_count = $n - $line_open;
9410     @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9411       (0) x ($continuation_line_count);
9412     @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9413       @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
9414 }
9415
9416 sub set_logical_padding {
9417
9418     # Look at a batch of lines and see if extra padding can improve the
9419     # alignment when there are certain leading operators. Here is an
9420     # example, in which some extra space is introduced before
9421     # '( $year' to make it line up with the subsequent lines:
9422     #
9423     #       if (   ( $Year < 1601 )
9424     #           || ( $Year > 2899 )
9425     #           || ( $EndYear < 1601 )
9426     #           || ( $EndYear > 2899 ) )
9427     #       {
9428     #           &Error_OutOfRange;
9429     #       }
9430     #
9431     my ( $ri_first, $ri_last ) = @_;
9432     my $max_line = @$ri_first - 1;
9433
9434     my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
9435         $tok_next, $has_leading_op_next, $has_leading_op );
9436
9437     # looking at each line of this batch..
9438     foreach $line ( 0 .. $max_line - 1 ) {
9439
9440         # see if the next line begins with a logical operator
9441         $ibeg                = $$ri_first[$line];
9442         $iend                = $$ri_last[$line];
9443         $ibeg_next           = $$ri_first[ $line + 1 ];
9444         $tok_next            = $tokens_to_go[$ibeg_next];
9445         $has_leading_op_next = $is_chain_operator{$tok_next};
9446         next unless ($has_leading_op_next);
9447
9448         # next line must not be at lesser depth
9449         next
9450           if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
9451
9452         # identify the token in this line to be padded on the left
9453         $ipad = undef;
9454
9455         # handle lines at same depth...
9456         if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
9457
9458             # if this is not first line of the batch ...
9459             if ( $line > 0 ) {
9460
9461                 # and we have leading operator
9462                 next if $has_leading_op;
9463
9464                 # and ..
9465                 # 1. the previous line is at lesser depth, or
9466                 # 2. the previous line ends in an assignment
9467                 #
9468                 # Example 1: previous line at lesser depth
9469                 #       if (   ( $Year < 1601 )      # <- we are here but
9470                 #           || ( $Year > 2899 )      #  list has not yet
9471                 #           || ( $EndYear < 1601 )   # collapsed vertically
9472                 #           || ( $EndYear > 2899 ) )
9473                 #       {
9474                 #
9475                 # Example 2: previous line ending in assignment:
9476                 #    $leapyear =
9477                 #        $year % 4   ? 0     # <- We are here
9478                 #      : $year % 100 ? 1
9479                 #      : $year % 400 ? 0
9480                 #      : 1;
9481                 next
9482                   unless (
9483                     $is_assignment{ $types_to_go[$iendm] }
9484                     || ( $nesting_depth_to_go[$ibegm] <
9485                         $nesting_depth_to_go[$ibeg] )
9486                   );
9487
9488                 # we will add padding before the first token
9489                 $ipad = $ibeg;
9490             }
9491
9492             # for first line of the batch..
9493             else {
9494
9495                 # WARNING: Never indent if first line is starting in a
9496                 # continued quote, which would change the quote.
9497                 next if $starting_in_quote;
9498
9499                 # if this is text after closing '}'
9500                 # then look for an interior token to pad
9501                 if ( $types_to_go[$ibeg] eq '}' ) {
9502
9503                 }
9504
9505                 # otherwise, we might pad if it looks really good
9506                 else {
9507
9508                     # we might pad token $ibeg, so be sure that it
9509                     # is at the same depth as the next line.
9510                     next
9511                       if ( $nesting_depth_to_go[$ibeg] !=
9512                         $nesting_depth_to_go[$ibeg_next] );
9513
9514                     # We can pad on line 1 of a statement if at least 3
9515                     # lines will be aligned. Otherwise, it
9516                     # can look very confusing.
9517                     if ( $max_line > 2 ) {
9518                         my $leading_token = $tokens_to_go[$ibeg_next];
9519
9520                         # never indent line 1 of a '.' series because
9521                         # previous line is most likely at same level.
9522                         # TODO: we should also look at the leasing_spaces
9523                         # of the last output line and skip if it is same
9524                         # as this line.
9525                         next if ( $leading_token eq '.' );
9526
9527                         my $count = 1;
9528                         foreach my $l ( 2 .. 3 ) {
9529                             my $ibeg_next_next = $$ri_first[ $line + $l ];
9530                             next
9531                               unless $tokens_to_go[$ibeg_next_next] eq
9532                               $leading_token;
9533                             $count++;
9534                         }
9535                         next unless $count == 3;
9536                         $ipad = $ibeg;
9537                     }
9538                     else {
9539                         next;
9540                     }
9541                 }
9542             }
9543         }
9544
9545         # find interior token to pad if necessary
9546         if ( !defined($ipad) ) {
9547
9548             for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
9549
9550                 # find any unclosed container
9551                 next
9552                   unless ( $type_sequence_to_go[$i]
9553                     && $mate_index_to_go[$i] > $iend );
9554
9555                 # find next nonblank token to pad
9556                 $ipad = $i + 1;
9557                 if ( $types_to_go[$ipad] eq 'b' ) {
9558                     $ipad++;
9559                     last if ( $ipad > $iend );
9560                 }
9561             }
9562             last unless $ipad;
9563         }
9564
9565         # next line must not be at greater depth
9566         my $iend_next = $$ri_last[ $line + 1 ];
9567         next
9568           if ( $nesting_depth_to_go[ $iend_next + 1 ] >
9569             $nesting_depth_to_go[$ipad] );
9570
9571         # lines must be somewhat similar to be padded..
9572         my $inext_next = $ibeg_next + 1;
9573         if ( $types_to_go[$inext_next] eq 'b' ) {
9574             $inext_next++;
9575         }
9576         my $type = $types_to_go[$ipad];
9577
9578         # see if there are multiple continuation lines
9579         my $logical_continuation_lines = 1;
9580         if ( $line + 2 <= $max_line ) {
9581             my $leading_token  = $tokens_to_go[$ibeg_next];
9582             my $ibeg_next_next = $$ri_first[ $line + 2 ];
9583             if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
9584                 && $nesting_depth_to_go[$ibeg_next] eq
9585                 $nesting_depth_to_go[$ibeg_next_next] )
9586             {
9587                 $logical_continuation_lines++;
9588             }
9589         }
9590         if (
9591
9592             # either we have multiple continuation lines to follow
9593             # and we are not padding the first token
9594             ( $logical_continuation_lines > 1 && $ipad > 0 )
9595
9596             # or..
9597             || (
9598
9599                 # types must match
9600                 $types_to_go[$inext_next] eq $type
9601
9602                 # and keywords must match if keyword
9603                 && !(
9604                        $type eq 'k'
9605                     && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
9606                 )
9607             )
9608           )
9609         {
9610
9611             #----------------------begin special check---------------
9612             #
9613             # One more check is needed before we can make the pad.
9614             # If we are in a list with some long items, we want each
9615             # item to stand out.  So in the following example, the
9616             # first line begining with '$casefold->' would look good
9617             # padded to align with the next line, but then it
9618             # would be indented more than the last line, so we
9619             # won't do it.
9620             #
9621             #  ok(
9622             #      $casefold->{code}         eq '0041'
9623             #        && $casefold->{status}  eq 'C'
9624             #        && $casefold->{mapping} eq '0061',
9625             #      'casefold 0x41'
9626             #  );
9627             #
9628             # Note:
9629             # It would be faster, and almost as good, to use a comma
9630             # count, and not pad if comma_count > 1 and the previous
9631             # line did not end with a comma.
9632             #
9633             my $ok_to_pad = 1;
9634
9635             my $ibg   = $$ri_first[ $line + 1 ];
9636             my $depth = $nesting_depth_to_go[ $ibg + 1 ];
9637
9638             # just use simplified formula for leading spaces to avoid
9639             # needless sub calls
9640             my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
9641
9642             # look at each line beyond the next ..
9643             my $l = $line + 1;
9644             foreach $l ( $line + 2 .. $max_line ) {
9645                 my $ibg = $$ri_first[$l];
9646
9647                 # quit looking at the end of this container
9648                 last
9649                   if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
9650                   || ( $nesting_depth_to_go[$ibg] < $depth );
9651
9652                 # cannot do the pad if a later line would be
9653                 # outdented more
9654                 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
9655                     $ok_to_pad = 0;
9656                     last;
9657                 }
9658             }
9659
9660             # don't pad if we end in a broken list
9661             if ( $l == $max_line ) {
9662                 my $i2 = $$ri_last[$l];
9663                 if ( $types_to_go[$i2] eq '#' ) {
9664                     my $i1 = $$ri_first[$l];
9665                     next
9666                       if (
9667                         terminal_type( \@types_to_go, \@block_type_to_go, $i1,
9668                             $i2 ) eq ','
9669                       );
9670                 }
9671             }
9672             next unless $ok_to_pad;
9673
9674             #----------------------end special check---------------
9675
9676             my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
9677             my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
9678             $pad_spaces = $length_2 - $length_1;
9679
9680             # make sure this won't change if -lp is used
9681             my $indentation_1 = $leading_spaces_to_go[$ibeg];
9682             if ( ref($indentation_1) ) {
9683                 if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
9684                     my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
9685                     unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
9686                         $pad_spaces = 0;
9687                     }
9688                 }
9689             }
9690
9691             # we might be able to handle a pad of -1 by removing a blank
9692             # token
9693             if ( $pad_spaces < 0 ) {
9694                 if ( $pad_spaces == -1 ) {
9695                     if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
9696                         $tokens_to_go[ $ipad - 1 ] = '';
9697                     }
9698                 }
9699                 $pad_spaces = 0;
9700             }
9701
9702             # now apply any padding for alignment
9703             if ( $ipad >= 0 && $pad_spaces ) {
9704                 my $length_t = total_line_length( $ibeg, $iend );
9705                 if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
9706                     $tokens_to_go[$ipad] =
9707                       ' ' x $pad_spaces . $tokens_to_go[$ipad];
9708                 }
9709             }
9710         }
9711     }
9712     continue {
9713         $iendm          = $iend;
9714         $ibegm          = $ibeg;
9715         $has_leading_op = $has_leading_op_next;
9716     }    # end of loop over lines
9717     return;
9718 }
9719
9720 sub correct_lp_indentation {
9721
9722     # When the -lp option is used, we need to make a last pass through
9723     # each line to correct the indentation positions in case they differ
9724     # from the predictions.  This is necessary because perltidy uses a
9725     # predictor/corrector method for aligning with opening parens.  The
9726     # predictor is usually good, but sometimes stumbles.  The corrector
9727     # tries to patch things up once the actual opening paren locations
9728     # are known.
9729     my ( $ri_first, $ri_last ) = @_;
9730     my $do_not_pad = 0;
9731
9732     #  Note on flag '$do_not_pad':
9733     #  We want to avoid a situation like this, where the aligner inserts
9734     #  whitespace before the '=' to align it with a previous '=', because
9735     #  otherwise the parens might become mis-aligned in a situation like
9736     #  this, where the '=' has become aligned with the previous line,
9737     #  pushing the opening '(' forward beyond where we want it.
9738     #
9739     #  $mkFloor::currentRoom = '';
9740     #  $mkFloor::c_entry     = $c->Entry(
9741     #                                 -width        => '10',
9742     #                                 -relief       => 'sunken',
9743     #                                 ...
9744     #                                 );
9745     #
9746     #  We leave it to the aligner to decide how to do this.
9747
9748     # first remove continuation indentation if appropriate
9749     my $max_line = @$ri_first - 1;
9750
9751     # looking at each line of this batch..
9752     my ( $ibeg, $iend );
9753     my $line;
9754     foreach $line ( 0 .. $max_line ) {
9755         $ibeg = $$ri_first[$line];
9756         $iend = $$ri_last[$line];
9757
9758         # looking at each token in this output line..
9759         my $i;
9760         foreach $i ( $ibeg .. $iend ) {
9761
9762             # How many space characters to place before this token
9763             # for special alignment.  Actual padding is done in the
9764             # continue block.
9765
9766             # looking for next unvisited indentation item
9767             my $indentation = $leading_spaces_to_go[$i];
9768             if ( !$indentation->get_MARKED() ) {
9769                 $indentation->set_MARKED(1);
9770
9771                 # looking for indentation item for which we are aligning
9772                 # with parens, braces, and brackets
9773                 next unless ( $indentation->get_ALIGN_PAREN() );
9774
9775                 # skip closed container on this line
9776                 if ( $i > $ibeg ) {
9777                     my $im = $i - 1;
9778                     if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
9779                     if (   $type_sequence_to_go[$im]
9780                         && $mate_index_to_go[$im] <= $iend )
9781                     {
9782                         next;
9783                     }
9784                 }
9785
9786                 if ( $line == 1 && $i == $ibeg ) {
9787                     $do_not_pad = 1;
9788                 }
9789
9790                 # Ok, let's see what the error is and try to fix it
9791                 my $actual_pos;
9792                 my $predicted_pos = $indentation->get_SPACES();
9793                 if ( $i > $ibeg ) {
9794
9795                     # token is mid-line - use length to previous token
9796                     $actual_pos = total_line_length( $ibeg, $i - 1 );
9797
9798                     # for mid-line token, we must check to see if all
9799                     # additional lines have continuation indentation,
9800                     # and remove it if so.  Otherwise, we do not get
9801                     # good alignment.
9802                     my $closing_index = $indentation->get_CLOSED();
9803                     if ( $closing_index > $iend ) {
9804                         my $ibeg_next = $$ri_first[ $line + 1 ];
9805                         if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
9806                             undo_lp_ci( $line, $i, $closing_index, $ri_first,
9807                                 $ri_last );
9808                         }
9809                     }
9810                 }
9811                 elsif ( $line > 0 ) {
9812
9813                     # handle case where token starts a new line;
9814                     # use length of previous line
9815                     my $ibegm = $$ri_first[ $line - 1 ];
9816                     my $iendm = $$ri_last[ $line - 1 ];
9817                     $actual_pos = total_line_length( $ibegm, $iendm );
9818
9819                     # follow -pt style
9820                     ++$actual_pos
9821                       if ( $types_to_go[ $iendm + 1 ] eq 'b' );
9822                 }
9823                 else {
9824
9825                     # token is first character of first line of batch
9826                     $actual_pos = $predicted_pos;
9827                 }
9828
9829                 my $move_right = $actual_pos - $predicted_pos;
9830
9831                 # done if no error to correct (gnu2.t)
9832                 if ( $move_right == 0 ) {
9833                     $indentation->set_RECOVERABLE_SPACES($move_right);
9834                     next;
9835                 }
9836
9837                 # if we have not seen closure for this indentation in
9838                 # this batch, we can only pass on a request to the
9839                 # vertical aligner
9840                 my $closing_index = $indentation->get_CLOSED();
9841
9842                 if ( $closing_index < 0 ) {
9843                     $indentation->set_RECOVERABLE_SPACES($move_right);
9844                     next;
9845                 }
9846
9847                 # If necessary, look ahead to see if there is really any
9848                 # leading whitespace dependent on this whitespace, and
9849                 # also find the longest line using this whitespace.
9850                 # Since it is always safe to move left if there are no
9851                 # dependents, we only need to do this if we may have
9852                 # dependent nodes or need to move right.
9853
9854                 my $right_margin = 0;
9855                 my $have_child   = $indentation->get_HAVE_CHILD();
9856
9857                 my %saw_indentation;
9858                 my $line_count = 1;
9859                 $saw_indentation{$indentation} = $indentation;
9860
9861                 if ( $have_child || $move_right > 0 ) {
9862                     $have_child = 0;
9863                     my $max_length = 0;
9864                     if ( $i == $ibeg ) {
9865                         $max_length = total_line_length( $ibeg, $iend );
9866                     }
9867
9868                     # look ahead at the rest of the lines of this batch..
9869                     my $line_t;
9870                     foreach $line_t ( $line + 1 .. $max_line ) {
9871                         my $ibeg_t = $$ri_first[$line_t];
9872                         my $iend_t = $$ri_last[$line_t];
9873                         last if ( $closing_index <= $ibeg_t );
9874
9875                         # remember all different indentation objects
9876                         my $indentation_t = $leading_spaces_to_go[$ibeg_t];
9877                         $saw_indentation{$indentation_t} = $indentation_t;
9878                         $line_count++;
9879
9880                         # remember longest line in the group
9881                         my $length_t = total_line_length( $ibeg_t, $iend_t );
9882                         if ( $length_t > $max_length ) {
9883                             $max_length = $length_t;
9884                         }
9885                     }
9886                     $right_margin = $rOpts_maximum_line_length - $max_length;
9887                     if ( $right_margin < 0 ) { $right_margin = 0 }
9888                 }
9889
9890                 my $first_line_comma_count =
9891                   grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
9892                 my $comma_count = $indentation->get_COMMA_COUNT();
9893                 my $arrow_count = $indentation->get_ARROW_COUNT();
9894
9895                 # This is a simple approximate test for vertical alignment:
9896                 # if we broke just after an opening paren, brace, bracket,
9897                 # and there are 2 or more commas in the first line,
9898                 # and there are no '=>'s,
9899                 # then we are probably vertically aligned.  We could set
9900                 # an exact flag in sub scan_list, but this is good
9901                 # enough.
9902                 my $indentation_count = keys %saw_indentation;
9903                 my $is_vertically_aligned =
9904                   (      $i == $ibeg
9905                       && $first_line_comma_count > 1
9906                       && $indentation_count == 1
9907                       && ( $arrow_count == 0 || $arrow_count == $line_count ) );
9908
9909                 # Make the move if possible ..
9910                 if (
9911
9912                     # we can always move left
9913                     $move_right < 0
9914
9915                     # but we should only move right if we are sure it will
9916                     # not spoil vertical alignment
9917                     || ( $comma_count == 0 )
9918                     || ( $comma_count > 0 && !$is_vertically_aligned )
9919                   )
9920                 {
9921                     my $move =
9922                       ( $move_right <= $right_margin )
9923                       ? $move_right
9924                       : $right_margin;
9925
9926                     foreach ( keys %saw_indentation ) {
9927                         $saw_indentation{$_}
9928                           ->permanently_decrease_AVAILABLE_SPACES( -$move );
9929                     }
9930                 }
9931
9932                 # Otherwise, record what we want and the vertical aligner
9933                 # will try to recover it.
9934                 else {
9935                     $indentation->set_RECOVERABLE_SPACES($move_right);
9936                 }
9937             }
9938         }
9939     }
9940     return $do_not_pad;
9941 }
9942
9943 # flush is called to output any tokens in the pipeline, so that
9944 # an alternate source of lines can be written in the correct order
9945
9946 sub flush {
9947     destroy_one_line_block();
9948     output_line_to_go();
9949     Perl::Tidy::VerticalAligner::flush();
9950 }
9951
9952 # sub output_line_to_go sends one logical line of tokens on down the
9953 # pipeline to the VerticalAligner package, breaking the line into continuation
9954 # lines as necessary.  The line of tokens is ready to go in the "to_go"
9955 # arrays.
9956 sub output_line_to_go {
9957
9958     # debug stuff; this routine can be called from many points
9959     FORMATTER_DEBUG_FLAG_OUTPUT && do {
9960         my ( $a, $b, $c ) = caller;
9961         write_diagnostics(
9962 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9963         );
9964         my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9965         write_diagnostics("$output_str\n");
9966     };
9967
9968     # just set a tentative breakpoint if we might be in a one-line block
9969     if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9970         set_forced_breakpoint($max_index_to_go);
9971         return;
9972     }
9973
9974     my $cscw_block_comment;
9975     $cscw_block_comment = add_closing_side_comment()
9976       if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9977
9978     match_opening_and_closing_tokens();
9979
9980     # tell the -lp option we are outputting a batch so it can close
9981     # any unfinished items in its stack
9982     finish_lp_batch();
9983
9984     # If this line ends in a code block brace, set breaks at any
9985     # previous closing code block braces to breakup a chain of code
9986     # blocks on one line.  This is very rare but can happen for
9987     # user-defined subs.  For example we might be looking at this:
9988     #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9989     my $saw_good_break = 0;    # flag to force breaks even if short line
9990     if (
9991
9992         # looking for opening or closing block brace
9993         $block_type_to_go[$max_index_to_go]
9994
9995         # but not one of these which are never duplicated on a line:
9996         ##&& !$is_until_while_for_if_elsif_else{ $block_type_to_go
9997         ##      [$max_index_to_go] }
9998         && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9999       )
10000     {
10001         my $lev = $nesting_depth_to_go[$max_index_to_go];
10002
10003         # Walk backwards from the end and
10004         # set break at any closing block braces at the same level.
10005         # But quit if we are not in a chain of blocks.
10006         for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
10007             last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
10008             next if ( $levels_to_go[$i] > $lev );    # skip past higher level
10009
10010             if ( $block_type_to_go[$i] ) {
10011                 if ( $tokens_to_go[$i] eq '}' ) {
10012                     set_forced_breakpoint($i);
10013                     $saw_good_break = 1;
10014                 }
10015             }
10016
10017             # quit if we see anything besides words, function, blanks
10018             # at this level
10019             elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
10020         }
10021     }
10022
10023     my $imin = 0;
10024     my $imax = $max_index_to_go;
10025
10026     # trim any blank tokens
10027     if ( $max_index_to_go >= 0 ) {
10028         if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
10029         if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
10030     }
10031
10032     # anything left to write?
10033     if ( $imin <= $imax ) {
10034
10035         # add a blank line before certain key types
10036         if ( $last_line_leading_type !~ /^[#b]/ ) {
10037             my $want_blank    = 0;
10038             my $leading_token = $tokens_to_go[$imin];
10039             my $leading_type  = $types_to_go[$imin];
10040
10041             # blank lines before subs except declarations and one-liners
10042             # MCONVERSION LOCATION - for sub tokenization change
10043             if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
10044                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
10045                   && (
10046                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10047                         $imax ) !~ /^[\;\}]$/
10048                   );
10049             }
10050
10051             # break before all package declarations
10052             # MCONVERSION LOCATION - for tokenizaton change
10053             elsif ($leading_token =~ /^(package\s)/
10054                 && $leading_type eq 'i' )
10055             {
10056                 $want_blank = ( $rOpts->{'blanks-before-subs'} );
10057             }
10058
10059             # break before certain key blocks except one-liners
10060             if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
10061                 $want_blank = ( $rOpts->{'blanks-before-subs'} )
10062                   && (
10063                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10064                         $imax ) ne '}'
10065                   );
10066             }
10067
10068             # Break before certain block types if we haven't had a
10069             # break at this level for a while.  This is the
10070             # difficult decision..
10071             elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
10072                 && $leading_type eq 'k' )
10073             {
10074                 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
10075                 if ( !defined($lc) ) { $lc = 0 }
10076
10077                 $want_blank = $rOpts->{'blanks-before-blocks'}
10078                   && $lc >= $rOpts->{'long-block-line-count'}
10079                   && $file_writer_object->get_consecutive_nonblank_lines() >=
10080                   $rOpts->{'long-block-line-count'}
10081                   && (
10082                     terminal_type( \@types_to_go, \@block_type_to_go, $imin,
10083                         $imax ) ne '}'
10084                   );
10085             }
10086
10087             if ($want_blank) {
10088
10089                 # future: send blank line down normal path to VerticalAligner
10090                 Perl::Tidy::VerticalAligner::flush();
10091                 $file_writer_object->write_blank_code_line();
10092             }
10093         }
10094
10095         # update blank line variables and count number of consecutive
10096         # non-blank, non-comment lines at this level
10097         $last_last_line_leading_level = $last_line_leading_level;
10098         $last_line_leading_level      = $levels_to_go[$imin];
10099         if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
10100         $last_line_leading_type = $types_to_go[$imin];
10101         if (   $last_line_leading_level == $last_last_line_leading_level
10102             && $last_line_leading_type ne 'b'
10103             && $last_line_leading_type ne '#'
10104             && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
10105         {
10106             $nonblank_lines_at_depth[$last_line_leading_level]++;
10107         }
10108         else {
10109             $nonblank_lines_at_depth[$last_line_leading_level] = 1;
10110         }
10111
10112         FORMATTER_DEBUG_FLAG_FLUSH && do {
10113             my ( $package, $file, $line ) = caller;
10114             print
10115 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
10116         };
10117
10118         # add a couple of extra terminal blank tokens
10119         pad_array_to_go();
10120
10121         # set all forced breakpoints for good list formatting
10122         my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
10123
10124         if (
10125             $max_index_to_go > 0
10126             && (
10127                    $is_long_line
10128                 || $old_line_count_in_batch > 1
10129                 || is_unbalanced_batch()
10130                 || (
10131                     $comma_count_in_batch
10132                     && (   $rOpts_maximum_fields_per_table > 0
10133                         || $rOpts_comma_arrow_breakpoints == 0 )
10134                 )
10135             )
10136           )
10137         {
10138             $saw_good_break ||= scan_list();
10139         }
10140
10141         # let $ri_first and $ri_last be references to lists of
10142         # first and last tokens of line fragments to output..
10143         my ( $ri_first, $ri_last );
10144
10145         # write a single line if..
10146         if (
10147
10148             # we aren't allowed to add any newlines
10149             !$rOpts_add_newlines
10150
10151             # or, we don't already have an interior breakpoint
10152             # and we didn't see a good breakpoint
10153             || (
10154                    !$forced_breakpoint_count
10155                 && !$saw_good_break
10156
10157                 # and this line is 'short'
10158                 && !$is_long_line
10159             )
10160           )
10161         {
10162             @$ri_first = ($imin);
10163             @$ri_last  = ($imax);
10164         }
10165
10166         # otherwise use multiple lines
10167         else {
10168
10169             ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
10170
10171             # now we do a correction step to clean this up a bit
10172             # (The only time we would not do this is for debugging)
10173             if ( $rOpts->{'recombine'} ) {
10174                 ( $ri_first, $ri_last ) =
10175                   recombine_breakpoints( $ri_first, $ri_last );
10176             }
10177         }
10178
10179         # do corrector step if -lp option is used
10180         my $do_not_pad = 0;
10181         if ($rOpts_line_up_parentheses) {
10182             $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
10183         }
10184         send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
10185     }
10186     prepare_for_new_input_lines();
10187
10188     # output any new -cscw block comment
10189     if ($cscw_block_comment) {
10190         flush();
10191         $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
10192     }
10193 }
10194
10195 sub reset_block_text_accumulator {
10196
10197     # save text after 'if' and 'elsif' to append after 'else'
10198     if ($accumulating_text_for_block) {
10199
10200         if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10201             push @{$rleading_block_if_elsif_text}, $leading_block_text;
10202         }
10203     }
10204     $accumulating_text_for_block        = "";
10205     $leading_block_text                 = "";
10206     $leading_block_text_level           = 0;
10207     $leading_block_text_length_exceeded = 0;
10208     $leading_block_text_line_number     = 0;
10209     $leading_block_text_line_length     = 0;
10210 }
10211
10212 sub set_block_text_accumulator {
10213     my $i = shift;
10214     $accumulating_text_for_block = $tokens_to_go[$i];
10215     if ( $accumulating_text_for_block !~ /^els/ ) {
10216         $rleading_block_if_elsif_text = [];
10217     }
10218     $leading_block_text       = "";
10219     $leading_block_text_level = $levels_to_go[$i];
10220     $leading_block_text_line_number =
10221       $vertical_aligner_object->get_output_line_number();
10222     $leading_block_text_length_exceeded = 0;
10223
10224     # this will contain the column number of the last character
10225     # of the closing side comment
10226     $leading_block_text_line_length =
10227       length($accumulating_text_for_block) +
10228       length( $rOpts->{'closing-side-comment-prefix'} ) +
10229       $leading_block_text_level * $rOpts_indent_columns + 3;
10230 }
10231
10232 sub accumulate_block_text {
10233     my $i = shift;
10234
10235     # accumulate leading text for -csc, ignoring any side comments
10236     if (   $accumulating_text_for_block
10237         && !$leading_block_text_length_exceeded
10238         && $types_to_go[$i] ne '#' )
10239     {
10240
10241         my $added_length = length( $tokens_to_go[$i] );
10242         $added_length += 1 if $i == 0;
10243         my $new_line_length = $leading_block_text_line_length + $added_length;
10244
10245         # we can add this text if we don't exceed some limits..
10246         if (
10247
10248             # we must not have already exceeded the text length limit
10249             length($leading_block_text) <
10250             $rOpts_closing_side_comment_maximum_text
10251
10252             # and either:
10253             # the new total line length must be below the line length limit
10254             # or the new length must be below the text length limit
10255             # (ie, we may allow one token to exceed the text length limit)
10256             && ( $new_line_length < $rOpts_maximum_line_length
10257                 || length($leading_block_text) + $added_length <
10258                 $rOpts_closing_side_comment_maximum_text )
10259
10260             # UNLESS: we are adding a closing paren before the brace we seek.
10261             # This is an attempt to avoid situations where the ... to be
10262             # added are longer than the omitted right paren, as in:
10263
10264             #   foreach my $item (@a_rather_long_variable_name_here) {
10265             #      &whatever;
10266             #   } ## end foreach my $item (@a_rather_long_variable_name_here...
10267
10268             || (
10269                 $tokens_to_go[$i] eq ')'
10270                 && (
10271                     (
10272                            $i + 1 <= $max_index_to_go
10273                         && $block_type_to_go[ $i + 1 ] eq
10274                         $accumulating_text_for_block
10275                     )
10276                     || (   $i + 2 <= $max_index_to_go
10277                         && $block_type_to_go[ $i + 2 ] eq
10278                         $accumulating_text_for_block )
10279                 )
10280             )
10281           )
10282         {
10283
10284             # add an extra space at each newline
10285             if ( $i == 0 ) { $leading_block_text .= ' ' }
10286
10287             # add the token text
10288             $leading_block_text .= $tokens_to_go[$i];
10289             $leading_block_text_line_length = $new_line_length;
10290         }
10291
10292         # show that text was truncated if necessary
10293         elsif ( $types_to_go[$i] ne 'b' ) {
10294             $leading_block_text_length_exceeded = 1;
10295             $leading_block_text .= '...';
10296         }
10297     }
10298 }
10299
10300 {
10301     my %is_if_elsif_else_unless_while_until_for_foreach;
10302
10303     BEGIN {
10304
10305         # These block types may have text between the keyword and opening
10306         # curly.  Note: 'else' does not, but must be included to allow trailing
10307         # if/elsif text to be appended.
10308         # patch for SWITCH/CASE: added 'case' and 'when'
10309         @_ = qw(if elsif else unless while until for foreach case when);
10310         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10311     }
10312
10313     sub accumulate_csc_text {
10314
10315         # called once per output buffer when -csc is used. Accumulates
10316         # the text placed after certain closing block braces.
10317         # Defines and returns the following for this buffer:
10318
10319         my $block_leading_text = "";    # the leading text of the last '}'
10320         my $rblock_leading_if_elsif_text;
10321         my $i_block_leading_text =
10322           -1;    # index of token owning block_leading_text
10323         my $block_line_count    = 100;    # how many lines the block spans
10324         my $terminal_type       = 'b';    # type of last nonblank token
10325         my $i_terminal          = 0;      # index of last nonblank token
10326         my $terminal_block_type = "";
10327
10328         for my $i ( 0 .. $max_index_to_go ) {
10329             my $type       = $types_to_go[$i];
10330             my $block_type = $block_type_to_go[$i];
10331             my $token      = $tokens_to_go[$i];
10332
10333             # remember last nonblank token type
10334             if ( $type ne '#' && $type ne 'b' ) {
10335                 $terminal_type       = $type;
10336                 $terminal_block_type = $block_type;
10337                 $i_terminal          = $i;
10338             }
10339
10340             my $type_sequence = $type_sequence_to_go[$i];
10341             if ( $block_type && $type_sequence ) {
10342
10343                 if ( $token eq '}' ) {
10344
10345                     # restore any leading text saved when we entered this block
10346                     if ( defined( $block_leading_text{$type_sequence} ) ) {
10347                         ( $block_leading_text, $rblock_leading_if_elsif_text ) =
10348                           @{ $block_leading_text{$type_sequence} };
10349                         $i_block_leading_text = $i;
10350                         delete $block_leading_text{$type_sequence};
10351                         $rleading_block_if_elsif_text =
10352                           $rblock_leading_if_elsif_text;
10353                     }
10354
10355                     # if we run into a '}' then we probably started accumulating
10356                     # at something like a trailing 'if' clause..no harm done.
10357                     if (   $accumulating_text_for_block
10358                         && $levels_to_go[$i] <= $leading_block_text_level )
10359                     {
10360                         my $lev = $levels_to_go[$i];
10361                         reset_block_text_accumulator();
10362                     }
10363
10364                     if ( defined( $block_opening_line_number{$type_sequence} ) )
10365                     {
10366                         my $output_line_number =
10367                           $vertical_aligner_object->get_output_line_number();
10368                         $block_line_count = $output_line_number -
10369                           $block_opening_line_number{$type_sequence} + 1;
10370                         delete $block_opening_line_number{$type_sequence};
10371                     }
10372                     else {
10373
10374                         # Error: block opening line undefined for this line..
10375                         # This shouldn't be possible, but it is not a
10376                         # significant problem.
10377                     }
10378                 }
10379
10380                 elsif ( $token eq '{' ) {
10381
10382                     my $line_number =
10383                       $vertical_aligner_object->get_output_line_number();
10384                     $block_opening_line_number{$type_sequence} = $line_number;
10385
10386                     if (   $accumulating_text_for_block
10387                         && $levels_to_go[$i] == $leading_block_text_level )
10388                     {
10389
10390                         if ( $accumulating_text_for_block eq $block_type ) {
10391
10392                             # save any leading text before we enter this block
10393                             $block_leading_text{$type_sequence} = [
10394                                 $leading_block_text,
10395                                 $rleading_block_if_elsif_text
10396                             ];
10397                             $block_opening_line_number{$type_sequence} =
10398                               $leading_block_text_line_number;
10399                             reset_block_text_accumulator();
10400                         }
10401                         else {
10402
10403                             # shouldn't happen, but not a serious error.
10404                             # We were accumulating -csc text for block type
10405                             # $accumulating_text_for_block and unexpectedly
10406                             # encountered a '{' for block type $block_type.
10407                         }
10408                     }
10409                 }
10410             }
10411
10412             if (   $type eq 'k'
10413                 && $csc_new_statement_ok
10414                 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
10415                 && $token =~ /$closing_side_comment_list_pattern/o )
10416             {
10417                 set_block_text_accumulator($i);
10418             }
10419             else {
10420
10421                 # note: ignoring type 'q' because of tricks being played
10422                 # with 'q' for hanging side comments
10423                 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10424                     $csc_new_statement_ok =
10425                       ( $block_type || $type eq 'J' || $type eq ';' );
10426                 }
10427                 if (   $type eq ';'
10428                     && $accumulating_text_for_block
10429                     && $levels_to_go[$i] == $leading_block_text_level )
10430                 {
10431                     reset_block_text_accumulator();
10432                 }
10433                 else {
10434                     accumulate_block_text($i);
10435                 }
10436             }
10437         }
10438
10439         # Treat an 'else' block specially by adding preceding 'if' and
10440         # 'elsif' text.  Otherwise, the 'end else' is not helpful,
10441         # especially for cuddled-else formatting.
10442         if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10443             $block_leading_text =
10444               make_else_csc_text( $i_terminal, $terminal_block_type,
10445                 $block_leading_text, $rblock_leading_if_elsif_text );
10446         }
10447
10448         return ( $terminal_type, $i_terminal, $i_block_leading_text,
10449             $block_leading_text, $block_line_count );
10450     }
10451 }
10452
10453 sub make_else_csc_text {
10454
10455     # create additional -csc text for an 'else' and optionally 'elsif',
10456     # depending on the value of switch
10457     # $rOpts_closing_side_comment_else_flag:
10458     #
10459     #  = 0 add 'if' text to trailing else
10460     #  = 1 same as 0 plus:
10461     #      add 'if' to 'elsif's if can fit in line length
10462     #      add last 'elsif' to trailing else if can fit in one line
10463     #  = 2 same as 1 but do not check if exceed line length
10464     #
10465     # $rif_elsif_text = a reference to a list of all previous closing
10466     # side comments created for this if block
10467     #
10468     my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10469     my $csc_text = $block_leading_text;
10470
10471     if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10472     {
10473         return $csc_text;
10474     }
10475
10476     my $count = @{$rif_elsif_text};
10477     return $csc_text unless ($count);
10478
10479     my $if_text = '[ if' . $rif_elsif_text->[0];
10480
10481     # always show the leading 'if' text on 'else'
10482     if ( $block_type eq 'else' ) {
10483         $csc_text .= $if_text;
10484     }
10485
10486     # see if that's all
10487     if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10488         return $csc_text;
10489     }
10490
10491     my $last_elsif_text = "";
10492     if ( $count > 1 ) {
10493         $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10494         if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10495     }
10496
10497     # tentatively append one more item
10498     my $saved_text = $csc_text;
10499     if ( $block_type eq 'else' ) {
10500         $csc_text .= $last_elsif_text;
10501     }
10502     else {
10503         $csc_text .= ' ' . $if_text;
10504     }
10505
10506     # all done if no length checks requested
10507     if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10508         return $csc_text;
10509     }
10510
10511     # undo it if line length exceeded
10512     my $length =
10513       length($csc_text) + length($block_type) +
10514       length( $rOpts->{'closing-side-comment-prefix'} ) +
10515       $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10516     if ( $length > $rOpts_maximum_line_length ) {
10517         $csc_text = $saved_text;
10518     }
10519     return $csc_text;
10520 }
10521
10522 sub add_closing_side_comment {
10523
10524     # add closing side comments after closing block braces if -csc used
10525     my $cscw_block_comment;
10526
10527     #---------------------------------------------------------------
10528     # Step 1: loop through all tokens of this line to accumulate
10529     # the text needed to create the closing side comments. Also see
10530     # how the line ends.
10531     #---------------------------------------------------------------
10532
10533     my ( $terminal_type, $i_terminal, $i_block_leading_text,
10534         $block_leading_text, $block_line_count )
10535       = accumulate_csc_text();
10536
10537     #---------------------------------------------------------------
10538     # Step 2: make the closing side comment if this ends a block
10539     #---------------------------------------------------------------
10540     my $have_side_comment = $i_terminal != $max_index_to_go;
10541
10542     # if this line might end in a block closure..
10543     if (
10544         $terminal_type eq '}'
10545
10546         # ..and either
10547         && (
10548
10549             # the block is long enough
10550             ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10551
10552             # or there is an existing comment to check
10553             || (   $have_side_comment
10554                 && $rOpts->{'closing-side-comment-warnings'} )
10555         )
10556
10557         # .. and if this is one of the types of interest
10558         && $block_type_to_go[$i_terminal] =~
10559         /$closing_side_comment_list_pattern/o
10560
10561         # .. but not an anonymous sub
10562         # These are not normally of interest, and their closing braces are
10563         # often followed by commas or semicolons anyway.  This also avoids
10564         # possible erratic output due to line numbering inconsistencies
10565         # in the cases where their closing braces terminate a line.
10566         && $block_type_to_go[$i_terminal] ne 'sub'
10567
10568         # ..and the corresponding opening brace must is not in this batch
10569         # (because we do not need to tag one-line blocks, although this
10570         # should also be caught with a positive -csci value)
10571         && $mate_index_to_go[$i_terminal] < 0
10572
10573         # ..and either
10574         && (
10575
10576             # this is the last token (line doesnt have a side comment)
10577             !$have_side_comment
10578
10579             # or the old side comment is a closing side comment
10580             || $tokens_to_go[$max_index_to_go] =~
10581             /$closing_side_comment_prefix_pattern/o
10582         )
10583       )
10584     {
10585
10586         # then make the closing side comment text
10587         my $token =
10588 "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10589
10590         # append any extra descriptive text collected above
10591         if ( $i_block_leading_text == $i_terminal ) {
10592             $token .= $block_leading_text;
10593         }
10594         $token =~ s/\s*$//;    # trim any trailing whitespace
10595
10596         # handle case of existing closing side comment
10597         if ($have_side_comment) {
10598
10599             # warn if requested and tokens differ significantly
10600             if ( $rOpts->{'closing-side-comment-warnings'} ) {
10601                 my $old_csc = $tokens_to_go[$max_index_to_go];
10602                 my $new_csc = $token;
10603                 $new_csc =~ s/(\.\.\.)\s*$//;    # trim trailing '...'
10604                 my $new_trailing_dots = $1;
10605                 $old_csc =~ s/\.\.\.\s*$//;
10606                 $new_csc =~ s/\s+//g;            # trim all whitespace
10607                 $old_csc =~ s/\s+//g;
10608
10609                 # Patch to handle multiple closing side comments at
10610                 # else and elsif's.  These have become too complicated
10611                 # to check, so if we see an indication of
10612                 # '[ if' or '[ # elsif', then assume they were made
10613                 # by perltidy.
10614                 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10615                     if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10616                 }
10617                 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10618                     if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10619                 }
10620
10621                 # if old comment is contained in new comment,
10622                 # only compare the common part.
10623                 if ( length($new_csc) > length($old_csc) ) {
10624                     $new_csc = substr( $new_csc, 0, length($old_csc) );
10625                 }
10626
10627                 # if the new comment is shorter and has been limited,
10628                 # only compare the common part.
10629                 if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10630                 {
10631                     $old_csc = substr( $old_csc, 0, length($new_csc) );
10632                 }
10633
10634                 # any remaining difference?
10635                 if ( $new_csc ne $old_csc ) {
10636
10637                     # just leave the old comment if we are below the threshold
10638                     # for creating side comments
10639                     if ( $block_line_count <
10640                         $rOpts->{'closing-side-comment-interval'} )
10641                     {
10642                         $token = undef;
10643                     }
10644
10645                     # otherwise we'll make a note of it
10646                     else {
10647
10648                         warning(
10649 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
10650                         );
10651
10652                      # save the old side comment in a new trailing block comment
10653                         my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
10654                         $year  += 1900;
10655                         $month += 1;
10656                         $cscw_block_comment =
10657 "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
10658                     }
10659                 }
10660                 else {
10661
10662                     # No differences.. we can safely delete old comment if we
10663                     # are below the threshold
10664                     if ( $block_line_count <
10665                         $rOpts->{'closing-side-comment-interval'} )
10666                     {
10667                         $token = undef;
10668                         unstore_token_to_go()
10669                           if ( $types_to_go[$max_index_to_go] eq '#' );
10670                         unstore_token_to_go()
10671                           if ( $types_to_go[$max_index_to_go] eq 'b' );
10672                     }
10673                 }
10674             }
10675
10676             # switch to the new csc (unless we deleted it!)
10677             $tokens_to_go[$max_index_to_go] = $token if $token;
10678         }
10679
10680         # handle case of NO existing closing side comment
10681         else {
10682
10683             # insert the new side comment into the output token stream
10684             my $type          = '#';
10685             my $block_type    = '';
10686             my $type_sequence = '';
10687             my $container_environment =
10688               $container_environment_to_go[$max_index_to_go];
10689             my $level                = $levels_to_go[$max_index_to_go];
10690             my $slevel               = $nesting_depth_to_go[$max_index_to_go];
10691             my $no_internal_newlines = 0;
10692
10693             my $nesting_blocks     = $nesting_blocks_to_go[$max_index_to_go];
10694             my $ci_level           = $ci_levels_to_go[$max_index_to_go];
10695             my $in_continued_quote = 0;
10696
10697             # first insert a blank token
10698             insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
10699
10700             # then the side comment
10701             insert_new_token_to_go( $token, $type, $slevel,
10702                 $no_internal_newlines );
10703         }
10704     }
10705     return $cscw_block_comment;
10706 }
10707
10708 sub previous_nonblank_token {
10709     my ($i) = @_;
10710     if ( $i <= 0 ) {
10711         return "";
10712     }
10713     elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
10714         return $tokens_to_go[ $i - 1 ];
10715     }
10716     elsif ( $i > 1 ) {
10717         return $tokens_to_go[ $i - 2 ];
10718     }
10719     else {
10720         return "";
10721     }
10722 }
10723
10724 sub send_lines_to_vertical_aligner {
10725
10726     my ( $ri_first, $ri_last, $do_not_pad ) = @_;
10727
10728     my $rindentation_list = [0];    # ref to indentations for each line
10729
10730     # define the array @matching_token_to_go for the output tokens
10731     # which will be non-blank for each special token (such as =>)
10732     # for which alignment is required.
10733     set_vertical_alignment_markers( $ri_first, $ri_last );
10734
10735     # flush if necessary to avoid unwanted alignment
10736     my $must_flush = 0;
10737     if ( @$ri_first > 1 ) {
10738
10739         # flush before a long if statement
10740         if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
10741             $must_flush = 1;
10742         }
10743     }
10744     if ($must_flush) {
10745         Perl::Tidy::VerticalAligner::flush();
10746     }
10747
10748     set_logical_padding( $ri_first, $ri_last );
10749
10750     # loop to prepare each line for shipment
10751     my $n_last_line = @$ri_first - 1;
10752     my $in_comma_list;
10753     for my $n ( 0 .. $n_last_line ) {
10754         my $ibeg = $$ri_first[$n];
10755         my $iend = $$ri_last[$n];
10756
10757         my @patterns = ();
10758         my @tokens   = ();
10759         my @fields   = ();
10760         my $i_start  = $ibeg;
10761         my $i;
10762
10763         my $depth                 = 0;
10764         my @container_name        = ("");
10765         my @multiple_comma_arrows = (undef);
10766
10767         my $j = 0;    # field index
10768
10769         $patterns[0] = "";
10770         for $i ( $ibeg .. $iend ) {
10771
10772             # Keep track of containers balanced on this line only.
10773             # These are used below to prevent unwanted cross-line alignments.
10774             # Unbalanced containers already avoid aligning across
10775             # container boundaries.
10776             if ( $tokens_to_go[$i] eq '(' ) {
10777                 my $i_mate = $mate_index_to_go[$i];
10778                 if ( $i_mate > $i && $i_mate <= $iend ) {
10779                     $depth++;
10780                     my $seqno = $type_sequence_to_go[$i];
10781                     my $count = comma_arrow_count($seqno);
10782                     $multiple_comma_arrows[$depth] = $count && $count > 1;
10783                     my $name = previous_nonblank_token($i);
10784                     $name =~ s/^->//;
10785                     $container_name[$depth] = "+" . $name;
10786                 }
10787             }
10788             elsif ( $tokens_to_go[$i] eq ')' ) {
10789                 $depth-- if $depth > 0;
10790             }
10791
10792             # if we find a new synchronization token, we are done with
10793             # a field
10794             if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
10795
10796                 my $tok = my $raw_tok = $matching_token_to_go[$i];
10797
10798                 # make separators in different nesting depths unique
10799                 # by appending the nesting depth digit.
10800                 if ( $raw_tok ne '#' ) {
10801                     $tok .= "$nesting_depth_to_go[$i]";
10802                 }
10803
10804                 # do any special decorations for commas to avoid unwanted
10805                 # cross-line alignments.
10806                 if ( $raw_tok eq ',' ) {
10807                     if ( $container_name[$depth] ) {
10808                         $tok .= $container_name[$depth];
10809                     }
10810                 }
10811
10812                 # decorate '=>' with:
10813                 # - Nothing if this container is unbalanced on this line.
10814                 # - The previous token if it is balanced and multiple '=>'s
10815                 # - The container name if it is bananced and no other '=>'s
10816                 elsif ( $raw_tok eq '=>' ) {
10817                     if ( $container_name[$depth] ) {
10818                         if ( $multiple_comma_arrows[$depth] ) {
10819                             $tok .= "+" . previous_nonblank_token($i);
10820                         }
10821                         else {
10822                             $tok .= $container_name[$depth];
10823                         }
10824                     }
10825                 }
10826
10827                 # concatenate the text of the consecutive tokens to form
10828                 # the field
10829                 push( @fields,
10830                     join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
10831
10832                 # store the alignment token for this field
10833                 push( @tokens, $tok );
10834
10835                 # get ready for the next batch
10836                 $i_start = $i;
10837                 $j++;
10838                 $patterns[$j] = "";
10839             }
10840
10841             # continue accumulating tokens
10842             # handle non-keywords..
10843             if ( $types_to_go[$i] ne 'k' ) {
10844                 my $type = $types_to_go[$i];
10845
10846                 # Mark most things before arrows as a quote to
10847                 # get them to line up. Testfile: mixed.pl.
10848                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
10849                     my $next_type = $types_to_go[ $i + 1 ];
10850                     my $i_next_nonblank =
10851                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
10852
10853                     if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
10854                         $type = 'Q';
10855                     }
10856                 }
10857
10858                 # minor patch to make numbers and quotes align
10859                 if ( $type eq 'n' ) { $type = 'Q' }
10860
10861                 $patterns[$j] .= $type;
10862             }
10863
10864             # for keywords we have to use the actual text
10865             else {
10866
10867                 # map certain keywords to the same 'if' class to align
10868                 # long if/elsif sequences. my testfile: elsif.pl
10869                 my $tok = $tokens_to_go[$i];
10870                 if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
10871                     $tok = 'if';
10872                 }
10873                 $patterns[$j] .= $tok;
10874             }
10875         }
10876
10877         # done with this line .. join text of tokens to make the last field
10878         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
10879
10880         my ( $indentation, $lev, $level_end, $terminal_type,
10881             $is_semicolon_terminated, $is_outdented_line )
10882           = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
10883             $ri_first, $ri_last, $rindentation_list );
10884
10885         # we will allow outdenting of long lines..
10886         my $outdent_long_lines = (
10887
10888             # which are long quotes, if allowed
10889             ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
10890
10891             # which are long block comments, if allowed
10892               || (
10893                    $types_to_go[$ibeg] eq '#'
10894                 && $rOpts->{'outdent-long-comments'}
10895
10896                 # but not if this is a static block comment
10897                 && !$is_static_block_comment
10898               )
10899         );
10900
10901         my $level_jump =
10902           $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
10903
10904         my $rvertical_tightness_flags =
10905           set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
10906             $ri_first, $ri_last );
10907
10908         # flush an outdented line to avoid any unwanted vertical alignment
10909         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10910
10911         my $is_terminal_ternary = 0;
10912         if (   $tokens_to_go[$ibeg] eq ':'
10913             || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
10914         {
10915             if (   ( $terminal_type eq ';' && $level_end <= $lev )
10916                 || ( $level_end < $lev ) )
10917             {
10918                 $is_terminal_ternary = 1;
10919             }
10920         }
10921
10922         # send this new line down the pipe
10923         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
10924         Perl::Tidy::VerticalAligner::append_line(
10925             $lev,
10926             $level_end,
10927             $indentation,
10928             \@fields,
10929             \@tokens,
10930             \@patterns,
10931             $forced_breakpoint_to_go[$iend] || $in_comma_list,
10932             $outdent_long_lines,
10933             $is_terminal_ternary,
10934             $is_semicolon_terminated,
10935             $do_not_pad,
10936             $rvertical_tightness_flags,
10937             $level_jump,
10938         );
10939         $in_comma_list =
10940           $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
10941
10942         # flush an outdented line to avoid any unwanted vertical alignment
10943         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10944
10945         $do_not_pad = 0;
10946
10947     }    # end of loop to output each line
10948
10949     # remember indentation of lines containing opening containers for
10950     # later use by sub set_adjusted_indentation
10951     save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
10952 }
10953
10954 {        # begin unmatched_indexes
10955
10956     # closure to keep track of unbalanced containers.
10957     # arrays shared by the routines in this block:
10958     my @unmatched_opening_indexes_in_this_batch;
10959     my @unmatched_closing_indexes_in_this_batch;
10960     my %comma_arrow_count;
10961
10962     sub is_unbalanced_batch {
10963         @unmatched_opening_indexes_in_this_batch +
10964           @unmatched_closing_indexes_in_this_batch;
10965     }
10966
10967     sub comma_arrow_count {
10968         my $seqno = $_[0];
10969         return $comma_arrow_count{$seqno};
10970     }
10971
10972     sub match_opening_and_closing_tokens {
10973
10974         # Match up indexes of opening and closing braces, etc, in this batch.
10975         # This has to be done after all tokens are stored because unstoring
10976         # of tokens would otherwise cause trouble.
10977
10978         @unmatched_opening_indexes_in_this_batch = ();
10979         @unmatched_closing_indexes_in_this_batch = ();
10980         %comma_arrow_count                       = ();
10981
10982         my ( $i, $i_mate, $token );
10983         foreach $i ( 0 .. $max_index_to_go ) {
10984             if ( $type_sequence_to_go[$i] ) {
10985                 $token = $tokens_to_go[$i];
10986                 if ( $token =~ /^[\(\[\{\?]$/ ) {
10987                     push @unmatched_opening_indexes_in_this_batch, $i;
10988                 }
10989                 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
10990
10991                     $i_mate = pop @unmatched_opening_indexes_in_this_batch;
10992                     if ( defined($i_mate) && $i_mate >= 0 ) {
10993                         if ( $type_sequence_to_go[$i_mate] ==
10994                             $type_sequence_to_go[$i] )
10995                         {
10996                             $mate_index_to_go[$i]      = $i_mate;
10997                             $mate_index_to_go[$i_mate] = $i;
10998                         }
10999                         else {
11000                             push @unmatched_opening_indexes_in_this_batch,
11001                               $i_mate;
11002                             push @unmatched_closing_indexes_in_this_batch, $i;
11003                         }
11004                     }
11005                     else {
11006                         push @unmatched_closing_indexes_in_this_batch, $i;
11007                     }
11008                 }
11009             }
11010             elsif ( $tokens_to_go[$i] eq '=>' ) {
11011                 if (@unmatched_opening_indexes_in_this_batch) {
11012                     my $j     = $unmatched_opening_indexes_in_this_batch[-1];
11013                     my $seqno = $type_sequence_to_go[$j];
11014                     $comma_arrow_count{$seqno}++;
11015                 }
11016             }
11017         }
11018     }
11019
11020     sub save_opening_indentation {
11021
11022         # This should be called after each batch of tokens is output. It
11023         # saves indentations of lines of all unmatched opening tokens.
11024         # These will be used by sub get_opening_indentation.
11025
11026         my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11027
11028         # we no longer need indentations of any saved indentations which
11029         # are unmatched closing tokens in this batch, because we will
11030         # never encounter them again.  So we can delete them to keep
11031         # the hash size down.
11032         foreach (@unmatched_closing_indexes_in_this_batch) {
11033             my $seqno = $type_sequence_to_go[$_];
11034             delete $saved_opening_indentation{$seqno};
11035         }
11036
11037         # we need to save indentations of any unmatched opening tokens
11038         # in this batch because we may need them in a subsequent batch.
11039         foreach (@unmatched_opening_indexes_in_this_batch) {
11040             my $seqno = $type_sequence_to_go[$_];
11041             $saved_opening_indentation{$seqno} = [
11042                 lookup_opening_indentation(
11043                     $_, $ri_first, $ri_last, $rindentation_list
11044                 )
11045             ];
11046         }
11047     }
11048 }    # end unmatched_indexes
11049
11050 sub get_opening_indentation {
11051
11052     # get the indentation of the line which output the opening token
11053     # corresponding to a given closing token in the current output batch.
11054     #
11055     # given:
11056     # $i_closing - index in this line of a closing token ')' '}' or ']'
11057     #
11058     # $ri_first - reference to list of the first index $i for each output
11059     #               line in this batch
11060     # $ri_last - reference to list of the last index $i for each output line
11061     #              in this batch
11062     # $rindentation_list - reference to a list containing the indentation
11063     #            used for each line.
11064     #
11065     # return:
11066     #   -the indentation of the line which contained the opening token
11067     #    which matches the token at index $i_opening
11068     #   -and its offset (number of columns) from the start of the line
11069     #
11070     my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
11071
11072     # first, see if the opening token is in the current batch
11073     my $i_opening = $mate_index_to_go[$i_closing];
11074     my ( $indent, $offset );
11075     if ( $i_opening >= 0 ) {
11076
11077         # it is..look up the indentation
11078         ( $indent, $offset ) =
11079           lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
11080             $rindentation_list );
11081     }
11082
11083     # if not, it should have been stored in the hash by a previous batch
11084     else {
11085         my $seqno = $type_sequence_to_go[$i_closing];
11086         if ($seqno) {
11087             if ( $saved_opening_indentation{$seqno} ) {
11088                 ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
11089             }
11090
11091             # some kind of serious error
11092             # (example is badfile.t)
11093             else {
11094                 $indent = 0;
11095                 $offset = 0;
11096             }
11097         }
11098
11099         # if no sequence number it must be an unbalanced container
11100         else {
11101             $indent = 0;
11102             $offset = 0;
11103         }
11104     }
11105     return ( $indent, $offset );
11106 }
11107
11108 sub lookup_opening_indentation {
11109
11110     # get the indentation of the line in the current output batch
11111     # which output a selected opening token
11112     #
11113     # given:
11114     #   $i_opening - index of an opening token in the current output batch
11115     #                whose line indentation we need
11116     #   $ri_first - reference to list of the first index $i for each output
11117     #               line in this batch
11118     #   $ri_last - reference to list of the last index $i for each output line
11119     #              in this batch
11120     #   $rindentation_list - reference to a list containing the indentation
11121     #            used for each line.  (NOTE: the first slot in
11122     #            this list is the last returned line number, and this is
11123     #            followed by the list of indentations).
11124     #
11125     # return
11126     #   -the indentation of the line which contained token $i_opening
11127     #   -and its offset (number of columns) from the start of the line
11128
11129     my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11130
11131     my $nline = $rindentation_list->[0];    # line number of previous lookup
11132
11133     # reset line location if necessary
11134     $nline = 0 if ( $i_opening < $ri_start->[$nline] );
11135
11136     # find the correct line
11137     unless ( $i_opening > $ri_last->[-1] ) {
11138         while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11139     }
11140
11141     # error - token index is out of bounds - shouldn't happen
11142     else {
11143         warning(
11144 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
11145         );
11146         report_definite_bug();
11147         $nline = $#{$ri_last};
11148     }
11149
11150     $rindentation_list->[0] =
11151       $nline;    # save line number to start looking next call
11152     my $ibeg = $ri_start->[$nline];
11153     my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
11154     return ( $rindentation_list->[ $nline + 1 ], $offset );
11155 }
11156
11157 {
11158     my %is_if_elsif_else_unless_while_until_for_foreach;
11159
11160     BEGIN {
11161
11162         # These block types may have text between the keyword and opening
11163         # curly.  Note: 'else' does not, but must be included to allow trailing
11164         # if/elsif text to be appended.
11165         # patch for SWITCH/CASE: added 'case' and 'when'
11166         @_ = qw(if elsif else unless while until for foreach case when);
11167         @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11168     }
11169
11170     sub set_adjusted_indentation {
11171
11172         # This routine has the final say regarding the actual indentation of
11173         # a line.  It starts with the basic indentation which has been
11174         # defined for the leading token, and then takes into account any
11175         # options that the user has set regarding special indenting and
11176         # outdenting.
11177
11178         my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11179             $rindentation_list )
11180           = @_;
11181
11182         # we need to know the last token of this line
11183         my ( $terminal_type, $i_terminal ) =
11184           terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11185
11186         my $is_outdented_line = 0;
11187
11188         my $is_semicolon_terminated = $terminal_type eq ';'
11189           && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11190
11191         ##########################################################
11192         # Section 1: set a flag and a default indentation
11193         #
11194         # Most lines are indented according to the initial token.
11195         # But it is common to outdent to the level just after the
11196         # terminal token in certain cases...
11197         # adjust_indentation flag:
11198         #       0 - do not adjust
11199         #       1 - outdent
11200         #       2 - vertically align with opening token
11201         #       3 - indent
11202         ##########################################################
11203         my $adjust_indentation         = 0;
11204         my $default_adjust_indentation = $adjust_indentation;
11205
11206         my ( $opening_indentation, $opening_offset );
11207
11208         # if we are at a closing token of some type..
11209         if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11210
11211             # get the indentation of the line containing the corresponding
11212             # opening token
11213             ( $opening_indentation, $opening_offset ) =
11214               get_opening_indentation( $ibeg, $ri_first, $ri_last,
11215                 $rindentation_list );
11216
11217             # First set the default behavior:
11218             # default behavior is to outdent closing lines
11219             # of the form:   ");  };  ];  )->xxx;"
11220             if (
11221                 $is_semicolon_terminated
11222
11223                 # and 'cuddled parens' of the form:   ")->pack("
11224                 || (
11225                        $terminal_type      eq '('
11226                     && $types_to_go[$ibeg] eq ')'
11227                     && ( $nesting_depth_to_go[$iend] + 1 ==
11228                         $nesting_depth_to_go[$ibeg] )
11229                 )
11230               )
11231             {
11232                 $adjust_indentation = 1;
11233             }
11234
11235             # TESTING: outdent something like '),'
11236             if (
11237                 $terminal_type eq ','
11238
11239                 # allow just one character before the comma
11240                 && $i_terminal == $ibeg + 1
11241
11242                 # requre LIST environment; otherwise, we may outdent too much --
11243                 # this can happen in calls without parentheses (overload.t);
11244                 && $container_environment_to_go[$i_terminal] eq 'LIST'
11245               )
11246             {
11247                 $adjust_indentation = 1;
11248             }
11249
11250             # undo continuation indentation of a terminal closing token if
11251             # it is the last token before a level decrease.  This will allow
11252             # a closing token to line up with its opening counterpart, and
11253             # avoids a indentation jump larger than 1 level.
11254             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11255                 && $i_terminal == $ibeg )
11256             {
11257                 my $ci        = $ci_levels_to_go[$ibeg];
11258                 my $lev       = $levels_to_go[$ibeg];
11259                 my $next_type = $types_to_go[ $ibeg + 1 ];
11260                 my $i_next_nonblank =
11261                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11262                 if (   $i_next_nonblank <= $max_index_to_go
11263                     && $levels_to_go[$i_next_nonblank] < $lev )
11264                 {
11265                     $adjust_indentation = 1;
11266                 }
11267             }
11268
11269             $default_adjust_indentation = $adjust_indentation;
11270
11271             # Now modify default behavior according to user request:
11272             # handle option to indent non-blocks of the form );  };  ];
11273             # But don't do special indentation to something like ')->pack('
11274             if ( !$block_type_to_go[$ibeg] ) {
11275                 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11276                 if ( $cti == 1 ) {
11277                     if (   $i_terminal <= $ibeg + 1
11278                         || $is_semicolon_terminated )
11279                     {
11280                         $adjust_indentation = 2;
11281                     }
11282                     else {
11283                         $adjust_indentation = 0;
11284                     }
11285                 }
11286                 elsif ( $cti == 2 ) {
11287                     if ($is_semicolon_terminated) {
11288                         $adjust_indentation = 3;
11289                     }
11290                     else {
11291                         $adjust_indentation = 0;
11292                     }
11293                 }
11294                 elsif ( $cti == 3 ) {
11295                     $adjust_indentation = 3;
11296                 }
11297             }
11298
11299             # handle option to indent blocks
11300             else {
11301                 if (
11302                     $rOpts->{'indent-closing-brace'}
11303                     && (
11304                         $i_terminal == $ibeg    #  isolated terminal '}'
11305                         || $is_semicolon_terminated
11306                     )
11307                   )                             #  } xxxx ;
11308                 {
11309                     $adjust_indentation = 3;
11310                 }
11311             }
11312         }
11313
11314         # if at ');', '};', '>;', and '];' of a terminal qw quote
11315         elsif ($$rpatterns[0] =~ /^qb*;$/
11316             && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
11317         {
11318             if ( $closing_token_indentation{$1} == 0 ) {
11319                 $adjust_indentation = 1;
11320             }
11321             else {
11322                 $adjust_indentation = 3;
11323             }
11324         }
11325
11326         ##########################################################
11327         # Section 2: set indentation according to flag set above
11328         #
11329         # Select the indentation object to define leading
11330         # whitespace.  If we are outdenting something like '} } );'
11331         # then we want to use one level below the last token
11332         # ($i_terminal) in order to get it to fully outdent through
11333         # all levels.
11334         ##########################################################
11335         my $indentation;
11336         my $lev;
11337         my $level_end = $levels_to_go[$iend];
11338
11339         if ( $adjust_indentation == 0 ) {
11340             $indentation = $leading_spaces_to_go[$ibeg];
11341             $lev         = $levels_to_go[$ibeg];
11342         }
11343         elsif ( $adjust_indentation == 1 ) {
11344             $indentation = $reduced_spaces_to_go[$i_terminal];
11345             $lev         = $levels_to_go[$i_terminal];
11346         }
11347
11348         # handle indented closing token which aligns with opening token
11349         elsif ( $adjust_indentation == 2 ) {
11350
11351             # handle option to align closing token with opening token
11352             $lev = $levels_to_go[$ibeg];
11353
11354             # calculate spaces needed to align with opening token
11355             my $space_count =
11356               get_SPACES($opening_indentation) + $opening_offset;
11357
11358             # Indent less than the previous line.
11359             #
11360             # Problem: For -lp we don't exactly know what it was if there
11361             # were recoverable spaces sent to the aligner.  A good solution
11362             # would be to force a flush of the vertical alignment buffer, so
11363             # that we would know.  For now, this rule is used for -lp:
11364             #
11365             # When the last line did not start with a closing token we will
11366             # be optimistic that the aligner will recover everything wanted.
11367             #
11368             # This rule will prevent us from breaking a hierarchy of closing
11369             # tokens, and in a worst case will leave a closing paren too far
11370             # indented, but this is better than frequently leaving it not
11371             # indented enough.
11372             my $last_spaces = get_SPACES($last_indentation_written);
11373             if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
11374                 $last_spaces +=
11375                   get_RECOVERABLE_SPACES($last_indentation_written);
11376             }
11377
11378             # reset the indentation to the new space count if it works
11379             # only options are all or none: nothing in-between looks good
11380             $lev = $levels_to_go[$ibeg];
11381             if ( $space_count < $last_spaces ) {
11382                 if ($rOpts_line_up_parentheses) {
11383                     my $lev = $levels_to_go[$ibeg];
11384                     $indentation =
11385                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11386                 }
11387                 else {
11388                     $indentation = $space_count;
11389                 }
11390             }
11391
11392             # revert to default if it doesnt work
11393             else {
11394                 $space_count = leading_spaces_to_go($ibeg);
11395                 if ( $default_adjust_indentation == 0 ) {
11396                     $indentation = $leading_spaces_to_go[$ibeg];
11397                 }
11398                 elsif ( $default_adjust_indentation == 1 ) {
11399                     $indentation = $reduced_spaces_to_go[$i_terminal];
11400                     $lev         = $levels_to_go[$i_terminal];
11401                 }
11402             }
11403         }
11404
11405         # Full indentaion of closing tokens (-icb and -icp or -cti=2)
11406         else {
11407
11408             # handle -icb (indented closing code block braces)
11409             # Updated method for indented block braces: indent one full level if
11410             # there is no continuation indentation.  This will occur for major
11411             # structures such as sub, if, else, but not for things like map
11412             # blocks.
11413             #
11414             # Note: only code blocks without continuation indentation are
11415             # handled here (if, else, unless, ..). In the following snippet,
11416             # the terminal brace of the sort block will have continuation
11417             # indentation as shown so it will not be handled by the coding
11418             # here.  We would have to undo the continuation indentation to do
11419             # this, but it probably looks ok as is.  This is a possible future
11420             # update for semicolon terminated lines.
11421             #
11422             #     if ($sortby eq 'date' or $sortby eq 'size') {
11423             #         @files = sort {
11424             #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
11425             #                 or $a cmp $b
11426             #                 } @files;
11427             #         }
11428             #
11429             if (   $block_type_to_go[$ibeg]
11430                 && $ci_levels_to_go[$i_terminal] == 0 )
11431             {
11432                 my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
11433                 $indentation = $spaces + $rOpts_indent_columns;
11434
11435                 # NOTE: for -lp we could create a new indentation object, but
11436                 # there is probably no need to do it
11437             }
11438
11439             # handle -icp and any -icb block braces which fall through above
11440             # test such as the 'sort' block mentioned above.
11441             else {
11442
11443                 # There are currently two ways to handle -icp...
11444                 # One way is to use the indentation of the previous line:
11445                 # $indentation = $last_indentation_written;
11446
11447                 # The other way is to use the indentation that the previous line
11448                 # would have had if it hadn't been adjusted:
11449                 $indentation = $last_unadjusted_indentation;
11450
11451                 # Current method: use the minimum of the two. This avoids
11452                 # inconsistent indentation.
11453                 if ( get_SPACES($last_indentation_written) <
11454                     get_SPACES($indentation) )
11455                 {
11456                     $indentation = $last_indentation_written;
11457                 }
11458             }
11459
11460             # use previous indentation but use own level
11461             # to cause list to be flushed properly
11462             $lev = $levels_to_go[$ibeg];
11463         }
11464
11465         # remember indentation except for multi-line quotes, which get
11466         # no indentation
11467         unless ( $ibeg == 0 && $starting_in_quote ) {
11468             $last_indentation_written    = $indentation;
11469             $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
11470             $last_leading_token          = $tokens_to_go[$ibeg];
11471         }
11472
11473         # be sure lines with leading closing tokens are not outdented more
11474         # than the line which contained the corresponding opening token.
11475
11476         #############################################################
11477         # updated per bug report in alex_bug.pl: we must not
11478         # mess with the indentation of closing logical braces so
11479         # we must treat something like '} else {' as if it were
11480         # an isolated brace my $is_isolated_block_brace = (
11481         # $iend == $ibeg ) && $block_type_to_go[$ibeg];
11482         my $is_isolated_block_brace = $block_type_to_go[$ibeg]
11483           && ( $iend == $ibeg
11484             || $is_if_elsif_else_unless_while_until_for_foreach{
11485                 $block_type_to_go[$ibeg] } );
11486         #############################################################
11487         if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
11488             if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
11489                 $indentation = $opening_indentation;
11490             }
11491         }
11492
11493         # remember the indentation of each line of this batch
11494         push @{$rindentation_list}, $indentation;
11495
11496         # outdent lines with certain leading tokens...
11497         if (
11498
11499             # must be first word of this batch
11500             $ibeg == 0
11501
11502             # and ...
11503             && (
11504
11505                 # certain leading keywords if requested
11506                 (
11507                        $rOpts->{'outdent-keywords'}
11508                     && $types_to_go[$ibeg] eq 'k'
11509                     && $outdent_keyword{ $tokens_to_go[$ibeg] }
11510                 )
11511
11512                 # or labels if requested
11513                 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
11514
11515                 # or static block comments if requested
11516                 || (   $types_to_go[$ibeg] eq '#'
11517                     && $rOpts->{'outdent-static-block-comments'}
11518                     && $is_static_block_comment )
11519             )
11520           )
11521
11522         {
11523             my $space_count = leading_spaces_to_go($ibeg);
11524             if ( $space_count > 0 ) {
11525                 $space_count -= $rOpts_continuation_indentation;
11526                 $is_outdented_line = 1;
11527                 if ( $space_count < 0 ) { $space_count = 0 }
11528
11529                 # do not promote a spaced static block comment to non-spaced;
11530                 # this is not normally necessary but could be for some
11531                 # unusual user inputs (such as -ci = -i)
11532                 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
11533                     $space_count = 1;
11534                 }
11535
11536                 if ($rOpts_line_up_parentheses) {
11537                     $indentation =
11538                       new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11539                 }
11540                 else {
11541                     $indentation = $space_count;
11542                 }
11543             }
11544         }
11545
11546         return ( $indentation, $lev, $level_end, $terminal_type,
11547             $is_semicolon_terminated, $is_outdented_line );
11548     }
11549 }
11550
11551 sub set_vertical_tightness_flags {
11552
11553     my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
11554
11555     # Define vertical tightness controls for the nth line of a batch.
11556     # We create an array of parameters which tell the vertical aligner
11557     # if we should combine this line with the next line to achieve the
11558     # desired vertical tightness.  The array of parameters contains:
11559     #
11560     #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
11561     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
11562     #             if closing: spaces of padding to use
11563     #   [2] sequence number of container
11564     #   [3] valid flag: do not append if this flag is false. Will be
11565     #       true if appropriate -vt flag is set.  Otherwise, Will be
11566     #       made true only for 2 line container in parens with -lp
11567     #
11568     # These flags are used by sub set_leading_whitespace in
11569     # the vertical aligner
11570
11571     my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
11572
11573     # For non-BLOCK tokens, we will need to examine the next line
11574     # too, so we won't consider the last line.
11575     if ( $n < $n_last_line ) {
11576
11577         # see if last token is an opening token...not a BLOCK...
11578         my $ibeg_next = $$ri_first[ $n + 1 ];
11579         my $token_end = $tokens_to_go[$iend];
11580         my $iend_next = $$ri_last[ $n + 1 ];
11581         if (
11582                $type_sequence_to_go[$iend]
11583             && !$block_type_to_go[$iend]
11584             && $is_opening_token{$token_end}
11585             && (
11586                 $opening_vertical_tightness{$token_end} > 0
11587
11588                 # allow 2-line method call to be closed up
11589                 || (   $rOpts_line_up_parentheses
11590                     && $token_end eq '('
11591                     && $iend > $ibeg
11592                     && $types_to_go[ $iend - 1 ] ne 'b' )
11593             )
11594           )
11595         {
11596
11597             # avoid multiple jumps in nesting depth in one line if
11598             # requested
11599             my $ovt       = $opening_vertical_tightness{$token_end};
11600             my $iend_next = $$ri_last[ $n + 1 ];
11601             unless (
11602                 $ovt < 2
11603                 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
11604                     $nesting_depth_to_go[$ibeg_next] )
11605               )
11606             {
11607
11608                 # If -vt flag has not been set, mark this as invalid
11609                 # and aligner will validate it if it sees the closing paren
11610                 # within 2 lines.
11611                 my $valid_flag = $ovt;
11612                 @{$rvertical_tightness_flags} =
11613                   ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
11614             }
11615         }
11616
11617         # see if first token of next line is a closing token...
11618         # ..and be sure this line does not have a side comment
11619         my $token_next = $tokens_to_go[$ibeg_next];
11620         if (   $type_sequence_to_go[$ibeg_next]
11621             && !$block_type_to_go[$ibeg_next]
11622             && $is_closing_token{$token_next}
11623             && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
11624         {
11625             my $ovt = $opening_vertical_tightness{$token_next};
11626             my $cvt = $closing_vertical_tightness{$token_next};
11627             if (
11628
11629                 # never append a trailing line like   )->pack(
11630                 # because it will throw off later alignment
11631                 (
11632                     $nesting_depth_to_go[$ibeg_next] ==
11633                     $nesting_depth_to_go[ $iend_next + 1 ] + 1
11634                 )
11635                 && (
11636                     $cvt == 2
11637                     || (
11638                         $container_environment_to_go[$ibeg_next] ne 'LIST'
11639                         && (
11640                             $cvt == 1
11641
11642                             # allow closing up 2-line method calls
11643                             || (   $rOpts_line_up_parentheses
11644                                 && $token_next eq ')' )
11645                         )
11646                     )
11647                 )
11648               )
11649             {
11650
11651                 # decide which trailing closing tokens to append..
11652                 my $ok = 0;
11653                 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
11654                 else {
11655                     my $str = join( '',
11656                         @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
11657
11658                     # append closing token if followed by comment or ';'
11659                     if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
11660                 }
11661
11662                 if ($ok) {
11663                     my $valid_flag = $cvt;
11664                     @{$rvertical_tightness_flags} = (
11665                         2,
11666                         $tightness{$token_next} == 2 ? 0 : 1,
11667                         $type_sequence_to_go[$ibeg_next], $valid_flag,
11668                     );
11669                 }
11670             }
11671         }
11672
11673         # Opening Token Right
11674         # If requested, move an isolated trailing opening token to the end of
11675         # the previous line which ended in a comma.  We could do this
11676         # in sub recombine_breakpoints but that would cause problems
11677         # with -lp formatting.  The problem is that indentation will
11678         # quickly move far to the right in nested expressions.  By
11679         # doing it after indentation has been set, we avoid changes
11680         # to the indentation.  Actual movement of the token takes place
11681         # in sub write_leader_and_string.
11682         if (
11683             $opening_token_right{ $tokens_to_go[$ibeg_next] }
11684
11685             # previous line is not opening
11686             # (use -sot to combine with it)
11687             && !$is_opening_token{$token_end}
11688
11689             # previous line ended in one of these
11690             # (add other cases if necessary; '=>' and '.' are not necessary
11691             ##&& ($is_opening_token{$token_end} || $token_end eq ',')
11692             && !$block_type_to_go[$ibeg_next]
11693
11694             # this is a line with just an opening token
11695             && (   $iend_next == $ibeg_next
11696                 || $iend_next == $ibeg_next + 1
11697                 && $types_to_go[$iend_next] eq '#' )
11698
11699             # looks bad if we align vertically with the wrong container
11700             && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
11701           )
11702         {
11703             my $valid_flag = 1;
11704             my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11705             @{$rvertical_tightness_flags} =
11706               ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
11707         }
11708
11709         # Stacking of opening and closing tokens
11710         my $stackable;
11711         my $token_beg_next = $tokens_to_go[$ibeg_next];
11712
11713         # patch to make something like 'qw(' behave like an opening paren
11714         # (aran.t)
11715         if ( $types_to_go[$ibeg_next] eq 'q' ) {
11716             if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
11717                 $token_beg_next = $1;
11718             }
11719         }
11720
11721         if (   $is_closing_token{$token_end}
11722             && $is_closing_token{$token_beg_next} )
11723         {
11724             $stackable = $stack_closing_token{$token_beg_next}
11725               unless ( $block_type_to_go[$ibeg_next] )
11726               ;    # shouldn't happen; just checking
11727         }
11728         elsif ($is_opening_token{$token_end}
11729             && $is_opening_token{$token_beg_next} )
11730         {
11731             $stackable = $stack_opening_token{$token_beg_next}
11732               unless ( $block_type_to_go[$ibeg_next] )
11733               ;    # shouldn't happen; just checking
11734         }
11735
11736         if ($stackable) {
11737
11738             my $is_semicolon_terminated;
11739             if ( $n + 1 == $n_last_line ) {
11740                 my ( $terminal_type, $i_terminal ) = terminal_type(
11741                     \@types_to_go, \@block_type_to_go,
11742                     $ibeg_next,    $iend_next
11743                 );
11744                 $is_semicolon_terminated = $terminal_type eq ';'
11745                   && $nesting_depth_to_go[$iend_next] <
11746                   $nesting_depth_to_go[$ibeg_next];
11747             }
11748
11749             # this must be a line with just an opening token
11750             # or end in a semicolon
11751             if (
11752                 $is_semicolon_terminated
11753                 || (   $iend_next == $ibeg_next
11754                     || $iend_next == $ibeg_next + 1
11755                     && $types_to_go[$iend_next] eq '#' )
11756               )
11757             {
11758                 my $valid_flag = 1;
11759                 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
11760                 @{$rvertical_tightness_flags} =
11761                   ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
11762                   );
11763             }
11764         }
11765     }
11766
11767     # Check for a last line with isolated opening BLOCK curly
11768     elsif ($rOpts_block_brace_vertical_tightness
11769         && $ibeg               eq $iend
11770         && $types_to_go[$iend] eq '{'
11771         && $block_type_to_go[$iend] =~
11772         /$block_brace_vertical_tightness_pattern/o )
11773     {
11774         @{$rvertical_tightness_flags} =
11775           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
11776     }
11777
11778     # pack in the sequence numbers of the ends of this line
11779     $rvertical_tightness_flags->[4] = get_seqno($ibeg);
11780     $rvertical_tightness_flags->[5] = get_seqno($iend);
11781     return $rvertical_tightness_flags;
11782 }
11783
11784 sub get_seqno {
11785
11786     # get opening and closing sequence numbers of a token for the vertical
11787     # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
11788     # to be treated somewhat like opening and closing tokens for stacking
11789     # tokens by the vertical aligner.
11790     my ($ii) = @_;
11791     my $seqno = $type_sequence_to_go[$ii];
11792     if ( $types_to_go[$ii] eq 'q' ) {
11793         my $SEQ_QW = -1;
11794         if ( $ii > 0 ) {
11795             $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
11796         }
11797         else {
11798             if ( !$ending_in_quote ) {
11799                 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
11800             }
11801         }
11802     }
11803     return ($seqno);
11804 }
11805
11806 {
11807     my %is_vertical_alignment_type;
11808     my %is_vertical_alignment_keyword;
11809
11810     BEGIN {
11811
11812         @_ = qw#
11813           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
11814           { ? : => =~ && || // ~~
11815           #;
11816         @is_vertical_alignment_type{@_} = (1) x scalar(@_);
11817
11818         @_ = qw(if unless and or err eq ne for foreach while until);
11819         @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
11820     }
11821
11822     sub set_vertical_alignment_markers {
11823
11824         # This routine takes the first step toward vertical alignment of the
11825         # lines of output text.  It looks for certain tokens which can serve as
11826         # vertical alignment markers (such as an '=').
11827         #
11828         # Method: We look at each token $i in this output batch and set
11829         # $matching_token_to_go[$i] equal to those tokens at which we would
11830         # accept vertical alignment.
11831
11832         # nothing to do if we aren't allowed to change whitespace
11833         if ( !$rOpts_add_whitespace ) {
11834             for my $i ( 0 .. $max_index_to_go ) {
11835                 $matching_token_to_go[$i] = '';
11836             }
11837             return;
11838         }
11839
11840         my ( $ri_first, $ri_last ) = @_;
11841
11842         # remember the index of last nonblank token before any sidecomment
11843         my $i_terminal = $max_index_to_go;
11844         if ( $types_to_go[$i_terminal] eq '#' ) {
11845             if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
11846                 if ( $i_terminal > 0 ) { --$i_terminal }
11847             }
11848         }
11849
11850         # look at each line of this batch..
11851         my $last_vertical_alignment_before_index;
11852         my $vert_last_nonblank_type;
11853         my $vert_last_nonblank_token;
11854         my $vert_last_nonblank_block_type;
11855         my $max_line = @$ri_first - 1;
11856         my ( $i, $type, $token, $block_type, $alignment_type );
11857         my ( $ibeg, $iend, $line );
11858
11859         foreach $line ( 0 .. $max_line ) {
11860             $ibeg                                 = $$ri_first[$line];
11861             $iend                                 = $$ri_last[$line];
11862             $last_vertical_alignment_before_index = -1;
11863             $vert_last_nonblank_type              = '';
11864             $vert_last_nonblank_token             = '';
11865             $vert_last_nonblank_block_type        = '';
11866
11867             # look at each token in this output line..
11868             foreach $i ( $ibeg .. $iend ) {
11869                 $alignment_type = '';
11870                 $type           = $types_to_go[$i];
11871                 $block_type     = $block_type_to_go[$i];
11872                 $token          = $tokens_to_go[$i];
11873
11874                 # check for flag indicating that we should not align
11875                 # this token
11876                 if ( $matching_token_to_go[$i] ) {
11877                     $matching_token_to_go[$i] = '';
11878                     next;
11879                 }
11880
11881                 #--------------------------------------------------------
11882                 # First see if we want to align BEFORE this token
11883                 #--------------------------------------------------------
11884
11885                 # The first possible token that we can align before
11886                 # is index 2 because: 1) it doesn't normally make sense to
11887                 # align before the first token and 2) the second
11888                 # token must be a blank if we are to align before
11889                 # the third
11890                 if ( $i < $ibeg + 2 ) { }
11891
11892                 # must follow a blank token
11893                 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
11894
11895                 # align a side comment --
11896                 elsif ( $type eq '#' ) {
11897
11898                     unless (
11899
11900                         # it is a static side comment
11901                         (
11902                                $rOpts->{'static-side-comments'}
11903                             && $token =~ /$static_side_comment_pattern/o
11904                         )
11905
11906                         # or a closing side comment
11907                         || (   $vert_last_nonblank_block_type
11908                             && $token =~
11909                             /$closing_side_comment_prefix_pattern/o )
11910                       )
11911                     {
11912                         $alignment_type = $type;
11913                     }    ## Example of a static side comment
11914                 }
11915
11916                 # otherwise, do not align two in a row to create a
11917                 # blank field
11918                 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
11919
11920                 # align before one of these keywords
11921                 # (within a line, since $i>1)
11922                 elsif ( $type eq 'k' ) {
11923
11924                     #  /^(if|unless|and|or|eq|ne)$/
11925                     if ( $is_vertical_alignment_keyword{$token} ) {
11926                         $alignment_type = $token;
11927                     }
11928                 }
11929
11930                 # align before one of these types..
11931                 # Note: add '.' after new vertical aligner is operational
11932                 elsif ( $is_vertical_alignment_type{$type} ) {
11933                     $alignment_type = $token;
11934
11935                     # Do not align a terminal token.  Although it might
11936                     # occasionally look ok to do this, it has been found to be
11937                     # a good general rule.  The main problems are:
11938                     # (1) that the terminal token (such as an = or :) might get
11939                     # moved far to the right where it is hard to see because
11940                     # nothing follows it, and
11941                     # (2) doing so may prevent other good alignments.
11942                     if ( $i == $iend || $i >= $i_terminal ) {
11943                         $alignment_type = "";
11944                     }
11945
11946                     # Do not align leading ': ('.  This would prevent
11947                     # alignment in something like the following:
11948                     #   $extra_space .=
11949                     #       ( $input_line_number < 10 )  ? "  "
11950                     #     : ( $input_line_number < 100 ) ? " "
11951                     #     :                                "";
11952                     if (   $i == $ibeg + 2
11953                         && $types_to_go[$ibeg]    eq ':'
11954                         && $types_to_go[ $i - 1 ] eq 'b' )
11955                     {
11956                         $alignment_type = "";
11957                     }
11958
11959                     # For a paren after keyword, only align something like this:
11960                     #    if    ( $a ) { &a }
11961                     #    elsif ( $b ) { &b }
11962                     if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
11963                         $alignment_type = ""
11964                           unless $vert_last_nonblank_token =~
11965                           /^(if|unless|elsif)$/;
11966                     }
11967
11968                     # be sure the alignment tokens are unique
11969                     # This didn't work well: reason not determined
11970                     # if ($token ne $type) {$alignment_type .= $type}
11971                 }
11972
11973                 # NOTE: This is deactivated because it causes the previous
11974                 # if/elsif alignment to fail
11975                 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
11976                 #{ $alignment_type = $type; }
11977
11978                 if ($alignment_type) {
11979                     $last_vertical_alignment_before_index = $i;
11980                 }
11981
11982                 #--------------------------------------------------------
11983                 # Next see if we want to align AFTER the previous nonblank
11984                 #--------------------------------------------------------
11985
11986                 # We want to line up ',' and interior ';' tokens, with the added
11987                 # space AFTER these tokens.  (Note: interior ';' is included
11988                 # because it may occur in short blocks).
11989                 if (
11990
11991                     # we haven't already set it
11992                     !$alignment_type
11993
11994                     # and its not the first token of the line
11995                     && ( $i > $ibeg )
11996
11997                     # and it follows a blank
11998                     && $types_to_go[ $i - 1 ] eq 'b'
11999
12000                     # and previous token IS one of these:
12001                     && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12002
12003                     # and it's NOT one of these
12004                     && ( $type !~ /^[b\#\)\]\}]$/ )
12005
12006                     # then go ahead and align
12007                   )
12008
12009                 {
12010                     $alignment_type = $vert_last_nonblank_type;
12011                 }
12012
12013                 #--------------------------------------------------------
12014                 # then store the value
12015                 #--------------------------------------------------------
12016                 $matching_token_to_go[$i] = $alignment_type;
12017                 if ( $type ne 'b' ) {
12018                     $vert_last_nonblank_type       = $type;
12019                     $vert_last_nonblank_token      = $token;
12020                     $vert_last_nonblank_block_type = $block_type;
12021                 }
12022             }
12023         }
12024     }
12025 }
12026
12027 sub terminal_type {
12028
12029     #    returns type of last token on this line (terminal token), as follows:
12030     #    returns # for a full-line comment
12031     #    returns ' ' for a blank line
12032     #    otherwise returns final token type
12033
12034     my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
12035
12036     # check for full-line comment..
12037     if ( $$rtype[$ibeg] eq '#' ) {
12038         return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
12039     }
12040     else {
12041
12042         # start at end and walk bakwards..
12043         for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
12044
12045             # skip past any side comment and blanks
12046             next if ( $$rtype[$i] eq 'b' );
12047             next if ( $$rtype[$i] eq '#' );
12048
12049             # found it..make sure it is a BLOCK termination,
12050             # but hide a terminal } after sort/grep/map because it is not
12051             # necessarily the end of the line.  (terminal.t)
12052             my $terminal_type = $$rtype[$i];
12053             if (
12054                 $terminal_type eq '}'
12055                 && ( !$$rblock_type[$i]
12056                     || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
12057               )
12058             {
12059                 $terminal_type = 'b';
12060             }
12061             return wantarray ? ( $terminal_type, $i ) : $terminal_type;
12062         }
12063
12064         # empty line
12065         return wantarray ? ( ' ', $ibeg ) : ' ';
12066     }
12067 }
12068
12069 {
12070     my %is_good_keyword_breakpoint;
12071     my %is_lt_gt_le_ge;
12072
12073     sub set_bond_strengths {
12074
12075         BEGIN {
12076
12077             @_ = qw(if unless while until for foreach);
12078             @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12079
12080             @_ = qw(lt gt le ge);
12081             @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12082
12083             ###############################################################
12084             # NOTE: NO_BREAK's set here are HINTS which may not be honored;
12085             # essential NO_BREAKS's must be enforced in section 2, below.
12086             ###############################################################
12087
12088             # adding NEW_TOKENS: add a left and right bond strength by
12089             # mimmicking what is done for an existing token type.  You
12090             # can skip this step at first and take the default, then
12091             # tweak later to get desired results.
12092
12093             # The bond strengths should roughly follow precenence order where
12094             # possible.  If you make changes, please check the results very
12095             # carefully on a variety of scripts.
12096
12097             # no break around possible filehandle
12098             $left_bond_strength{'Z'}  = NO_BREAK;
12099             $right_bond_strength{'Z'} = NO_BREAK;
12100
12101             # never put a bare word on a new line:
12102             # example print (STDERR, "bla"); will fail with break after (
12103             $left_bond_strength{'w'} = NO_BREAK;
12104
12105         # blanks always have infinite strength to force breaks after real tokens
12106             $right_bond_strength{'b'} = NO_BREAK;
12107
12108             # try not to break on exponentation
12109             @_                       = qw" ** .. ... <=> ";
12110             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12111             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12112
12113             # The comma-arrow has very low precedence but not a good break point
12114             $left_bond_strength{'=>'}  = NO_BREAK;
12115             $right_bond_strength{'=>'} = NOMINAL;
12116
12117             # ok to break after label
12118             $left_bond_strength{'J'}  = NO_BREAK;
12119             $right_bond_strength{'J'} = NOMINAL;
12120             $left_bond_strength{'j'}  = STRONG;
12121             $right_bond_strength{'j'} = STRONG;
12122             $left_bond_strength{'A'}  = STRONG;
12123             $right_bond_strength{'A'} = STRONG;
12124
12125             $left_bond_strength{'->'}  = STRONG;
12126             $right_bond_strength{'->'} = VERY_STRONG;
12127
12128             # breaking AFTER these is just ok:
12129             @_                       = qw" % + - * / x  ";
12130             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12131             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12132
12133             # breaking BEFORE these is just ok:
12134             @_                       = qw" >> << ";
12135             @right_bond_strength{@_} = (STRONG) x scalar(@_);
12136             @left_bond_strength{@_}  = (NOMINAL) x scalar(@_);
12137
12138             # I prefer breaking before the string concatenation operator
12139             # because it can be hard to see at the end of a line
12140             # swap these to break after a '.'
12141             # this could be a future option
12142             $right_bond_strength{'.'} = STRONG;
12143             $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
12144
12145             @_                       = qw"} ] ) ";
12146             @left_bond_strength{@_}  = (STRONG) x scalar(@_);
12147             @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12148
12149             # make these a little weaker than nominal so that they get
12150             # favored for end-of-line characters
12151             @_ = qw"!= == =~ !~ ~~";
12152             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12153             @right_bond_strength{@_} =
12154               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12155
12156             # break AFTER these
12157             @_ = qw" < >  | & >= <=";
12158             @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12159             @right_bond_strength{@_} =
12160               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12161
12162             # breaking either before or after a quote is ok
12163             # but bias for breaking before a quote
12164             $left_bond_strength{'Q'}  = NOMINAL;
12165             $right_bond_strength{'Q'} = NOMINAL + 0.02;
12166             $left_bond_strength{'q'}  = NOMINAL;
12167             $right_bond_strength{'q'} = NOMINAL;
12168
12169             # starting a line with a keyword is usually ok
12170             $left_bond_strength{'k'} = NOMINAL;
12171
12172             # we usually want to bond a keyword strongly to what immediately
12173             # follows, rather than leaving it stranded at the end of a line
12174             $right_bond_strength{'k'} = STRONG;
12175
12176             $left_bond_strength{'G'}  = NOMINAL;
12177             $right_bond_strength{'G'} = STRONG;
12178
12179             # it is good to break AFTER various assignment operators
12180             @_ = qw(
12181               = **= += *= &= <<= &&=
12182               -= /= |= >>= ||= //=
12183               .= %= ^=
12184               x=
12185             );
12186             @left_bond_strength{@_} = (STRONG) x scalar(@_);
12187             @right_bond_strength{@_} =
12188               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12189
12190             # break BEFORE '&&' and '||' and '//'
12191             # set strength of '||' to same as '=' so that chains like
12192             # $a = $b || $c || $d   will break before the first '||'
12193             $right_bond_strength{'||'} = NOMINAL;
12194             $left_bond_strength{'||'}  = $right_bond_strength{'='};
12195
12196             # same thing for '//'
12197             $right_bond_strength{'//'} = NOMINAL;
12198             $left_bond_strength{'//'}  = $right_bond_strength{'='};
12199
12200             # set strength of && a little higher than ||
12201             $right_bond_strength{'&&'} = NOMINAL;
12202             $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
12203
12204             $left_bond_strength{';'}  = VERY_STRONG;
12205             $right_bond_strength{';'} = VERY_WEAK;
12206             $left_bond_strength{'f'}  = VERY_STRONG;
12207
12208             # make right strength of for ';' a little less than '='
12209             # to make for contents break after the ';' to avoid this:
12210             #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12211             #     $number_of_fields )
12212             # and make it weaker than ',' and 'and' too
12213             $right_bond_strength{'f'} = VERY_WEAK - 0.03;
12214
12215             # The strengths of ?/: should be somewhere between
12216             # an '=' and a quote (NOMINAL),
12217             # make strength of ':' slightly less than '?' to help
12218             # break long chains of ? : after the colons
12219             $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
12220             $right_bond_strength{':'} = NO_BREAK;
12221             $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
12222             $right_bond_strength{'?'} = NO_BREAK;
12223
12224             $left_bond_strength{','}  = VERY_STRONG;
12225             $right_bond_strength{','} = VERY_WEAK;
12226
12227             # Set bond strengths of certain keywords
12228             # make 'or', 'err', 'and' slightly weaker than a ','
12229             $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
12230             $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
12231             $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
12232             $left_bond_strength{'xor'}  = NOMINAL;
12233             $right_bond_strength{'and'} = NOMINAL;
12234             $right_bond_strength{'or'}  = NOMINAL;
12235             $right_bond_strength{'err'} = NOMINAL;
12236             $right_bond_strength{'xor'} = STRONG;
12237         }
12238
12239         # patch-its always ok to break at end of line
12240         $nobreak_to_go[$max_index_to_go] = 0;
12241
12242         # adding a small 'bias' to strengths is a simple way to make a line
12243         # break at the first of a sequence of identical terms.  For example,
12244         # to force long string of conditional operators to break with
12245         # each line ending in a ':', we can add a small number to the bond
12246         # strength of each ':'
12247         my $colon_bias = 0;
12248         my $amp_bias   = 0;
12249         my $bar_bias   = 0;
12250         my $and_bias   = 0;
12251         my $or_bias    = 0;
12252         my $dot_bias   = 0;
12253         my $f_bias     = 0;
12254         my $code_bias  = -.01;
12255         my $type       = 'b';
12256         my $token      = ' ';
12257         my $last_type;
12258         my $last_nonblank_type  = $type;
12259         my $last_nonblank_token = $token;
12260         my $delta_bias          = 0.0001;
12261         my $list_str            = $left_bond_strength{'?'};
12262
12263         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
12264             $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12265         );
12266
12267         # preliminary loop to compute bond strengths
12268         for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12269             $last_type = $type;
12270             if ( $type ne 'b' ) {
12271                 $last_nonblank_type  = $type;
12272                 $last_nonblank_token = $token;
12273             }
12274             $type = $types_to_go[$i];
12275
12276             # strength on both sides of a blank is the same
12277             if ( $type eq 'b' && $last_type ne 'b' ) {
12278                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12279                 next;
12280             }
12281
12282             $token               = $tokens_to_go[$i];
12283             $block_type          = $block_type_to_go[$i];
12284             $i_next              = $i + 1;
12285             $next_type           = $types_to_go[$i_next];
12286             $next_token          = $tokens_to_go[$i_next];
12287             $total_nesting_depth = $nesting_depth_to_go[$i_next];
12288             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12289             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
12290             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12291
12292             # Some token chemistry...  The decision about where to break a
12293             # line depends upon a "bond strength" between tokens.  The LOWER
12294             # the bond strength, the MORE likely a break.  The strength
12295             # values are based on trial-and-error, and need to be tweaked
12296             # occasionally to get desired results.  Things to keep in mind
12297             # are:
12298             #   1. relative strengths are important.  small differences
12299             #      in strengths can make big formatting differences.
12300             #   2. each indentation level adds one unit of bond strength
12301             #   3. a value of NO_BREAK makes an unbreakable bond
12302             #   4. a value of VERY_WEAK is the strength of a ','
12303             #   5. values below NOMINAL are considered ok break points
12304             #   6. values above NOMINAL are considered poor break points
12305             # We are computing the strength of the bond between the current
12306             # token and the NEXT token.
12307             my $bond_str = VERY_STRONG;    # a default, high strength
12308
12309             #---------------------------------------------------------------
12310             # section 1:
12311             # use minimum of left and right bond strengths if defined;
12312             # digraphs and trigraphs like to break on their left
12313             #---------------------------------------------------------------
12314             my $bsr = $right_bond_strength{$type};
12315
12316             if ( !defined($bsr) ) {
12317
12318                 if ( $is_digraph{$type} || $is_trigraph{$type} ) {
12319                     $bsr = STRONG;
12320                 }
12321                 else {
12322                     $bsr = VERY_STRONG;
12323                 }
12324             }
12325
12326             # define right bond strengths of certain keywords
12327             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12328                 $bsr = $right_bond_strength{$token};
12329             }
12330             elsif ( $token eq 'ne' or $token eq 'eq' ) {
12331                 $bsr = NOMINAL;
12332             }
12333             my $bsl = $left_bond_strength{$next_nonblank_type};
12334
12335             # set terminal bond strength to the nominal value
12336             # this will cause good preceding breaks to be retained
12337             if ( $i_next_nonblank > $max_index_to_go ) {
12338                 $bsl = NOMINAL;
12339             }
12340
12341             if ( !defined($bsl) ) {
12342
12343                 if (   $is_digraph{$next_nonblank_type}
12344                     || $is_trigraph{$next_nonblank_type} )
12345                 {
12346                     $bsl = WEAK;
12347                 }
12348                 else {
12349                     $bsl = VERY_STRONG;
12350                 }
12351             }
12352
12353             # define right bond strengths of certain keywords
12354             if ( $next_nonblank_type eq 'k'
12355                 && defined( $left_bond_strength{$next_nonblank_token} ) )
12356             {
12357                 $bsl = $left_bond_strength{$next_nonblank_token};
12358             }
12359             elsif ($next_nonblank_token eq 'ne'
12360                 or $next_nonblank_token eq 'eq' )
12361             {
12362                 $bsl = NOMINAL;
12363             }
12364             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12365                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12366             }
12367
12368             # Note: it might seem that we would want to keep a NO_BREAK if
12369             # either token has this value.  This didn't work, because in an
12370             # arrow list, it prevents the comma from separating from the
12371             # following bare word (which is probably quoted by its arrow).
12372             # So necessary NO_BREAK's have to be handled as special cases
12373             # in the final section.
12374             $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12375             my $bond_str_1 = $bond_str;
12376
12377             #---------------------------------------------------------------
12378             # section 2:
12379             # special cases
12380             #---------------------------------------------------------------
12381
12382             # allow long lines before final { in an if statement, as in:
12383             #    if (..........
12384             #      ..........)
12385             #    {
12386             #
12387             # Otherwise, the line before the { tends to be too short.
12388             if ( $type eq ')' ) {
12389                 if ( $next_nonblank_type eq '{' ) {
12390                     $bond_str = VERY_WEAK + 0.03;
12391                 }
12392             }
12393
12394             elsif ( $type eq '(' ) {
12395                 if ( $next_nonblank_type eq '{' ) {
12396                     $bond_str = NOMINAL;
12397                 }
12398             }
12399
12400             # break on something like '} (', but keep this stronger than a ','
12401             # example is in 'howe.pl'
12402             elsif ( $type eq 'R' or $type eq '}' ) {
12403                 if ( $next_nonblank_type eq '(' ) {
12404                     $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
12405                 }
12406             }
12407
12408             #-----------------------------------------------------------------
12409             # adjust bond strength bias
12410             #-----------------------------------------------------------------
12411
12412             elsif ( $type eq 'f' ) {
12413                 $bond_str += $f_bias;
12414                 $f_bias   += $delta_bias;
12415             }
12416
12417           # in long ?: conditionals, bias toward just one set per line (colon.t)
12418             elsif ( $type eq ':' ) {
12419                 if ( !$want_break_before{$type} ) {
12420                     $bond_str   += $colon_bias;
12421                     $colon_bias += $delta_bias;
12422                 }
12423             }
12424
12425             if (   $next_nonblank_type eq ':'
12426                 && $want_break_before{$next_nonblank_type} )
12427             {
12428                 $bond_str   += $colon_bias;
12429                 $colon_bias += $delta_bias;
12430             }
12431
12432             # if leading '.' is used, align all but 'short' quotes;
12433             # the idea is to not place something like "\n" on a single line.
12434             elsif ( $next_nonblank_type eq '.' ) {
12435                 if ( $want_break_before{'.'} ) {
12436                     unless (
12437                         $last_nonblank_type eq '.'
12438                         && (
12439                             length($token) <=
12440                             $rOpts_short_concatenation_item_length )
12441                         && ( $token !~ /^[\)\]\}]$/ )
12442                       )
12443                     {
12444                         $dot_bias += $delta_bias;
12445                     }
12446                     $bond_str += $dot_bias;
12447                 }
12448             }
12449             elsif ($next_nonblank_type eq '&&'
12450                 && $want_break_before{$next_nonblank_type} )
12451             {
12452                 $bond_str += $amp_bias;
12453                 $amp_bias += $delta_bias;
12454             }
12455             elsif ($next_nonblank_type eq '||'
12456                 && $want_break_before{$next_nonblank_type} )
12457             {
12458                 $bond_str += $bar_bias;
12459                 $bar_bias += $delta_bias;
12460             }
12461             elsif ( $next_nonblank_type eq 'k' ) {
12462
12463                 if (   $next_nonblank_token eq 'and'
12464                     && $want_break_before{$next_nonblank_token} )
12465                 {
12466                     $bond_str += $and_bias;
12467                     $and_bias += $delta_bias;
12468                 }
12469                 elsif ($next_nonblank_token =~ /^(or|err)$/
12470                     && $want_break_before{$next_nonblank_token} )
12471                 {
12472                     $bond_str += $or_bias;
12473                     $or_bias  += $delta_bias;
12474                 }
12475
12476                 # FIXME: needs more testing
12477                 elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
12478                     $bond_str = $list_str if ( $bond_str > $list_str );
12479                 }
12480                 elsif ( $token eq 'err'
12481                     && !$want_break_before{$token} )
12482                 {
12483                     $bond_str += $or_bias;
12484                     $or_bias  += $delta_bias;
12485                 }
12486             }
12487
12488             if ( $type eq ':'
12489                 && !$want_break_before{$type} )
12490             {
12491                 $bond_str   += $colon_bias;
12492                 $colon_bias += $delta_bias;
12493             }
12494             elsif ( $type eq '&&'
12495                 && !$want_break_before{$type} )
12496             {
12497                 $bond_str += $amp_bias;
12498                 $amp_bias += $delta_bias;
12499             }
12500             elsif ( $type eq '||'
12501                 && !$want_break_before{$type} )
12502             {
12503                 $bond_str += $bar_bias;
12504                 $bar_bias += $delta_bias;
12505             }
12506             elsif ( $type eq 'k' ) {
12507
12508                 if ( $token eq 'and'
12509                     && !$want_break_before{$token} )
12510                 {
12511                     $bond_str += $and_bias;
12512                     $and_bias += $delta_bias;
12513                 }
12514                 elsif ( $token eq 'or'
12515                     && !$want_break_before{$token} )
12516                 {
12517                     $bond_str += $or_bias;
12518                     $or_bias  += $delta_bias;
12519                 }
12520             }
12521
12522             # keep matrix and hash indices together
12523             # but make them a little below STRONG to allow breaking open
12524             # something like {'some-word'}{'some-very-long-word'} at the }{
12525             # (bracebrk.t)
12526             if (   ( $type eq ']' or $type eq 'R' )
12527                 && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
12528               )
12529             {
12530                 $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
12531             }
12532
12533             if ( $next_nonblank_token =~ /^->/ ) {
12534
12535                 # increase strength to the point where a break in the following
12536                 # will be after the opening paren rather than at the arrow:
12537                 #    $a->$b($c);
12538                 if ( $type eq 'i' ) {
12539                     $bond_str = 1.45 * STRONG;
12540                 }
12541
12542                 elsif ( $type =~ /^[\)\]\}R]$/ ) {
12543                     $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
12544                 }
12545
12546                 # otherwise make strength before an '->' a little over a '+'
12547                 else {
12548                     if ( $bond_str <= NOMINAL ) {
12549                         $bond_str = NOMINAL + 0.01;
12550                     }
12551                 }
12552             }
12553
12554             if ( $token eq ')' && $next_nonblank_token eq '[' ) {
12555                 $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
12556             }
12557
12558             # map1.t -- correct for a quirk in perl
12559             if (   $token eq '('
12560                 && $next_nonblank_type eq 'i'
12561                 && $last_nonblank_type eq 'k'
12562                 && $is_sort_map_grep{$last_nonblank_token} )
12563
12564               #     /^(sort|map|grep)$/ )
12565             {
12566                 $bond_str = NO_BREAK;
12567             }
12568
12569             # extrude.t: do not break before paren at:
12570             #    -l pid_filename(
12571             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
12572                 $bond_str = NO_BREAK;
12573             }
12574
12575             # good to break after end of code blocks
12576             if ( $type eq '}' && $block_type ) {
12577
12578                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
12579                 $code_bias += $delta_bias;
12580             }
12581
12582             if ( $type eq 'k' ) {
12583
12584                 # allow certain control keywords to stand out
12585                 if (   $next_nonblank_type eq 'k'
12586                     && $is_last_next_redo_return{$token} )
12587                 {
12588                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
12589                 }
12590
12591 # Don't break after keyword my.  This is a quick fix for a
12592 # rare problem with perl. An example is this line from file
12593 # Container.pm:
12594 # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
12595
12596                 if ( $token eq 'my' ) {
12597                     $bond_str = NO_BREAK;
12598                 }
12599
12600             }
12601
12602             # good to break before 'if', 'unless', etc
12603             if ( $is_if_brace_follower{$next_nonblank_token} ) {
12604                 $bond_str = VERY_WEAK;
12605             }
12606
12607             if ( $next_nonblank_type eq 'k' ) {
12608
12609                 # keywords like 'unless', 'if', etc, within statements
12610                 # make good breaks
12611                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
12612                     $bond_str = VERY_WEAK / 1.05;
12613                 }
12614             }
12615
12616             # try not to break before a comma-arrow
12617             elsif ( $next_nonblank_type eq '=>' ) {
12618                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
12619             }
12620
12621          #----------------------------------------------------------------------
12622          # only set NO_BREAK's from here on
12623          #----------------------------------------------------------------------
12624             if ( $type eq 'C' or $type eq 'U' ) {
12625
12626                 # use strict requires that bare word and => not be separated
12627                 if ( $next_nonblank_type eq '=>' ) {
12628                     $bond_str = NO_BREAK;
12629                 }
12630
12631             }
12632
12633            # use strict requires that bare word within braces not start new line
12634             elsif ( $type eq 'L' ) {
12635
12636                 if ( $next_nonblank_type eq 'w' ) {
12637                     $bond_str = NO_BREAK;
12638                 }
12639             }
12640
12641             # in older version of perl, use strict can cause problems with
12642             # breaks before bare words following opening parens.  For example,
12643             # this will fail under older versions if a break is made between
12644             # '(' and 'MAIL':
12645             #  use strict;
12646             #  open( MAIL, "a long filename or command");
12647             #  close MAIL;
12648             elsif ( $type eq '{' ) {
12649
12650                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
12651
12652                     # but it's fine to break if the word is followed by a '=>'
12653                     # or if it is obviously a sub call
12654                     my $i_next_next_nonblank = $i_next_nonblank + 1;
12655                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
12656                     if (   $next_next_type eq 'b'
12657                         && $i_next_nonblank < $max_index_to_go )
12658                     {
12659                         $i_next_next_nonblank++;
12660                         $next_next_type = $types_to_go[$i_next_next_nonblank];
12661                     }
12662
12663                     ##if ( $next_next_type ne '=>' ) {
12664                     # these are ok: '->xxx', '=>', '('
12665
12666                     # We'll check for an old breakpoint and keep a leading
12667                     # bareword if it was that way in the input file.
12668                     # Presumably it was ok that way.  For example, the
12669                     # following would remain unchanged:
12670                     #
12671                     # @months = (
12672                     #   January,   February, March,    April,
12673                     #   May,       June,     July,     August,
12674                     #   September, October,  November, December,
12675                     # );
12676                     #
12677                     # This should be sufficient:
12678                     if ( !$old_breakpoint_to_go[$i]
12679                         && ( $next_next_type eq ',' || $next_next_type eq '}' )
12680                       )
12681                     {
12682                         $bond_str = NO_BREAK;
12683                     }
12684                 }
12685             }
12686
12687             elsif ( $type eq 'w' ) {
12688
12689                 if ( $next_nonblank_type eq 'R' ) {
12690                     $bond_str = NO_BREAK;
12691                 }
12692
12693                 # use strict requires that bare word and => not be separated
12694                 if ( $next_nonblank_type eq '=>' ) {
12695                     $bond_str = NO_BREAK;
12696                 }
12697             }
12698
12699             # in fact, use strict hates bare words on any new line.  For
12700             # example, a break before the underscore here provokes the
12701             # wrath of use strict:
12702             # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
12703             elsif ( $type eq 'F' ) {
12704                 $bond_str = NO_BREAK;
12705             }
12706
12707             # use strict does not allow separating type info from trailing { }
12708             # testfile is readmail.pl
12709             elsif ( $type eq 't' or $type eq 'i' ) {
12710
12711                 if ( $next_nonblank_type eq 'L' ) {
12712                     $bond_str = NO_BREAK;
12713                 }
12714             }
12715
12716             # Do not break between a possible filehandle and a ? or / and do
12717             # not introduce a break after it if there is no blank
12718             # (extrude.t)
12719             elsif ( $type eq 'Z' ) {
12720
12721                 # dont break..
12722                 if (
12723
12724                     # if there is no blank and we do not want one. Examples:
12725                     #    print $x++    # do not break after $x
12726                     #    print HTML"HELLO"   # break ok after HTML
12727                     (
12728                            $next_type ne 'b'
12729                         && defined( $want_left_space{$next_type} )
12730                         && $want_left_space{$next_type} == WS_NO
12731                     )
12732
12733                     # or we might be followed by the start of a quote
12734                     || $next_nonblank_type =~ /^[\/\?]$/
12735                   )
12736                 {
12737                     $bond_str = NO_BREAK;
12738                 }
12739             }
12740
12741             # Do not break before a possible file handle
12742             if ( $next_nonblank_type eq 'Z' ) {
12743                 $bond_str = NO_BREAK;
12744             }
12745
12746             # As a defensive measure, do not break between a '(' and a
12747             # filehandle.  In some cases, this can cause an error.  For
12748             # example, the following program works:
12749             #    my $msg="hi!\n";
12750             #    print
12751             #    ( STDOUT
12752             #    $msg
12753             #    );
12754             #
12755             # But this program fails:
12756             #    my $msg="hi!\n";
12757             #    print
12758             #    (
12759             #    STDOUT
12760             #    $msg
12761             #    );
12762             #
12763             # This is normally only a problem with the 'extrude' option
12764             if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
12765                 $bond_str = NO_BREAK;
12766             }
12767
12768             # patch to put cuddled elses back together when on multiple
12769             # lines, as in: } \n else \n { \n
12770             if ($rOpts_cuddled_else) {
12771
12772                 if (   ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
12773                     || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
12774                 {
12775                     $bond_str = NO_BREAK;
12776                 }
12777             }
12778
12779             # keep '}' together with ';'
12780             if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
12781                 $bond_str = NO_BREAK;
12782             }
12783
12784             # never break between sub name and opening paren
12785             if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
12786                 $bond_str = NO_BREAK;
12787             }
12788
12789             #---------------------------------------------------------------
12790             # section 3:
12791             # now take nesting depth into account
12792             #---------------------------------------------------------------
12793             # final strength incorporates the bond strength and nesting depth
12794             my $strength;
12795
12796             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
12797                 if ( $total_nesting_depth > 0 ) {
12798                     $strength = $bond_str + $total_nesting_depth;
12799                 }
12800                 else {
12801                     $strength = $bond_str;
12802                 }
12803             }
12804             else {
12805                 $strength = NO_BREAK;
12806             }
12807
12808             # always break after side comment
12809             if ( $type eq '#' ) { $strength = 0 }
12810
12811             $bond_strength_to_go[$i] = $strength;
12812
12813             FORMATTER_DEBUG_FLAG_BOND && do {
12814                 my $str = substr( $token, 0, 15 );
12815                 $str .= ' ' x ( 16 - length($str) );
12816                 print
12817 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
12818             };
12819         }
12820     }
12821
12822 }
12823
12824 sub pad_array_to_go {
12825
12826     # to simplify coding in scan_list and set_bond_strengths, it helps
12827     # to create some extra blank tokens at the end of the arrays
12828     $tokens_to_go[ $max_index_to_go + 1 ] = '';
12829     $tokens_to_go[ $max_index_to_go + 2 ] = '';
12830     $types_to_go[ $max_index_to_go + 1 ]  = 'b';
12831     $types_to_go[ $max_index_to_go + 2 ]  = 'b';
12832     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
12833       $nesting_depth_to_go[$max_index_to_go];
12834
12835     #    /^[R\}\)\]]$/
12836     if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
12837         if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
12838
12839             # shouldn't happen:
12840             unless ( get_saw_brace_error() ) {
12841                 warning(
12842 "Program bug in scan_list: hit nesting error which should have been caught\n"
12843                 );
12844                 report_definite_bug();
12845             }
12846         }
12847         else {
12848             $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
12849         }
12850     }
12851
12852     #       /^[L\{\(\[]$/
12853     elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
12854         $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
12855     }
12856 }
12857
12858 {    # begin scan_list
12859
12860     my (
12861         $block_type,                $current_depth,
12862         $depth,                     $i,
12863         $i_last_nonblank_token,     $last_colon_sequence_number,
12864         $last_nonblank_token,       $last_nonblank_type,
12865         $last_old_breakpoint_count, $minimum_depth,
12866         $next_nonblank_block_type,  $next_nonblank_token,
12867         $next_nonblank_type,        $old_breakpoint_count,
12868         $starting_breakpoint_count, $starting_depth,
12869         $token,                     $type,
12870         $type_sequence,
12871     );
12872
12873     my (
12874         @breakpoint_stack,              @breakpoint_undo_stack,
12875         @comma_index,                   @container_type,
12876         @identifier_count_stack,        @index_before_arrow,
12877         @interrupted_list,              @item_count_stack,
12878         @last_comma_index,              @last_dot_index,
12879         @last_nonblank_type,            @old_breakpoint_count_stack,
12880         @opening_structure_index_stack, @rfor_semicolon_list,
12881         @has_old_logical_breakpoints,   @rand_or_list,
12882         @i_equals,
12883     );
12884
12885     # routine to define essential variables when we go 'up' to
12886     # a new depth
12887     sub check_for_new_minimum_depth {
12888         my $depth = shift;
12889         if ( $depth < $minimum_depth ) {
12890
12891             $minimum_depth = $depth;
12892
12893             # these arrays need not retain values between calls
12894             $breakpoint_stack[$depth]              = $starting_breakpoint_count;
12895             $container_type[$depth]                = "";
12896             $identifier_count_stack[$depth]        = 0;
12897             $index_before_arrow[$depth]            = -1;
12898             $interrupted_list[$depth]              = 1;
12899             $item_count_stack[$depth]              = 0;
12900             $last_nonblank_type[$depth]            = "";
12901             $opening_structure_index_stack[$depth] = -1;
12902
12903             $breakpoint_undo_stack[$depth]       = undef;
12904             $comma_index[$depth]                 = undef;
12905             $last_comma_index[$depth]            = undef;
12906             $last_dot_index[$depth]              = undef;
12907             $old_breakpoint_count_stack[$depth]  = undef;
12908             $has_old_logical_breakpoints[$depth] = 0;
12909             $rand_or_list[$depth]                = [];
12910             $rfor_semicolon_list[$depth]         = [];
12911             $i_equals[$depth]                    = -1;
12912
12913             # these arrays must retain values between calls
12914             if ( !defined( $has_broken_sublist[$depth] ) ) {
12915                 $dont_align[$depth]         = 0;
12916                 $has_broken_sublist[$depth] = 0;
12917                 $want_comma_break[$depth]   = 0;
12918             }
12919         }
12920     }
12921
12922     # routine to decide which commas to break at within a container;
12923     # returns:
12924     #   $bp_count = number of comma breakpoints set
12925     #   $do_not_break_apart = a flag indicating if container need not
12926     #     be broken open
12927     sub set_comma_breakpoints {
12928
12929         my $dd                 = shift;
12930         my $bp_count           = 0;
12931         my $do_not_break_apart = 0;
12932         if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
12933
12934             my $fbc = $forced_breakpoint_count;
12935
12936             # always open comma lists not preceded by keywords,
12937             # barewords, identifiers (that is, anything that doesn't
12938             # look like a function call)
12939             my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
12940
12941             set_comma_breakpoints_do(
12942                 $dd,
12943                 $opening_structure_index_stack[$dd],
12944                 $i,
12945                 $item_count_stack[$dd],
12946                 $identifier_count_stack[$dd],
12947                 $comma_index[$dd],
12948                 $next_nonblank_type,
12949                 $container_type[$dd],
12950                 $interrupted_list[$dd],
12951                 \$do_not_break_apart,
12952                 $must_break_open,
12953             );
12954             $bp_count = $forced_breakpoint_count - $fbc;
12955             $do_not_break_apart = 0 if $must_break_open;
12956         }
12957         return ( $bp_count, $do_not_break_apart );
12958     }
12959
12960     my %is_logical_container;
12961
12962     BEGIN {
12963         @_ = qw# if elsif unless while and or err not && | || ? : ! #;
12964         @is_logical_container{@_} = (1) x scalar(@_);
12965     }
12966
12967     sub set_for_semicolon_breakpoints {
12968         my $dd = shift;
12969         foreach ( @{ $rfor_semicolon_list[$dd] } ) {
12970             set_forced_breakpoint($_);
12971         }
12972     }
12973
12974     sub set_logical_breakpoints {
12975         my $dd = shift;
12976         if (
12977                $item_count_stack[$dd] == 0
12978             && $is_logical_container{ $container_type[$dd] }
12979
12980             # TESTING:
12981             || $has_old_logical_breakpoints[$dd]
12982           )
12983         {
12984
12985             # Look for breaks in this order:
12986             # 0   1    2   3
12987             # or  and  ||  &&
12988             foreach my $i ( 0 .. 3 ) {
12989                 if ( $rand_or_list[$dd][$i] ) {
12990                     foreach ( @{ $rand_or_list[$dd][$i] } ) {
12991                         set_forced_breakpoint($_);
12992                     }
12993
12994                     # break at any 'if' and 'unless' too
12995                     foreach ( @{ $rand_or_list[$dd][4] } ) {
12996                         set_forced_breakpoint($_);
12997                     }
12998                     $rand_or_list[$dd] = [];
12999                     last;
13000                 }
13001             }
13002         }
13003     }
13004
13005     sub is_unbreakable_container {
13006
13007         # never break a container of one of these types
13008         # because bad things can happen (map1.t)
13009         my $dd = shift;
13010         $is_sort_map_grep{ $container_type[$dd] };
13011     }
13012
13013     sub scan_list {
13014
13015         # This routine is responsible for setting line breaks for all lists,
13016         # so that hierarchical structure can be displayed and so that list
13017         # items can be vertically aligned.  The output of this routine is
13018         # stored in the array @forced_breakpoint_to_go, which is used to set
13019         # final breakpoints.
13020
13021         $starting_depth = $nesting_depth_to_go[0];
13022
13023         $block_type                 = ' ';
13024         $current_depth              = $starting_depth;
13025         $i                          = -1;
13026         $last_colon_sequence_number = -1;
13027         $last_nonblank_token        = ';';
13028         $last_nonblank_type         = ';';
13029         $last_old_breakpoint_count  = 0;
13030         $minimum_depth = $current_depth + 1;    # forces update in check below
13031         $old_breakpoint_count      = 0;
13032         $starting_breakpoint_count = $forced_breakpoint_count;
13033         $token                     = ';';
13034         $type                      = ';';
13035         $type_sequence             = '';
13036
13037         check_for_new_minimum_depth($current_depth);
13038
13039         my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13040         my $want_previous_breakpoint = -1;
13041
13042         my $saw_good_breakpoint;
13043         my $i_line_end   = -1;
13044         my $i_line_start = -1;
13045
13046         # loop over all tokens in this batch
13047         while ( ++$i <= $max_index_to_go ) {
13048             if ( $type ne 'b' ) {
13049                 $i_last_nonblank_token = $i - 1;
13050                 $last_nonblank_type    = $type;
13051                 $last_nonblank_token   = $token;
13052             }
13053             $type          = $types_to_go[$i];
13054             $block_type    = $block_type_to_go[$i];
13055             $token         = $tokens_to_go[$i];
13056             $type_sequence = $type_sequence_to_go[$i];
13057             my $next_type       = $types_to_go[ $i + 1 ];
13058             my $next_token      = $tokens_to_go[ $i + 1 ];
13059             my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13060             $next_nonblank_type       = $types_to_go[$i_next_nonblank];
13061             $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
13062             $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13063
13064             # set break if flag was set
13065             if ( $want_previous_breakpoint >= 0 ) {
13066                 set_forced_breakpoint($want_previous_breakpoint);
13067                 $want_previous_breakpoint = -1;
13068             }
13069
13070             $last_old_breakpoint_count = $old_breakpoint_count;
13071             if ( $old_breakpoint_to_go[$i] ) {
13072                 $i_line_end   = $i;
13073                 $i_line_start = $i_next_nonblank;
13074
13075                 $old_breakpoint_count++;
13076
13077                 # Break before certain keywords if user broke there and
13078                 # this is a 'safe' break point. The idea is to retain
13079                 # any preferred breaks for sequential list operations,
13080                 # like a schwartzian transform.
13081                 if ($rOpts_break_at_old_keyword_breakpoints) {
13082                     if (
13083                            $next_nonblank_type eq 'k'
13084                         && $is_keyword_returning_list{$next_nonblank_token}
13085                         && (   $type =~ /^[=\)\]\}Riw]$/
13086                             || $type eq 'k'
13087                             && $is_keyword_returning_list{$token} )
13088                       )
13089                     {
13090
13091                         # we actually have to set this break next time through
13092                         # the loop because if we are at a closing token (such
13093                         # as '}') which forms a one-line block, this break might
13094                         # get undone.
13095                         $want_previous_breakpoint = $i;
13096                     }
13097                 }
13098             }
13099             next if ( $type eq 'b' );
13100             $depth = $nesting_depth_to_go[ $i + 1 ];
13101
13102             # safety check - be sure we always break after a comment
13103             # Shouldn't happen .. an error here probably means that the
13104             # nobreak flag did not get turned off correctly during
13105             # formatting.
13106             if ( $type eq '#' ) {
13107                 if ( $i != $max_index_to_go ) {
13108                     warning(
13109 "Non-fatal program bug: backup logic needed to break after a comment\n"
13110                     );
13111                     report_definite_bug();
13112                     $nobreak_to_go[$i] = 0;
13113                     set_forced_breakpoint($i);
13114                 }
13115             }
13116
13117             # Force breakpoints at certain tokens in long lines.
13118             # Note that such breakpoints will be undone later if these tokens
13119             # are fully contained within parens on a line.
13120             if (
13121                    $type eq 'k'
13122                 && $i > 0
13123                 && $token =~ /^(if|unless)$/
13124                 && (
13125                     $is_long_line
13126
13127                     # or container is broken (by side-comment, etc)
13128                     || (   $next_nonblank_token eq '('
13129                         && $mate_index_to_go[$i_next_nonblank] < $i )
13130                 )
13131               )
13132             {
13133                 set_forced_breakpoint( $i - 1 );
13134             }
13135
13136             # remember locations of '||'  and '&&' for possible breaks if we
13137             # decide this is a long logical expression.
13138             if ( $type eq '||' ) {
13139                 push @{ $rand_or_list[$depth][2] }, $i;
13140                 ++$has_old_logical_breakpoints[$depth]
13141                   if ( ( $i == $i_line_start || $i == $i_line_end )
13142                     && $rOpts_break_at_old_logical_breakpoints );
13143             }
13144             elsif ( $type eq '&&' ) {
13145                 push @{ $rand_or_list[$depth][3] }, $i;
13146                 ++$has_old_logical_breakpoints[$depth]
13147                   if ( ( $i == $i_line_start || $i == $i_line_end )
13148                     && $rOpts_break_at_old_logical_breakpoints );
13149             }
13150             elsif ( $type eq 'f' ) {
13151                 push @{ $rfor_semicolon_list[$depth] }, $i;
13152             }
13153             elsif ( $type eq 'k' ) {
13154                 if ( $token eq 'and' ) {
13155                     push @{ $rand_or_list[$depth][1] }, $i;
13156                     ++$has_old_logical_breakpoints[$depth]
13157                       if ( ( $i == $i_line_start || $i == $i_line_end )
13158                         && $rOpts_break_at_old_logical_breakpoints );
13159                 }
13160
13161                 # break immediately at 'or's which are probably not in a logical
13162                 # block -- but we will break in logical breaks below so that
13163                 # they do not add to the forced_breakpoint_count
13164                 elsif ( $token eq 'or' ) {
13165                     push @{ $rand_or_list[$depth][0] }, $i;
13166                     ++$has_old_logical_breakpoints[$depth]
13167                       if ( ( $i == $i_line_start || $i == $i_line_end )
13168                         && $rOpts_break_at_old_logical_breakpoints );
13169                     if ( $is_logical_container{ $container_type[$depth] } ) {
13170                     }
13171                     else {
13172                         if ($is_long_line) { set_forced_breakpoint($i) }
13173                         elsif ( ( $i == $i_line_start || $i == $i_line_end )
13174                             && $rOpts_break_at_old_logical_breakpoints )
13175                         {
13176                             $saw_good_breakpoint = 1;
13177                         }
13178                     }
13179                 }
13180                 elsif ( $token eq 'if' || $token eq 'unless' ) {
13181                     push @{ $rand_or_list[$depth][4] }, $i;
13182                     if ( ( $i == $i_line_start || $i == $i_line_end )
13183                         && $rOpts_break_at_old_logical_breakpoints )
13184                     {
13185                         set_forced_breakpoint($i);
13186                     }
13187                 }
13188             }
13189             elsif ( $is_assignment{$type} ) {
13190                 $i_equals[$depth] = $i;
13191             }
13192
13193             if ($type_sequence) {
13194
13195                 # handle any postponed closing breakpoints
13196                 if ( $token =~ /^[\)\]\}\:]$/ ) {
13197                     if ( $type eq ':' ) {
13198                         $last_colon_sequence_number = $type_sequence;
13199
13200                         # TESTING: retain break at a ':' line break
13201                         if ( ( $i == $i_line_start || $i == $i_line_end )
13202                             && $rOpts_break_at_old_ternary_breakpoints )
13203                         {
13204
13205                             # TESTING:
13206                             set_forced_breakpoint($i);
13207
13208                             # break at previous '='
13209                             if ( $i_equals[$depth] > 0 ) {
13210                                 set_forced_breakpoint( $i_equals[$depth] );
13211                                 $i_equals[$depth] = -1;
13212                             }
13213                         }
13214                     }
13215                     if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
13216                         my $inc = ( $type eq ':' ) ? 0 : 1;
13217                         set_forced_breakpoint( $i - $inc );
13218                         delete $postponed_breakpoint{$type_sequence};
13219                     }
13220                 }
13221
13222                 # set breaks at ?/: if they will get separated (and are
13223                 # not a ?/: chain), or if the '?' is at the end of the
13224                 # line
13225                 elsif ( $token eq '?' ) {
13226                     my $i_colon = $mate_index_to_go[$i];
13227                     if (
13228                         $i_colon <= 0  # the ':' is not in this batch
13229                         || $i == 0     # this '?' is the first token of the line
13230                         || $i ==
13231                         $max_index_to_go    # or this '?' is the last token
13232                       )
13233                     {
13234
13235                         # don't break at a '?' if preceded by ':' on
13236                         # this line of previous ?/: pair on this line.
13237                         # This is an attempt to preserve a chain of ?/:
13238                         # expressions (elsif2.t).  And don't break if
13239                         # this has a side comment.
13240                         set_forced_breakpoint($i)
13241                           unless (
13242                             $type_sequence == (
13243                                 $last_colon_sequence_number +
13244                                   TYPE_SEQUENCE_INCREMENT
13245                             )
13246                             || $tokens_to_go[$max_index_to_go] eq '#'
13247                           );
13248                         set_closing_breakpoint($i);
13249                     }
13250                 }
13251             }
13252
13253 #print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";
13254
13255             #------------------------------------------------------------
13256             # Handle Increasing Depth..
13257             #
13258             # prepare for a new list when depth increases
13259             # token $i is a '(','{', or '['
13260             #------------------------------------------------------------
13261             if ( $depth > $current_depth ) {
13262
13263                 $breakpoint_stack[$depth]       = $forced_breakpoint_count;
13264                 $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
13265                 $has_broken_sublist[$depth]     = 0;
13266                 $identifier_count_stack[$depth] = 0;
13267                 $index_before_arrow[$depth]     = -1;
13268                 $interrupted_list[$depth]       = 0;
13269                 $item_count_stack[$depth]       = 0;
13270                 $last_comma_index[$depth]       = undef;
13271                 $last_dot_index[$depth]         = undef;
13272                 $last_nonblank_type[$depth]     = $last_nonblank_type;
13273                 $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
13274                 $opening_structure_index_stack[$depth] = $i;
13275                 $rand_or_list[$depth]                  = [];
13276                 $rfor_semicolon_list[$depth]           = [];
13277                 $i_equals[$depth]                      = -1;
13278                 $want_comma_break[$depth]              = 0;
13279                 $container_type[$depth] =
13280                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13281                   ? $last_nonblank_token
13282                   : "";
13283                 $has_old_logical_breakpoints[$depth] = 0;
13284
13285                 # if line ends here then signal closing token to break
13286                 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13287                 {
13288                     set_closing_breakpoint($i);
13289                 }
13290
13291                 # Not all lists of values should be vertically aligned..
13292                 $dont_align[$depth] =
13293
13294                   # code BLOCKS are handled at a higher level
13295                   ( $block_type ne "" )
13296
13297                   # certain paren lists
13298                   || ( $type eq '(' ) && (
13299
13300                     # it does not usually look good to align a list of
13301                     # identifiers in a parameter list, as in:
13302                     #    my($var1, $var2, ...)
13303                     # (This test should probably be refined, for now I'm just
13304                     # testing for any keyword)
13305                     ( $last_nonblank_type eq 'k' )
13306
13307                     # a trailing '(' usually indicates a non-list
13308                     || ( $next_nonblank_type eq '(' )
13309                   );
13310
13311                 # patch to outdent opening brace of long if/for/..
13312                 # statements (like this one).  See similar coding in
13313                 # set_continuation breaks.  We have also catch it here for
13314                 # short line fragments which otherwise will not go through
13315                 # set_continuation_breaks.
13316                 if (
13317                     $block_type
13318
13319                     # if we have the ')' but not its '(' in this batch..
13320                     && ( $last_nonblank_token eq ')' )
13321                     && $mate_index_to_go[$i_last_nonblank_token] < 0
13322
13323                     # and user wants brace to left
13324                     && !$rOpts->{'opening-brace-always-on-right'}
13325
13326                     && ( $type  eq '{' )    # should be true
13327                     && ( $token eq '{' )    # should be true
13328                   )
13329                 {
13330                     set_forced_breakpoint( $i - 1 );
13331                 }
13332             }
13333
13334             #------------------------------------------------------------
13335             # Handle Decreasing Depth..
13336             #
13337             # finish off any old list when depth decreases
13338             # token $i is a ')','}', or ']'
13339             #------------------------------------------------------------
13340             elsif ( $depth < $current_depth ) {
13341
13342                 check_for_new_minimum_depth($depth);
13343
13344                 # force all outer logical containers to break after we see on
13345                 # old breakpoint
13346                 $has_old_logical_breakpoints[$depth] ||=
13347                   $has_old_logical_breakpoints[$current_depth];
13348
13349                 # Patch to break between ') {' if the paren list is broken.
13350                 # There is similar logic in set_continuation_breaks for
13351                 # non-broken lists.
13352                 if (   $token eq ')'
13353                     && $next_nonblank_block_type
13354                     && $interrupted_list[$current_depth]
13355                     && $next_nonblank_type eq '{'
13356                     && !$rOpts->{'opening-brace-always-on-right'} )
13357                 {
13358                     set_forced_breakpoint($i);
13359                 }
13360
13361 #print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
13362
13363                 # set breaks at commas if necessary
13364                 my ( $bp_count, $do_not_break_apart ) =
13365                   set_comma_breakpoints($current_depth);
13366
13367                 my $i_opening = $opening_structure_index_stack[$current_depth];
13368                 my $saw_opening_structure = ( $i_opening >= 0 );
13369
13370                 # this term is long if we had to break at interior commas..
13371                 my $is_long_term = $bp_count > 0;
13372
13373                 # ..or if the length between opening and closing parens exceeds
13374                 # allowed line length
13375                 if ( !$is_long_term && $saw_opening_structure ) {
13376                     my $i_opening_minus = find_token_starting_list($i_opening);
13377
13378                     # Note: we have to allow for one extra space after a
13379                     # closing token so that we do not strand a comma or
13380                     # semicolon, hence the '>=' here (oneline.t)
13381                     $is_long_term =
13382                       excess_line_length( $i_opening_minus, $i ) >= 0;
13383                 }
13384
13385                 # We've set breaks after all comma-arrows.  Now we have to
13386                 # undo them if this can be a one-line block
13387                 # (the only breakpoints set will be due to comma-arrows)
13388                 if (
13389
13390                     # user doesn't require breaking after all comma-arrows
13391                     ( $rOpts_comma_arrow_breakpoints != 0 )
13392
13393                     # and if the opening structure is in this batch
13394                     && $saw_opening_structure
13395
13396                     # and either on the same old line
13397                     && (
13398                         $old_breakpoint_count_stack[$current_depth] ==
13399                         $last_old_breakpoint_count
13400
13401                         # or user wants to form long blocks with arrows
13402                         || $rOpts_comma_arrow_breakpoints == 2
13403                     )
13404
13405                   # and we made some breakpoints between the opening and closing
13406                     && ( $breakpoint_undo_stack[$current_depth] <
13407                         $forced_breakpoint_undo_count )
13408
13409                     # and this block is short enough to fit on one line
13410                     # Note: use < because need 1 more space for possible comma
13411                     && !$is_long_term
13412
13413                   )
13414                 {
13415                     undo_forced_breakpoint_stack(
13416                         $breakpoint_undo_stack[$current_depth] );
13417                 }
13418
13419                 # now see if we have any comma breakpoints left
13420                 my $has_comma_breakpoints =
13421                   ( $breakpoint_stack[$current_depth] !=
13422                       $forced_breakpoint_count );
13423
13424                 # update broken-sublist flag of the outer container
13425                      $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
13426                   || $has_broken_sublist[$current_depth]
13427                   || $is_long_term
13428                   || $has_comma_breakpoints;
13429
13430 # Having come to the closing ')', '}', or ']', now we have to decide if we
13431 # should 'open up' the structure by placing breaks at the opening and
13432 # closing containers.  This is a tricky decision.  Here are some of the
13433 # basic considerations:
13434 #
13435 # -If this is a BLOCK container, then any breakpoints will have already
13436 # been set (and according to user preferences), so we need do nothing here.
13437 #
13438 # -If we have a comma-separated list for which we can align the list items,
13439 # then we need to do so because otherwise the vertical aligner cannot
13440 # currently do the alignment.
13441 #
13442 # -If this container does itself contain a container which has been broken
13443 # open, then it should be broken open to properly show the structure.
13444 #
13445 # -If there is nothing to align, and no other reason to break apart,
13446 # then do not do it.
13447 #
13448 # We will not break open the parens of a long but 'simple' logical expression.
13449 # For example:
13450 #
13451 # This is an example of a simple logical expression and its formatting:
13452 #
13453 #     if ( $bigwasteofspace1 && $bigwasteofspace2
13454 #         || $bigwasteofspace3 && $bigwasteofspace4 )
13455 #
13456 # Most people would prefer this than the 'spacey' version:
13457 #
13458 #     if (
13459 #         $bigwasteofspace1 && $bigwasteofspace2
13460 #         || $bigwasteofspace3 && $bigwasteofspace4
13461 #     )
13462 #
13463 # To illustrate the rules for breaking logical expressions, consider:
13464 #
13465 #             FULLY DENSE:
13466 #             if ( $opt_excl
13467 #                 and ( exists $ids_excl_uc{$id_uc}
13468 #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
13469 #
13470 # This is on the verge of being difficult to read.  The current default is to
13471 # open it up like this:
13472 #
13473 #             DEFAULT:
13474 #             if (
13475 #                 $opt_excl
13476 #                 and ( exists $ids_excl_uc{$id_uc}
13477 #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
13478 #               )
13479 #
13480 # This is a compromise which tries to avoid being too dense and to spacey.
13481 # A more spaced version would be:
13482 #
13483 #             SPACEY:
13484 #             if (
13485 #                 $opt_excl
13486 #                 and (
13487 #                     exists $ids_excl_uc{$id_uc}
13488 #                     or grep $id_uc =~ /$_/, @ids_excl_uc
13489 #                 )
13490 #               )
13491 #
13492 # Some people might prefer the spacey version -- an option could be added.  The
13493 # innermost expression contains a long block '( exists $ids_...  ')'.
13494 #
13495 # Here is how the logic goes: We will force a break at the 'or' that the
13496 # innermost expression contains, but we will not break apart its opening and
13497 # closing containers because (1) it contains no multi-line sub-containers itself,
13498 # and (2) there is no alignment to be gained by breaking it open like this
13499 #
13500 #             and (
13501 #                 exists $ids_excl_uc{$id_uc}
13502 #                 or grep $id_uc =~ /$_/, @ids_excl_uc
13503 #             )
13504 #
13505 # (although this looks perfectly ok and might be good for long expressions).  The
13506 # outer 'if' container, though, contains a broken sub-container, so it will be
13507 # broken open to avoid too much density.  Also, since it contains no 'or's, there
13508 # will be a forced break at its 'and'.
13509
13510                 # set some flags telling something about this container..
13511                 my $is_simple_logical_expression = 0;
13512                 if (   $item_count_stack[$current_depth] == 0
13513                     && $saw_opening_structure
13514                     && $tokens_to_go[$i_opening] eq '('
13515                     && $is_logical_container{ $container_type[$current_depth] }
13516                   )
13517                 {
13518
13519                     # This seems to be a simple logical expression with
13520                     # no existing breakpoints.  Set a flag to prevent
13521                     # opening it up.
13522                     if ( !$has_comma_breakpoints ) {
13523                         $is_simple_logical_expression = 1;
13524                     }
13525
13526                     # This seems to be a simple logical expression with
13527                     # breakpoints (broken sublists, for example).  Break
13528                     # at all 'or's and '||'s.
13529                     else {
13530                         set_logical_breakpoints($current_depth);
13531                     }
13532                 }
13533
13534                 if ( $is_long_term
13535                     && @{ $rfor_semicolon_list[$current_depth] } )
13536                 {
13537                     set_for_semicolon_breakpoints($current_depth);
13538
13539                     # open up a long 'for' or 'foreach' container to allow
13540                     # leading term alignment unless -lp is used.
13541                     $has_comma_breakpoints = 1
13542                       unless $rOpts_line_up_parentheses;
13543                 }
13544
13545                 if (
13546
13547                     # breaks for code BLOCKS are handled at a higher level
13548                     !$block_type
13549
13550                     # we do not need to break at the top level of an 'if'
13551                     # type expression
13552                     && !$is_simple_logical_expression
13553
13554                     ## modification to keep ': (' containers vertically tight;
13555                     ## but probably better to let user set -vt=1 to avoid
13556                     ## inconsistency with other paren types
13557                     ## && ($container_type[$current_depth] ne ':')
13558
13559                     # otherwise, we require one of these reasons for breaking:
13560                     && (
13561
13562                         # - this term has forced line breaks
13563                         $has_comma_breakpoints
13564
13565                        # - the opening container is separated from this batch
13566                        #   for some reason (comment, blank line, code block)
13567                        # - this is a non-paren container spanning multiple lines
13568                         || !$saw_opening_structure
13569
13570                         # - this is a long block contained in another breakable
13571                         #   container
13572                         || (   $is_long_term
13573                             && $container_environment_to_go[$i_opening] ne
13574                             'BLOCK' )
13575                     )
13576                   )
13577                 {
13578
13579                     # For -lp option, we must put a breakpoint before
13580                     # the token which has been identified as starting
13581                     # this indentation level.  This is necessary for
13582                     # proper alignment.
13583                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
13584                     {
13585                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
13586                         if (   $i_opening + 1 < $max_index_to_go
13587                             && $types_to_go[ $i_opening + 1 ] eq 'b' )
13588                         {
13589                             $item = $leading_spaces_to_go[ $i_opening + 2 ];
13590                         }
13591                         if ( defined($item) ) {
13592                             my $i_start_2 = $item->get_STARTING_INDEX();
13593                             if (
13594                                 defined($i_start_2)
13595
13596                                 # we are breaking after an opening brace, paren,
13597                                 # so don't break before it too
13598                                 && $i_start_2 ne $i_opening
13599                               )
13600                             {
13601
13602                                 # Only break for breakpoints at the same
13603                                 # indentation level as the opening paren
13604                                 my $test1 = $nesting_depth_to_go[$i_opening];
13605                                 my $test2 = $nesting_depth_to_go[$i_start_2];
13606                                 if ( $test2 == $test1 ) {
13607                                     set_forced_breakpoint( $i_start_2 - 1 );
13608                                 }
13609                             }
13610                         }
13611                     }
13612
13613                     # break after opening structure.
13614                     # note: break before closing structure will be automatic
13615                     if ( $minimum_depth <= $current_depth ) {
13616
13617                         set_forced_breakpoint($i_opening)
13618                           unless ( $do_not_break_apart
13619                             || is_unbreakable_container($current_depth) );
13620
13621                         # break at '.' of lower depth level before opening token
13622                         if ( $last_dot_index[$depth] ) {
13623                             set_forced_breakpoint( $last_dot_index[$depth] );
13624                         }
13625
13626                         # break before opening structure if preeced by another
13627                         # closing structure and a comma.  This is normally
13628                         # done by the previous closing brace, but not
13629                         # if it was a one-line block.
13630                         if ( $i_opening > 2 ) {
13631                             my $i_prev =
13632                               ( $types_to_go[ $i_opening - 1 ] eq 'b' )
13633                               ? $i_opening - 2
13634                               : $i_opening - 1;
13635
13636                             if (   $types_to_go[$i_prev] eq ','
13637                                 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
13638                             {
13639                                 set_forced_breakpoint($i_prev);
13640                             }
13641
13642                             # also break before something like ':('  or '?('
13643                             # if appropriate.
13644                             elsif (
13645                                 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
13646                             {
13647                                 my $token_prev = $tokens_to_go[$i_prev];
13648                                 if ( $want_break_before{$token_prev} ) {
13649                                     set_forced_breakpoint($i_prev);
13650                                 }
13651                             }
13652                         }
13653                     }
13654
13655                     # break after comma following closing structure
13656                     if ( $next_type eq ',' ) {
13657                         set_forced_breakpoint( $i + 1 );
13658                     }
13659
13660                     # break before an '=' following closing structure
13661                     if (
13662                         $is_assignment{$next_nonblank_type}
13663                         && ( $breakpoint_stack[$current_depth] !=
13664                             $forced_breakpoint_count )
13665                       )
13666                     {
13667                         set_forced_breakpoint($i);
13668                     }
13669
13670                     # break at any comma before the opening structure Added
13671                     # for -lp, but seems to be good in general.  It isn't
13672                     # obvious how far back to look; the '5' below seems to
13673                     # work well and will catch the comma in something like
13674                     #  push @list, myfunc( $param, $param, ..
13675
13676                     my $icomma = $last_comma_index[$depth];
13677                     if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
13678                         unless ( $forced_breakpoint_to_go[$icomma] ) {
13679                             set_forced_breakpoint($icomma);
13680                         }
13681                     }
13682                 }    # end logic to open up a container
13683
13684                 # Break open a logical container open if it was already open
13685                 elsif ($is_simple_logical_expression
13686                     && $has_old_logical_breakpoints[$current_depth] )
13687                 {
13688                     set_logical_breakpoints($current_depth);
13689                 }
13690
13691                 # Handle long container which does not get opened up
13692                 elsif ($is_long_term) {
13693
13694                     # must set fake breakpoint to alert outer containers that
13695                     # they are complex
13696                     set_fake_breakpoint();
13697                 }
13698             }
13699
13700             #------------------------------------------------------------
13701             # Handle this token
13702             #------------------------------------------------------------
13703
13704             $current_depth = $depth;
13705
13706             # handle comma-arrow
13707             if ( $type eq '=>' ) {
13708                 next if ( $last_nonblank_type eq '=>' );
13709                 next if $rOpts_break_at_old_comma_breakpoints;
13710                 next if $rOpts_comma_arrow_breakpoints == 3;
13711                 $want_comma_break[$depth]   = 1;
13712                 $index_before_arrow[$depth] = $i_last_nonblank_token;
13713                 next;
13714             }
13715
13716             elsif ( $type eq '.' ) {
13717                 $last_dot_index[$depth] = $i;
13718             }
13719
13720             # Turn off alignment if we are sure that this is not a list
13721             # environment.  To be safe, we will do this if we see certain
13722             # non-list tokens, such as ';', and also the environment is
13723             # not a list.  Note that '=' could be in any of the = operators
13724             # (lextest.t). We can't just use the reported environment
13725             # because it can be incorrect in some cases.
13726             elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
13727                 && $container_environment_to_go[$i] ne 'LIST' )
13728             {
13729                 $dont_align[$depth]         = 1;
13730                 $want_comma_break[$depth]   = 0;
13731                 $index_before_arrow[$depth] = -1;
13732             }
13733
13734             # now just handle any commas
13735             next unless ( $type eq ',' );
13736
13737             $last_dot_index[$depth]   = undef;
13738             $last_comma_index[$depth] = $i;
13739
13740             # break here if this comma follows a '=>'
13741             # but not if there is a side comment after the comma
13742             if ( $want_comma_break[$depth] ) {
13743
13744                 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
13745                     $want_comma_break[$depth]   = 0;
13746                     $index_before_arrow[$depth] = -1;
13747                     next;
13748                 }
13749
13750                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13751
13752                 # break before the previous token if it looks safe
13753                 # Example of something that we will not try to break before:
13754                 #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
13755                 my $ibreak = $index_before_arrow[$depth] - 1;
13756                 if (   $ibreak > 0
13757                     && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
13758                 {
13759                     if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
13760                     if ( $types_to_go[$ibreak] =~ /^[,b\(\{\[]$/ ) {
13761                         set_forced_breakpoint($ibreak);
13762                     }
13763                 }
13764
13765                 $want_comma_break[$depth]   = 0;
13766                 $index_before_arrow[$depth] = -1;
13767
13768                 # handle list which mixes '=>'s and ','s:
13769                 # treat any list items so far as an interrupted list
13770                 $interrupted_list[$depth] = 1;
13771                 next;
13772             }
13773
13774             # skip past these commas if we are not supposed to format them
13775             next if ( $dont_align[$depth] );
13776
13777             # break after all commas above starting depth
13778             if ( $depth < $starting_depth ) {
13779                 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
13780                 next;
13781             }
13782
13783             # add this comma to the list..
13784             my $item_count = $item_count_stack[$depth];
13785             if ( $item_count == 0 ) {
13786
13787                 # but do not form a list with no opening structure
13788                 # for example:
13789
13790                 #            open INFILE_COPY, ">$input_file_copy"
13791                 #              or die ("very long message");
13792
13793                 if ( ( $opening_structure_index_stack[$depth] < 0 )
13794                     && $container_environment_to_go[$i] eq 'BLOCK' )
13795                 {
13796                     $dont_align[$depth] = 1;
13797                     next;
13798                 }
13799             }
13800
13801             $comma_index[$depth][$item_count] = $i;
13802             ++$item_count_stack[$depth];
13803             if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
13804                 $identifier_count_stack[$depth]++;
13805             }
13806         }
13807
13808         #-------------------------------------------
13809         # end of loop over all tokens in this batch
13810         #-------------------------------------------
13811
13812         # set breaks for any unfinished lists ..
13813         for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
13814
13815             $interrupted_list[$dd] = 1;
13816             $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
13817             set_comma_breakpoints($dd);
13818             set_logical_breakpoints($dd)
13819               if ( $has_old_logical_breakpoints[$dd] );
13820             set_for_semicolon_breakpoints($dd);
13821
13822             # break open container...
13823             my $i_opening = $opening_structure_index_stack[$dd];
13824             set_forced_breakpoint($i_opening)
13825               unless (
13826                 is_unbreakable_container($dd)
13827
13828                 # Avoid a break which would place an isolated ' or "
13829                 # on a line
13830                 || (   $type eq 'Q'
13831                     && $i_opening >= $max_index_to_go - 2
13832                     && $token =~ /^['"]$/ )
13833               );
13834         }
13835
13836         # Return a flag indicating if the input file had some good breakpoints.
13837         # This flag will be used to force a break in a line shorter than the
13838         # allowed line length.
13839         if ( $has_old_logical_breakpoints[$current_depth] ) {
13840             $saw_good_breakpoint = 1;
13841         }
13842         return $saw_good_breakpoint;
13843     }
13844 }    # end scan_list
13845
13846 sub find_token_starting_list {
13847
13848     # When testing to see if a block will fit on one line, some
13849     # previous token(s) may also need to be on the line; particularly
13850     # if this is a sub call.  So we will look back at least one
13851     # token. NOTE: This isn't perfect, but not critical, because
13852     # if we mis-identify a block, it will be wrapped and therefore
13853     # fixed the next time it is formatted.
13854     my $i_opening_paren = shift;
13855     my $i_opening_minus = $i_opening_paren;
13856     my $im1             = $i_opening_paren - 1;
13857     my $im2             = $i_opening_paren - 2;
13858     my $im3             = $i_opening_paren - 3;
13859     my $typem1          = $types_to_go[$im1];
13860     my $typem2          = $im2 >= 0 ? $types_to_go[$im2] : 'b';
13861     if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
13862         $i_opening_minus = $i_opening_paren;
13863     }
13864     elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
13865         $i_opening_minus = $im1 if $im1 >= 0;
13866
13867         # walk back to improve length estimate
13868         for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
13869             last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
13870             $i_opening_minus = $j;
13871         }
13872         if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
13873     }
13874     elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
13875     elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
13876         $i_opening_minus = $im2;
13877     }
13878     return $i_opening_minus;
13879 }
13880
13881 {    # begin set_comma_breakpoints_do
13882
13883     my %is_keyword_with_special_leading_term;
13884
13885     BEGIN {
13886
13887         # These keywords have prototypes which allow a special leading item
13888         # followed by a list
13889         @_ =
13890           qw(formline grep kill map printf sprintf push chmod join pack unshift);
13891         @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
13892     }
13893
13894     sub set_comma_breakpoints_do {
13895
13896         # Given a list with some commas, set breakpoints at some of the
13897         # commas, if necessary, to make it easy to read.  This list is
13898         # an example:
13899         my (
13900             $depth,               $i_opening_paren,  $i_closing_paren,
13901             $item_count,          $identifier_count, $rcomma_index,
13902             $next_nonblank_type,  $list_type,        $interrupted,
13903             $rdo_not_break_apart, $must_break_open,
13904         ) = @_;
13905
13906         # nothing to do if no commas seen
13907         return if ( $item_count < 1 );
13908         my $i_first_comma     = $$rcomma_index[0];
13909         my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
13910         my $i_last_comma      = $i_true_last_comma;
13911         if ( $i_last_comma >= $max_index_to_go ) {
13912             $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
13913             return if ( $item_count < 1 );
13914         }
13915
13916         #---------------------------------------------------------------
13917         # find lengths of all items in the list to calculate page layout
13918         #---------------------------------------------------------------
13919         my $comma_count = $item_count;
13920         my @item_lengths;
13921         my @i_term_begin;
13922         my @i_term_end;
13923         my @i_term_comma;
13924         my $i_prev_plus;
13925         my @max_length = ( 0, 0 );
13926         my $first_term_length;
13927         my $i      = $i_opening_paren;
13928         my $is_odd = 1;
13929
13930         for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
13931             $is_odd      = 1 - $is_odd;
13932             $i_prev_plus = $i + 1;
13933             $i           = $$rcomma_index[$j];
13934
13935             my $i_term_end =
13936               ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
13937             my $i_term_begin =
13938               ( $types_to_go[$i_prev_plus] eq 'b' )
13939               ? $i_prev_plus + 1
13940               : $i_prev_plus;
13941             push @i_term_begin, $i_term_begin;
13942             push @i_term_end,   $i_term_end;
13943             push @i_term_comma, $i;
13944
13945             # note: currently adding 2 to all lengths (for comma and space)
13946             my $length =
13947               2 + token_sequence_length( $i_term_begin, $i_term_end );
13948             push @item_lengths, $length;
13949
13950             if ( $j == 0 ) {
13951                 $first_term_length = $length;
13952             }
13953             else {
13954
13955                 if ( $length > $max_length[$is_odd] ) {
13956                     $max_length[$is_odd] = $length;
13957                 }
13958             }
13959         }
13960
13961         # now we have to make a distinction between the comma count and item
13962         # count, because the item count will be one greater than the comma
13963         # count if the last item is not terminated with a comma
13964         my $i_b =
13965           ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
13966           ? $i_last_comma + 1
13967           : $i_last_comma;
13968         my $i_e =
13969           ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
13970           ? $i_closing_paren - 2
13971           : $i_closing_paren - 1;
13972         my $i_effective_last_comma = $i_last_comma;
13973
13974         my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
13975
13976         if ( $last_item_length > 0 ) {
13977
13978             # add 2 to length because other lengths include a comma and a blank
13979             $last_item_length += 2;
13980             push @item_lengths, $last_item_length;
13981             push @i_term_begin, $i_b + 1;
13982             push @i_term_end,   $i_e;
13983             push @i_term_comma, undef;
13984
13985             my $i_odd = $item_count % 2;
13986
13987             if ( $last_item_length > $max_length[$i_odd] ) {
13988                 $max_length[$i_odd] = $last_item_length;
13989             }
13990
13991             $item_count++;
13992             $i_effective_last_comma = $i_e + 1;
13993
13994             if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
13995                 $identifier_count++;
13996             }
13997         }
13998
13999         #---------------------------------------------------------------
14000         # End of length calculations
14001         #---------------------------------------------------------------
14002
14003         #---------------------------------------------------------------
14004         # Compound List Rule 1:
14005         # Break at (almost) every comma for a list containing a broken
14006         # sublist.  This has higher priority than the Interrupted List
14007         # Rule.
14008         #---------------------------------------------------------------
14009         if ( $has_broken_sublist[$depth] ) {
14010
14011             # Break at every comma except for a comma between two
14012             # simple, small terms.  This prevents long vertical
14013             # columns of, say, just 0's.
14014             my $small_length = 10;    # 2 + actual maximum length wanted
14015
14016             # We'll insert a break in long runs of small terms to
14017             # allow alignment in uniform tables.
14018             my $skipped_count = 0;
14019             my $columns       = table_columns_available($i_first_comma);
14020             my $fields        = int( $columns / $small_length );
14021             if (   $rOpts_maximum_fields_per_table
14022                 && $fields > $rOpts_maximum_fields_per_table )
14023             {
14024                 $fields = $rOpts_maximum_fields_per_table;
14025             }
14026             my $max_skipped_count = $fields - 1;
14027
14028             my $is_simple_last_term = 0;
14029             my $is_simple_next_term = 0;
14030             foreach my $j ( 0 .. $item_count ) {
14031                 $is_simple_last_term = $is_simple_next_term;
14032                 $is_simple_next_term = 0;
14033                 if (   $j < $item_count
14034                     && $i_term_end[$j] == $i_term_begin[$j]
14035                     && $item_lengths[$j] <= $small_length )
14036                 {
14037                     $is_simple_next_term = 1;
14038                 }
14039                 next if $j == 0;
14040                 if (   $is_simple_last_term
14041                     && $is_simple_next_term
14042                     && $skipped_count < $max_skipped_count )
14043                 {
14044                     $skipped_count++;
14045                 }
14046                 else {
14047                     $skipped_count = 0;
14048                     my $i = $i_term_comma[ $j - 1 ];
14049                     last unless defined $i;
14050                     set_forced_breakpoint($i);
14051                 }
14052             }
14053
14054             # always break at the last comma if this list is
14055             # interrupted; we wouldn't want to leave a terminal '{', for
14056             # example.
14057             if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14058             return;
14059         }
14060
14061 #my ( $a, $b, $c ) = caller();
14062 #print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14063 #i_first = $i_first_comma  i_last=$i_last_comma max=$max_index_to_go\n";
14064 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14065
14066         #---------------------------------------------------------------
14067         # Interrupted List Rule:
14068         # A list is is forced to use old breakpoints if it was interrupted
14069         # by side comments or blank lines, or requested by user.
14070         #---------------------------------------------------------------
14071         if (   $rOpts_break_at_old_comma_breakpoints
14072             || $interrupted
14073             || $i_opening_paren < 0 )
14074         {
14075             copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14076             return;
14077         }
14078
14079         #---------------------------------------------------------------
14080         # Looks like a list of items.  We have to look at it and size it up.
14081         #---------------------------------------------------------------
14082
14083         my $opening_token = $tokens_to_go[$i_opening_paren];
14084         my $opening_environment =
14085           $container_environment_to_go[$i_opening_paren];
14086
14087         #-------------------------------------------------------------------
14088         # Return if this will fit on one line
14089         #-------------------------------------------------------------------
14090
14091         my $i_opening_minus = find_token_starting_list($i_opening_paren);
14092         return
14093           unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14094
14095         #-------------------------------------------------------------------
14096         # Now we know that this block spans multiple lines; we have to set
14097         # at least one breakpoint -- real or fake -- as a signal to break
14098         # open any outer containers.
14099         #-------------------------------------------------------------------
14100         set_fake_breakpoint();
14101
14102         # be sure we do not extend beyond the current list length
14103         if ( $i_effective_last_comma >= $max_index_to_go ) {
14104             $i_effective_last_comma = $max_index_to_go - 1;
14105         }
14106
14107         # Set a flag indicating if we need to break open to keep -lp
14108         # items aligned.  This is necessary if any of the list terms
14109         # exceeds the available space after the '('.
14110         my $need_lp_break_open = $must_break_open;
14111         if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14112             my $columns_if_unbroken = $rOpts_maximum_line_length -
14113               total_line_length( $i_opening_minus, $i_opening_paren );
14114             $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
14115               || ( $max_length[1] > $columns_if_unbroken )
14116               || ( $first_term_length > $columns_if_unbroken );
14117         }
14118
14119         # Specify if the list must have an even number of fields or not.
14120         # It is generally safest to assume an even number, because the
14121         # list items might be a hash list.  But if we can be sure that
14122         # it is not a hash, then we can allow an odd number for more
14123         # flexibility.
14124         my $odd_or_even = 2;    # 1 = odd field count ok, 2 = want even count
14125
14126         if (   $identifier_count >= $item_count - 1
14127             || $is_assignment{$next_nonblank_type}
14128             || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
14129           )
14130         {
14131             $odd_or_even = 1;
14132         }
14133
14134         # do we have a long first term which should be
14135         # left on a line by itself?
14136         my $use_separate_first_term = (
14137             $odd_or_even == 1       # only if we can use 1 field/line
14138               && $item_count > 3    # need several items
14139               && $first_term_length >
14140               2 * $max_length[0] - 2    # need long first term
14141               && $first_term_length >
14142               2 * $max_length[1] - 2    # need long first term
14143         );
14144
14145         # or do we know from the type of list that the first term should
14146         # be placed alone?
14147         if ( !$use_separate_first_term ) {
14148             if ( $is_keyword_with_special_leading_term{$list_type} ) {
14149                 $use_separate_first_term = 1;
14150
14151                 # should the container be broken open?
14152                 if ( $item_count < 3 ) {
14153                     if ( $i_first_comma - $i_opening_paren < 4 ) {
14154                         $$rdo_not_break_apart = 1;
14155                     }
14156                 }
14157                 elsif ($first_term_length < 20
14158                     && $i_first_comma - $i_opening_paren < 4 )
14159                 {
14160                     my $columns = table_columns_available($i_first_comma);
14161                     if ( $first_term_length < $columns ) {
14162                         $$rdo_not_break_apart = 1;
14163                     }
14164                 }
14165             }
14166         }
14167
14168         # if so,
14169         if ($use_separate_first_term) {
14170
14171             # ..set a break and update starting values
14172             $use_separate_first_term = 1;
14173             set_forced_breakpoint($i_first_comma);
14174             $i_opening_paren = $i_first_comma;
14175             $i_first_comma   = $$rcomma_index[1];
14176             $item_count--;
14177             return if $comma_count == 1;
14178             shift @item_lengths;
14179             shift @i_term_begin;
14180             shift @i_term_end;
14181             shift @i_term_comma;
14182         }
14183
14184         # if not, update the metrics to include the first term
14185         else {
14186             if ( $first_term_length > $max_length[0] ) {
14187                 $max_length[0] = $first_term_length;
14188             }
14189         }
14190
14191         # Field width parameters
14192         my $pair_width = ( $max_length[0] + $max_length[1] );
14193         my $max_width =
14194           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14195
14196         # Number of free columns across the page width for laying out tables
14197         my $columns = table_columns_available($i_first_comma);
14198
14199         # Estimated maximum number of fields which fit this space
14200         # This will be our first guess
14201         my $number_of_fields_max =
14202           maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14203             $pair_width );
14204         my $number_of_fields = $number_of_fields_max;
14205
14206         # Find the best-looking number of fields
14207         # and make this our second guess if possible
14208         my ( $number_of_fields_best, $ri_ragged_break_list,
14209             $new_identifier_count )
14210           = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14211             $max_width );
14212
14213         if (   $number_of_fields_best != 0
14214             && $number_of_fields_best < $number_of_fields_max )
14215         {
14216             $number_of_fields = $number_of_fields_best;
14217         }
14218
14219         # ----------------------------------------------------------------------
14220         # If we are crowded and the -lp option is being used, try to
14221         # undo some indentation
14222         # ----------------------------------------------------------------------
14223         if (
14224             $rOpts_line_up_parentheses
14225             && (
14226                 $number_of_fields == 0
14227                 || (   $number_of_fields == 1
14228                     && $number_of_fields != $number_of_fields_best )
14229             )
14230           )
14231         {
14232             my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
14233             if ( $available_spaces > 0 ) {
14234
14235                 my $spaces_wanted = $max_width - $columns;    # for 1 field
14236
14237                 if ( $number_of_fields_best == 0 ) {
14238                     $number_of_fields_best =
14239                       get_maximum_fields_wanted( \@item_lengths );
14240                 }
14241
14242                 if ( $number_of_fields_best != 1 ) {
14243                     my $spaces_wanted_2 =
14244                       1 + $pair_width - $columns;             # for 2 fields
14245                     if ( $available_spaces > $spaces_wanted_2 ) {
14246                         $spaces_wanted = $spaces_wanted_2;
14247                     }
14248                 }
14249
14250                 if ( $spaces_wanted > 0 ) {
14251                     my $deleted_spaces =
14252                       reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14253
14254                     # redo the math
14255                     if ( $deleted_spaces > 0 ) {
14256                         $columns = table_columns_available($i_first_comma);
14257                         $number_of_fields_max =
14258                           maximum_number_of_fields( $columns, $odd_or_even,
14259                             $max_width, $pair_width );
14260                         $number_of_fields = $number_of_fields_max;
14261
14262                         if (   $number_of_fields_best == 1
14263                             && $number_of_fields >= 1 )
14264                         {
14265                             $number_of_fields = $number_of_fields_best;
14266                         }
14267                     }
14268                 }
14269             }
14270         }
14271
14272         # try for one column if two won't work
14273         if ( $number_of_fields <= 0 ) {
14274             $number_of_fields = int( $columns / $max_width );
14275         }
14276
14277         # The user can place an upper bound on the number of fields,
14278         # which can be useful for doing maintenance on tables
14279         if (   $rOpts_maximum_fields_per_table
14280             && $number_of_fields > $rOpts_maximum_fields_per_table )
14281         {
14282             $number_of_fields = $rOpts_maximum_fields_per_table;
14283         }
14284
14285         # How many columns (characters) and lines would this container take
14286         # if no additional whitespace were added?
14287         my $packed_columns = token_sequence_length( $i_opening_paren + 1,
14288             $i_effective_last_comma + 1 );
14289         if ( $columns <= 0 ) { $columns = 1 }    # avoid divide by zero
14290         my $packed_lines = 1 + int( $packed_columns / $columns );
14291
14292         # are we an item contained in an outer list?
14293         my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
14294
14295         if ( $number_of_fields <= 0 ) {
14296
14297 #         #---------------------------------------------------------------
14298 #         # We're in trouble.  We can't find a single field width that works.
14299 #         # There is no simple answer here; we may have a single long list
14300 #         # item, or many.
14301 #         #---------------------------------------------------------------
14302 #
14303 #         In many cases, it may be best to not force a break if there is just one
14304 #         comma, because the standard continuation break logic will do a better
14305 #         job without it.
14306 #
14307 #         In the common case that all but one of the terms can fit
14308 #         on a single line, it may look better not to break open the
14309 #         containing parens.  Consider, for example
14310 #
14311 #             $color =
14312 #               join ( '/',
14313 #                 sort { $color_value{$::a} <=> $color_value{$::b}; }
14314 #                 keys %colors );
14315 #
14316 #         which will look like this with the container broken:
14317 #
14318 #             $color = join (
14319 #                 '/',
14320 #                 sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
14321 #             );
14322 #
14323 #         Here is an example of this rule for a long last term:
14324 #
14325 #             log_message( 0, 256, 128,
14326 #                 "Number of routes in adj-RIB-in to be considered: $peercount" );
14327 #
14328 #         And here is an example with a long first term:
14329 #
14330 #         $s = sprintf(
14331 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
14332 #             $r, $pu, $ps, $cu, $cs, $tt
14333 #           )
14334 #           if $style eq 'all';
14335
14336             my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
14337             my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
14338             my $long_first_term =
14339               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
14340
14341             # break at every comma ...
14342             if (
14343
14344                 # if requested by user or is best looking
14345                 $number_of_fields_best == 1
14346
14347                 # or if this is a sublist of a larger list
14348                 || $in_hierarchical_list
14349
14350                 # or if multiple commas and we dont have a long first or last
14351                 # term
14352                 || ( $comma_count > 1
14353                     && !( $long_last_term || $long_first_term ) )
14354               )
14355             {
14356                 foreach ( 0 .. $comma_count - 1 ) {
14357                     set_forced_breakpoint( $$rcomma_index[$_] );
14358                 }
14359             }
14360             elsif ($long_last_term) {
14361
14362                 set_forced_breakpoint($i_last_comma);
14363                 $$rdo_not_break_apart = 1 unless $must_break_open;
14364             }
14365             elsif ($long_first_term) {
14366
14367                 set_forced_breakpoint($i_first_comma);
14368             }
14369             else {
14370
14371                 # let breaks be defined by default bond strength logic
14372             }
14373             return;
14374         }
14375
14376         # --------------------------------------------------------
14377         # We have a tentative field count that seems to work.
14378         # How many lines will this require?
14379         # --------------------------------------------------------
14380         my $formatted_lines = $item_count / ($number_of_fields);
14381         if ( $formatted_lines != int $formatted_lines ) {
14382             $formatted_lines = 1 + int $formatted_lines;
14383         }
14384
14385         # So far we've been trying to fill out to the right margin.  But
14386         # compact tables are easier to read, so let's see if we can use fewer
14387         # fields without increasing the number of lines.
14388         $number_of_fields =
14389           compactify_table( $item_count, $number_of_fields, $formatted_lines,
14390             $odd_or_even );
14391
14392         # How many spaces across the page will we fill?
14393         my $columns_per_line =
14394           ( int $number_of_fields / 2 ) * $pair_width +
14395           ( $number_of_fields % 2 ) * $max_width;
14396
14397         my $formatted_columns;
14398
14399         if ( $number_of_fields > 1 ) {
14400             $formatted_columns =
14401               ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) *
14402                   $max_width );
14403         }
14404         else {
14405             $formatted_columns = $max_width * $item_count;
14406         }
14407         if ( $formatted_columns < $packed_columns ) {
14408             $formatted_columns = $packed_columns;
14409         }
14410
14411         my $unused_columns = $formatted_columns - $packed_columns;
14412
14413         # set some empirical parameters to help decide if we should try to
14414         # align; high sparsity does not look good, especially with few lines
14415         my $sparsity = ($unused_columns) / ($formatted_columns);
14416         my $max_allowed_sparsity =
14417             ( $item_count < 3 )    ? 0.1
14418           : ( $packed_lines == 1 ) ? 0.15
14419           : ( $packed_lines == 2 ) ? 0.4
14420           :                          0.7;
14421
14422         # Begin check for shortcut methods, which avoid treating a list
14423         # as a table for relatively small parenthesized lists.  These
14424         # are usually easier to read if not formatted as tables.
14425         if (
14426             $packed_lines <= 2    # probably can fit in 2 lines
14427             && $item_count < 9    # doesn't have too many items
14428             && $opening_environment eq 'BLOCK'    # not a sub-container
14429             && $opening_token       eq '('        # is paren list
14430           )
14431         {
14432
14433             # Shortcut method 1: for -lp and just one comma:
14434             # This is a no-brainer, just break at the comma.
14435             if (
14436                 $rOpts_line_up_parentheses        # -lp
14437                 && $item_count == 2               # two items, one comma
14438                 && !$must_break_open
14439               )
14440             {
14441                 my $i_break = $$rcomma_index[0];
14442                 set_forced_breakpoint($i_break);
14443                 $$rdo_not_break_apart = 1;
14444                 set_non_alignment_flags( $comma_count, $rcomma_index );
14445                 return;
14446
14447             }
14448
14449             # method 2 is for most small ragged lists which might look
14450             # best if not displayed as a table.
14451             if (
14452                 ( $number_of_fields == 2 && $item_count == 3 )
14453                 || (
14454                     $new_identifier_count > 0    # isn't all quotes
14455                     && $sparsity > 0.15
14456                 )    # would be fairly spaced gaps if aligned
14457               )
14458             {
14459
14460                 my $break_count =
14461                   set_ragged_breakpoints( \@i_term_comma,
14462                     $ri_ragged_break_list );
14463                 ++$break_count if ($use_separate_first_term);
14464
14465                 # NOTE: we should really use the true break count here,
14466                 # which can be greater if there are large terms and
14467                 # little space, but usually this will work well enough.
14468                 unless ($must_break_open) {
14469
14470                     if ( $break_count <= 1 ) {
14471                         $$rdo_not_break_apart = 1;
14472                     }
14473                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14474                     {
14475                         $$rdo_not_break_apart = 1;
14476                     }
14477                 }
14478                 set_non_alignment_flags( $comma_count, $rcomma_index );
14479                 return;
14480             }
14481
14482         }    # end shortcut methods
14483
14484         # debug stuff
14485
14486         FORMATTER_DEBUG_FLAG_SPARSE && do {
14487             print
14488 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line  unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
14489
14490         };
14491
14492         #---------------------------------------------------------------
14493         # Compound List Rule 2:
14494         # If this list is too long for one line, and it is an item of a
14495         # larger list, then we must format it, regardless of sparsity
14496         # (ian.t).  One reason that we have to do this is to trigger
14497         # Compound List Rule 1, above, which causes breaks at all commas of
14498         # all outer lists.  In this way, the structure will be properly
14499         # displayed.
14500         #---------------------------------------------------------------
14501
14502         # Decide if this list is too long for one line unless broken
14503         my $total_columns = table_columns_available($i_opening_paren);
14504         my $too_long      = $packed_columns > $total_columns;
14505
14506         # For a paren list, include the length of the token just before the
14507         # '(' because this is likely a sub call, and we would have to
14508         # include the sub name on the same line as the list.  This is still
14509         # imprecise, but not too bad.  (steve.t)
14510         if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
14511
14512             $too_long =
14513               excess_line_length( $i_opening_minus,
14514                 $i_effective_last_comma + 1 ) > 0;
14515         }
14516
14517         # FIXME: For an item after a '=>', try to include the length of the
14518         # thing before the '=>'.  This is crude and should be improved by
14519         # actually looking back token by token.
14520         if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
14521             my $i_opening_minus = $i_opening_paren - 4;
14522             if ( $i_opening_minus >= 0 ) {
14523                 $too_long =
14524                   excess_line_length( $i_opening_minus,
14525                     $i_effective_last_comma + 1 ) > 0;
14526             }
14527         }
14528
14529         # Always break lists contained in '[' and '{' if too long for 1 line,
14530         # and always break lists which are too long and part of a more complex
14531         # structure.
14532         my $must_break_open_container = $must_break_open
14533           || ( $too_long
14534             && ( $in_hierarchical_list || $opening_token ne '(' ) );
14535
14536 #print "LISTX: next=$next_nonblank_type  avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long  opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines  packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
14537
14538         #---------------------------------------------------------------
14539         # The main decision:
14540         # Now decide if we will align the data into aligned columns.  Do not
14541         # attempt to align columns if this is a tiny table or it would be
14542         # too spaced.  It seems that the more packed lines we have, the
14543         # sparser the list that can be allowed and still look ok.
14544         #---------------------------------------------------------------
14545
14546         if (   ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
14547             || ( $formatted_lines < 2 )
14548             || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
14549           )
14550         {
14551
14552             #---------------------------------------------------------------
14553             # too sparse: would look ugly if aligned in a table;
14554             #---------------------------------------------------------------
14555
14556             # use old breakpoints if this is a 'big' list
14557             # FIXME: goal is to improve set_ragged_breakpoints so that
14558             # this is not necessary.
14559             if ( $packed_lines > 2 && $item_count > 10 ) {
14560                 write_logfile_entry("List sparse: using old breakpoints\n");
14561                 copy_old_breakpoints( $i_first_comma, $i_last_comma );
14562             }
14563
14564             # let the continuation logic handle it if 2 lines
14565             else {
14566
14567                 my $break_count =
14568                   set_ragged_breakpoints( \@i_term_comma,
14569                     $ri_ragged_break_list );
14570                 ++$break_count if ($use_separate_first_term);
14571
14572                 unless ($must_break_open_container) {
14573                     if ( $break_count <= 1 ) {
14574                         $$rdo_not_break_apart = 1;
14575                     }
14576                     elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14577                     {
14578                         $$rdo_not_break_apart = 1;
14579                     }
14580                 }
14581                 set_non_alignment_flags( $comma_count, $rcomma_index );
14582             }
14583             return;
14584         }
14585
14586         #---------------------------------------------------------------
14587         # go ahead and format as a table
14588         #---------------------------------------------------------------
14589         write_logfile_entry(
14590             "List: auto formatting with $number_of_fields fields/row\n");
14591
14592         my $j_first_break =
14593           $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
14594
14595         for (
14596             my $j = $j_first_break ;
14597             $j < $comma_count ;
14598             $j += $number_of_fields
14599           )
14600         {
14601             my $i = $$rcomma_index[$j];
14602             set_forced_breakpoint($i);
14603         }
14604         return;
14605     }
14606 }
14607
14608 sub set_non_alignment_flags {
14609
14610     # set flag which indicates that these commas should not be
14611     # aligned
14612     my ( $comma_count, $rcomma_index ) = @_;
14613     foreach ( 0 .. $comma_count - 1 ) {
14614         $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
14615     }
14616 }
14617
14618 sub study_list_complexity {
14619
14620     # Look for complex tables which should be formatted with one term per line.
14621     # Returns the following:
14622     #
14623     #  \@i_ragged_break_list = list of good breakpoints to avoid lines
14624     #    which are hard to read
14625     #  $number_of_fields_best = suggested number of fields based on
14626     #    complexity; = 0 if any number may be used.
14627     #
14628     my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
14629     my $item_count            = @{$ri_term_begin};
14630     my $complex_item_count    = 0;
14631     my $number_of_fields_best = $rOpts_maximum_fields_per_table;
14632     my $i_max                 = @{$ritem_lengths} - 1;
14633     ##my @item_complexity;
14634
14635     my $i_last_last_break = -3;
14636     my $i_last_break      = -2;
14637     my @i_ragged_break_list;
14638
14639     my $definitely_complex = 30;
14640     my $definitely_simple  = 12;
14641     my $quote_count        = 0;
14642
14643     for my $i ( 0 .. $i_max ) {
14644         my $ib = $ri_term_begin->[$i];
14645         my $ie = $ri_term_end->[$i];
14646
14647         # define complexity: start with the actual term length
14648         my $weighted_length = ( $ritem_lengths->[$i] - 2 );
14649
14650         ##TBD: join types here and check for variations
14651         ##my $str=join "", @tokens_to_go[$ib..$ie];
14652
14653         my $is_quote = 0;
14654         if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
14655             $is_quote = 1;
14656             $quote_count++;
14657         }
14658         elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
14659             $quote_count++;
14660         }
14661
14662         if ( $ib eq $ie ) {
14663             if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
14664                 $complex_item_count++;
14665                 $weighted_length *= 2;
14666             }
14667             else {
14668             }
14669         }
14670         else {
14671             if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
14672                 $complex_item_count++;
14673                 $weighted_length *= 2;
14674             }
14675             if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
14676                 $weighted_length += 4;
14677             }
14678         }
14679
14680         # add weight for extra tokens.
14681         $weighted_length += 2 * ( $ie - $ib );
14682
14683 ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
14684 ##        print "# COMPLEXITY:$weighted_length   $BUB\n";
14685
14686 ##push @item_complexity, $weighted_length;
14687
14688         # now mark a ragged break after this item it if it is 'long and
14689         # complex':
14690         if ( $weighted_length >= $definitely_complex ) {
14691
14692             # if we broke after the previous term
14693             # then break before it too
14694             if (   $i_last_break == $i - 1
14695                 && $i > 1
14696                 && $i_last_last_break != $i - 2 )
14697             {
14698
14699                 ## FIXME: don't strand a small term
14700                 pop @i_ragged_break_list;
14701                 push @i_ragged_break_list, $i - 2;
14702                 push @i_ragged_break_list, $i - 1;
14703             }
14704
14705             push @i_ragged_break_list, $i;
14706             $i_last_last_break = $i_last_break;
14707             $i_last_break      = $i;
14708         }
14709
14710         # don't break before a small last term -- it will
14711         # not look good on a line by itself.
14712         elsif ($i == $i_max
14713             && $i_last_break == $i - 1
14714             && $weighted_length <= $definitely_simple )
14715         {
14716             pop @i_ragged_break_list;
14717         }
14718     }
14719
14720     my $identifier_count = $i_max + 1 - $quote_count;
14721
14722     # Need more tuning here..
14723     if (   $max_width > 12
14724         && $complex_item_count > $item_count / 2
14725         && $number_of_fields_best != 2 )
14726     {
14727         $number_of_fields_best = 1;
14728     }
14729
14730     return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
14731 }
14732
14733 sub get_maximum_fields_wanted {
14734
14735     # Not all tables look good with more than one field of items.
14736     # This routine looks at a table and decides if it should be
14737     # formatted with just one field or not.
14738     # This coding is still under development.
14739     my ($ritem_lengths) = @_;
14740
14741     my $number_of_fields_best = 0;
14742
14743     # For just a few items, we tentatively assume just 1 field.
14744     my $item_count = @{$ritem_lengths};
14745     if ( $item_count <= 5 ) {
14746         $number_of_fields_best = 1;
14747     }
14748
14749     # For larger tables, look at it both ways and see what looks best
14750     else {
14751
14752         my $is_odd            = 1;
14753         my @max_length        = ( 0, 0 );
14754         my @last_length_2     = ( undef, undef );
14755         my @first_length_2    = ( undef, undef );
14756         my $last_length       = undef;
14757         my $total_variation_1 = 0;
14758         my $total_variation_2 = 0;
14759         my @total_variation_2 = ( 0, 0 );
14760         for ( my $j = 0 ; $j < $item_count ; $j++ ) {
14761
14762             $is_odd = 1 - $is_odd;
14763             my $length = $ritem_lengths->[$j];
14764             if ( $length > $max_length[$is_odd] ) {
14765                 $max_length[$is_odd] = $length;
14766             }
14767
14768             if ( defined($last_length) ) {
14769                 my $dl = abs( $length - $last_length );
14770                 $total_variation_1 += $dl;
14771             }
14772             $last_length = $length;
14773
14774             my $ll = $last_length_2[$is_odd];
14775             if ( defined($ll) ) {
14776                 my $dl = abs( $length - $ll );
14777                 $total_variation_2[$is_odd] += $dl;
14778             }
14779             else {
14780                 $first_length_2[$is_odd] = $length;
14781             }
14782             $last_length_2[$is_odd] = $length;
14783         }
14784         $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
14785
14786         my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
14787         unless ( $total_variation_2 < $factor * $total_variation_1 ) {
14788             $number_of_fields_best = 1;
14789         }
14790     }
14791     return ($number_of_fields_best);
14792 }
14793
14794 sub table_columns_available {
14795     my $i_first_comma = shift;
14796     my $columns =
14797       $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
14798
14799     # Patch: the vertical formatter does not line up lines whose lengths
14800     # exactly equal the available line length because of allowances
14801     # that must be made for side comments.  Therefore, the number of
14802     # available columns is reduced by 1 character.
14803     $columns -= 1;
14804     return $columns;
14805 }
14806
14807 sub maximum_number_of_fields {
14808
14809     # how many fields will fit in the available space?
14810     my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
14811     my $max_pairs        = int( $columns / $pair_width );
14812     my $number_of_fields = $max_pairs * 2;
14813     if (   $odd_or_even == 1
14814         && $max_pairs * $pair_width + $max_width <= $columns )
14815     {
14816         $number_of_fields++;
14817     }
14818     return $number_of_fields;
14819 }
14820
14821 sub compactify_table {
14822
14823     # given a table with a certain number of fields and a certain number
14824     # of lines, see if reducing the number of fields will make it look
14825     # better.
14826     my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
14827     if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
14828         my $min_fields;
14829
14830         for (
14831             $min_fields = $number_of_fields ;
14832             $min_fields >= $odd_or_even
14833             && $min_fields * $formatted_lines >= $item_count ;
14834             $min_fields -= $odd_or_even
14835           )
14836         {
14837             $number_of_fields = $min_fields;
14838         }
14839     }
14840     return $number_of_fields;
14841 }
14842
14843 sub set_ragged_breakpoints {
14844
14845     # Set breakpoints in a list that cannot be formatted nicely as a
14846     # table.
14847     my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
14848
14849     my $break_count = 0;
14850     foreach (@$ri_ragged_break_list) {
14851         my $j = $ri_term_comma->[$_];
14852         if ($j) {
14853             set_forced_breakpoint($j);
14854             $break_count++;
14855         }
14856     }
14857     return $break_count;
14858 }
14859
14860 sub copy_old_breakpoints {
14861     my ( $i_first_comma, $i_last_comma ) = @_;
14862     for my $i ( $i_first_comma .. $i_last_comma ) {
14863         if ( $old_breakpoint_to_go[$i] ) {
14864             set_forced_breakpoint($i);
14865         }
14866     }
14867 }
14868
14869 sub set_nobreaks {
14870     my ( $i, $j ) = @_;
14871     if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
14872
14873         FORMATTER_DEBUG_FLAG_NOBREAK && do {
14874             my ( $a, $b, $c ) = caller();
14875             print(
14876 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
14877             );
14878         };
14879
14880         @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
14881     }
14882
14883     # shouldn't happen; non-critical error
14884     else {
14885         FORMATTER_DEBUG_FLAG_NOBREAK && do {
14886             my ( $a, $b, $c ) = caller();
14887             print(
14888 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
14889             );
14890         };
14891     }
14892 }
14893
14894 sub set_fake_breakpoint {
14895
14896     # Just bump up the breakpoint count as a signal that there are breaks.
14897     # This is useful if we have breaks but may want to postpone deciding where
14898     # to make them.
14899     $forced_breakpoint_count++;
14900 }
14901
14902 sub set_forced_breakpoint {
14903     my $i = shift;
14904
14905     return unless defined $i && $i >= 0;
14906
14907     # when called with certain tokens, use bond strengths to decide
14908     # if we break before or after it
14909     my $token = $tokens_to_go[$i];
14910
14911     if ( $token =~ /^([\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
14912         if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14913     }
14914
14915     # breaks are forced before 'if' and 'unless'
14916     elsif ( $is_if_unless{$token} ) { $i-- }
14917
14918     if ( $i >= 0 && $i <= $max_index_to_go ) {
14919         my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14920
14921         FORMATTER_DEBUG_FLAG_FORCE && do {
14922             my ( $a, $b, $c ) = caller();
14923             print
14924 "FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
14925         };
14926
14927         if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
14928             $forced_breakpoint_to_go[$i_nonblank] = 1;
14929
14930             if ( $i_nonblank > $index_max_forced_break ) {
14931                 $index_max_forced_break = $i_nonblank;
14932             }
14933             $forced_breakpoint_count++;
14934             $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
14935               $i_nonblank;
14936
14937             # if we break at an opening container..break at the closing
14938             if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
14939                 set_closing_breakpoint($i_nonblank);
14940             }
14941         }
14942     }
14943 }
14944
14945 sub clear_breakpoint_undo_stack {
14946     $forced_breakpoint_undo_count = 0;
14947 }
14948
14949 sub undo_forced_breakpoint_stack {
14950
14951     my $i_start = shift;
14952     if ( $i_start < 0 ) {
14953         $i_start = 0;
14954         my ( $a, $b, $c ) = caller();
14955         warning(
14956 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
14957         );
14958     }
14959
14960     while ( $forced_breakpoint_undo_count > $i_start ) {
14961         my $i =
14962           $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14963         if ( $i >= 0 && $i <= $max_index_to_go ) {
14964             $forced_breakpoint_to_go[$i] = 0;
14965             $forced_breakpoint_count--;
14966
14967             FORMATTER_DEBUG_FLAG_UNDOBP && do {
14968                 my ( $a, $b, $c ) = caller();
14969                 print(
14970 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
14971                 );
14972             };
14973         }
14974
14975         # shouldn't happen, but not a critical error
14976         else {
14977             FORMATTER_DEBUG_FLAG_UNDOBP && do {
14978                 my ( $a, $b, $c ) = caller();
14979                 print(
14980 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
14981                 );
14982             };
14983         }
14984     }
14985 }
14986
14987 sub recombine_breakpoints {
14988
14989     # sub set_continuation_breaks is very liberal in setting line breaks
14990     # for long lines, always setting breaks at good breakpoints, even
14991     # when that creates small lines.  Occasionally small line fragments
14992     # are produced which would look better if they were combined.
14993     # That's the task of this routine, recombine_breakpoints.
14994     my ( $ri_first, $ri_last ) = @_;
14995     my $more_to_do = 1;
14996
14997     # We keep looping over all of the lines of this batch
14998     # until there are no more possible recombinations
14999     my $nmax_last = @$ri_last;
15000     while ($more_to_do) {
15001         my $n_best = 0;
15002         my $bs_best;
15003         my $n;
15004         my $nmax = @$ri_last - 1;
15005
15006         # safety check for infinite loop
15007         unless ( $nmax < $nmax_last ) {
15008
15009             # shouldn't happen because splice below decreases nmax on each pass:
15010             # but i get paranoid sometimes
15011             die "Program bug-infinite loop in recombine breakpoints\n";
15012         }
15013         $nmax_last  = $nmax;
15014         $more_to_do = 0;
15015         my $previous_outdentable_closing_paren;
15016         my $leading_amp_count = 0;
15017         my $this_line_is_semicolon_terminated;
15018
15019         # loop over all remaining lines in this batch
15020         for $n ( 1 .. $nmax ) {
15021
15022             #----------------------------------------------------------
15023             # If we join the current pair of lines,
15024             # line $n-1 will become the left part of the joined line
15025             # line $n will become the right part of the joined line
15026             #
15027             # Here are Indexes of the endpoint tokens of the two lines:
15028             #
15029             #  ---left---- | ---right---
15030             #  $if   $imid | $imidr   $il
15031             #
15032             # We want to decide if we should join tokens $imid to $imidr
15033             #
15034             # We will apply a number of ad-hoc tests to see if joining
15035             # here will look ok.  The code will just issue a 'next'
15036             # command if the join doesn't look good.  If we get through
15037             # the gauntlet of tests, the lines will be recombined.
15038             #----------------------------------------------------------
15039             my $if    = $$ri_first[ $n - 1 ];
15040             my $il    = $$ri_last[$n];
15041             my $imid  = $$ri_last[ $n - 1 ];
15042             my $imidr = $$ri_first[$n];
15043
15044             #my $depth_increase=( $nesting_depth_to_go[$imidr] -
15045             #        $nesting_depth_to_go[$if] );
15046
15047 ##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
15048
15049             # If line $n is the last line, we set some flags and
15050             # do any special checks for it
15051             if ( $n == $nmax ) {
15052
15053                 # a terminal '{' should stay where it is
15054                 next if $types_to_go[$imidr] eq '{';
15055
15056                 # set flag if statement $n ends in ';'
15057                 $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';'
15058
15059                   # with possible side comment
15060                   || ( $types_to_go[$il] eq '#'
15061                     && $il - $imidr >= 2
15062                     && $types_to_go[ $il - 2 ] eq ';'
15063                     && $types_to_go[ $il - 1 ] eq 'b' );
15064             }
15065
15066             #----------------------------------------------------------
15067             # Section 1: examine token at $imid (right end of first line
15068             # of pair)
15069             #----------------------------------------------------------
15070
15071             # an isolated '}' may join with a ';' terminated segment
15072             if ( $types_to_go[$imid] eq '}' ) {
15073
15074                 # Check for cases where combining a semicolon terminated
15075                 # statement with a previous isolated closing paren will
15076                 # allow the combined line to be outdented.  This is
15077                 # generally a good move.  For example, we can join up
15078                 # the last two lines here:
15079                 #  (
15080                 #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15081                 #      $size, $atime, $mtime, $ctime, $blksize, $blocks
15082                 #    )
15083                 #    = stat($file);
15084                 #
15085                 # to get:
15086                 #  (
15087                 #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
15088                 #      $size, $atime, $mtime, $ctime, $blksize, $blocks
15089                 #  ) = stat($file);
15090                 #
15091                 # which makes the parens line up.
15092                 #
15093                 # Another example, from Joe Matarazzo, probably looks best
15094                 # with the 'or' clause appended to the trailing paren:
15095                 #  $self->some_method(
15096                 #      PARAM1 => 'foo',
15097                 #      PARAM2 => 'bar'
15098                 #  ) or die "Some_method didn't work";
15099                 #
15100                 $previous_outdentable_closing_paren =
15101                   $this_line_is_semicolon_terminated    # ends in ';'
15102                   && $if == $imid    # only one token on last line
15103                   && $tokens_to_go[$imid] eq ')'    # must be structural paren
15104
15105                   # only &&, ||, and : if no others seen
15106                   # (but note: our count made below could be wrong
15107                   # due to intervening comments)
15108                   && ( $leading_amp_count == 0
15109                     || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ )
15110
15111                   # but leading colons probably line up with with a
15112                   # previous colon or question (count could be wrong).
15113                   && $types_to_go[$imidr] ne ':'
15114
15115                   # only one step in depth allowed.  this line must not
15116                   # begin with a ')' itself.
15117                   && ( $nesting_depth_to_go[$imid] ==
15118                     $nesting_depth_to_go[$il] + 1 );
15119
15120                 next
15121                   unless (
15122                     $previous_outdentable_closing_paren
15123
15124                     # handle '.' and '?' specially below
15125                     || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
15126                   );
15127             }
15128
15129             # do not recombine lines with ending &&, ||, or :
15130             elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
15131                 next unless $want_break_before{ $types_to_go[$imid] };
15132             }
15133
15134             # for lines ending in a comma...
15135             elsif ( $types_to_go[$imid] eq ',' ) {
15136
15137                 # an isolated '},' may join with an identifier + ';'
15138                 # this is useful for the class of a 'bless' statement (bless.t)
15139                 if (   $types_to_go[$if] eq '}'
15140                     && $types_to_go[$imidr] eq 'i' )
15141                 {
15142                     next
15143                       unless ( ( $if == ( $imid - 1 ) )
15144                         && ( $il == ( $imidr + 1 ) )
15145                         && $this_line_is_semicolon_terminated );
15146
15147                     # override breakpoint
15148                     $forced_breakpoint_to_go[$imid] = 0;
15149                 }
15150
15151                 # but otherwise, do not recombine unless this will leave
15152                 # just 1 more line
15153                 else {
15154                     next unless ( $n + 1 >= $nmax );
15155                 }
15156             }
15157
15158             # opening paren..
15159             elsif ( $types_to_go[$imid] eq '(' ) {
15160
15161                 # No longer doing this
15162             }
15163
15164             elsif ( $types_to_go[$imid] eq ')' ) {
15165
15166                 # No longer doing this
15167             }
15168
15169             # keep a terminal colon
15170             elsif ( $types_to_go[$imid] eq ':' ) {
15171                 next;
15172             }
15173
15174             # keep a terminal for-semicolon
15175             elsif ( $types_to_go[$imid] eq 'f' ) {
15176                 next;
15177             }
15178
15179             # if '=' at end of line ...
15180             elsif ( $is_assignment{ $types_to_go[$imid] } ) {
15181
15182                 # otherwise always ok to join isolated '='
15183                 unless ( $if == $imid ) {
15184
15185                     my $is_math = (
15186                         ( $types_to_go[$il] =~ /^[+-\/\*\)]$/ )
15187
15188                         # note no '$' in pattern because -> can
15189                         # start long identifier
15190                           && !grep { $_ =~ /^(->|=>|[\,])/ }
15191                           @types_to_go[ $imidr .. $il ]
15192                     );
15193
15194                     # retain the break after the '=' unless ...
15195                     next
15196                       unless (
15197
15198                         # '=' is followed by a number and looks like math
15199                         ( $types_to_go[$imidr] eq 'n' && $is_math )
15200
15201                         # or followed by a scalar and looks like math
15202                         || (   ( $types_to_go[$imidr] eq 'i' )
15203                             && ( $tokens_to_go[$imidr] =~ /^\$/ )
15204                             && $is_math )
15205
15206                         # or followed by a single "short" token
15207                         # ('12' is arbitrary)
15208                         || ( $il == $imidr
15209                             && token_sequence_length( $imidr, $imidr ) < 12 )
15210
15211                       );
15212                 }
15213                 unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
15214                     $forced_breakpoint_to_go[$imid] = 0;
15215                 }
15216             }
15217
15218             # for keywords..
15219             elsif ( $types_to_go[$imid] eq 'k' ) {
15220
15221                 # make major control keywords stand out
15222                 # (recombine.t)
15223                 next
15224                   if (
15225
15226                     #/^(last|next|redo|return)$/
15227                     $is_last_next_redo_return{ $tokens_to_go[$imid] }
15228                   );
15229
15230                 if ( $is_and_or{ $tokens_to_go[$imid] } ) {
15231                     next unless $want_break_before{ $tokens_to_go[$imid] };
15232                 }
15233             }
15234
15235             #----------------------------------------------------------
15236             # Section 2: Now examine token at $imidr (left end of second
15237             # line of pair)
15238             #----------------------------------------------------------
15239
15240             # join lines identified above as capable of
15241             # causing an outdented line with leading closing paren
15242             if ($previous_outdentable_closing_paren) {
15243                 $forced_breakpoint_to_go[$imid] = 0;
15244             }
15245
15246             # do not recombine lines with leading &&, ||, or :
15247             elsif ( $types_to_go[$imidr] =~ /^(:|\&\&|\|\|)$/ ) {
15248                 $leading_amp_count++;
15249                 next if $want_break_before{ $types_to_go[$imidr] };
15250             }
15251
15252             # Identify and recombine a broken ?/: chain
15253             elsif ( $types_to_go[$imidr] eq '?' ) {
15254
15255                 # indexes of line first tokens --
15256                 #  mm  - line before previous line
15257                 #  f   - previous line
15258                 #     <-- this line
15259                 #  ff  - next line
15260                 #  fff - line after next
15261                 my $iff  = $n < $nmax      ? $$ri_first[ $n + 1 ] : -1;
15262                 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
15263                 my $imm  = $n > 1          ? $$ri_first[ $n - 2 ] : -1;
15264                 my $seqno = $type_sequence_to_go[$imidr];
15265                 my $f_ok =
15266                   (      $types_to_go[$if] eq ':'
15267                       && $type_sequence_to_go[$if] ==
15268                       $seqno - TYPE_SEQUENCE_INCREMENT );
15269                 my $mm_ok =
15270                   (      $imm >= 0
15271                       && $types_to_go[$imm] eq ':'
15272                       && $type_sequence_to_go[$imm] ==
15273                       $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
15274
15275                 my $ff_ok =
15276                   (      $iff > 0
15277                       && $types_to_go[$iff] eq ':'
15278                       && $type_sequence_to_go[$iff] == $seqno );
15279                 my $fff_ok =
15280                   (      $ifff > 0
15281                       && $types_to_go[$ifff] eq ':'
15282                       && $type_sequence_to_go[$ifff] ==
15283                       $seqno + TYPE_SEQUENCE_INCREMENT );
15284
15285                 # we require that this '?' be part of a correct sequence
15286                 # of 3 in a row or else no recombination is done.
15287                 next
15288                   unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
15289                 $forced_breakpoint_to_go[$imid] = 0;
15290             }
15291
15292             # do not recombine lines with leading '.'
15293             elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
15294                 my $i_next_nonblank = $imidr + 1;
15295                 if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
15296                     $i_next_nonblank++;
15297                 }
15298
15299                 next
15300                   unless (
15301
15302                    # ... unless there is just one and we can reduce
15303                    # this to two lines if we do.  For example, this
15304                    #
15305                    #
15306                    #  $bodyA .=
15307                    #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
15308                    #
15309                    #  looks better than this:
15310                    #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
15311                    #    . '$args .= $pat;'
15312
15313                     (
15314                            $n == 2
15315                         && $n == $nmax
15316                         && $types_to_go[$if] ne $types_to_go[$imidr]
15317                     )
15318
15319                     #      ... or this would strand a short quote , like this
15320                     #                . "some long qoute"
15321                     #                . "\n";
15322
15323                     || (   $types_to_go[$i_next_nonblank] eq 'Q'
15324                         && $i_next_nonblank >= $il - 1
15325                         && length( $tokens_to_go[$i_next_nonblank] ) <
15326                         $rOpts_short_concatenation_item_length )
15327                   );
15328             }
15329
15330             # handle leading keyword..
15331             elsif ( $types_to_go[$imidr] eq 'k' ) {
15332
15333                 # handle leading "and" and "or"
15334                 if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
15335
15336                     # Decide if we will combine a single terminal 'and' and
15337                     # 'or' after an 'if' or 'unless'.  We should consider the
15338                     # possible vertical alignment, and visual clutter.
15339
15340                     #     This looks best with the 'and' on the same
15341                     #     line as the 'if':
15342                     #
15343                     #         $a = 1
15344                     #           if $seconds and $nu < 2;
15345                     #
15346                     #     But this looks better as shown:
15347                     #
15348                     #         $a = 1
15349                     #           if !$this->{Parents}{$_}
15350                     #           or $this->{Parents}{$_} eq $_;
15351                     #
15352                     #     Eventually, it would be nice to look for
15353                     #     similarities (such as 'this' or 'Parents'), but
15354                     #     for now I'm using a simple rule that says that
15355                     #     the resulting line length must not be more than
15356                     #     half the maximum line length (making it 80/2 =
15357                     #     40 characters by default).
15358                     next
15359                       unless (
15360                         $this_line_is_semicolon_terminated
15361                         && (
15362
15363                             # following 'if' or 'unless'
15364                             $types_to_go[$if] eq 'k'
15365                             && $is_if_unless{ $tokens_to_go[$if] }
15366
15367                         )
15368                       );
15369                 }
15370
15371                 # handle leading "if" and "unless"
15372                 elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
15373
15374                     # FIXME: This is still experimental..may not be too useful
15375                     next
15376                       unless (
15377                         $this_line_is_semicolon_terminated
15378
15379                         #  previous line begins with 'and' or 'or'
15380                         && $types_to_go[$if] eq 'k'
15381                         && $is_and_or{ $tokens_to_go[$if] }
15382
15383                       );
15384                 }
15385
15386                 # handle all other leading keywords
15387                 else {
15388
15389                     # keywords look best at start of lines,
15390                     # but combine things like "1 while"
15391                     unless ( $is_assignment{ $types_to_go[$imid] } ) {
15392                         next
15393                           if ( ( $types_to_go[$imid] ne 'k' )
15394                             && ( $tokens_to_go[$imidr] ne 'while' ) );
15395                     }
15396                 }
15397             }
15398
15399             # similar treatment of && and || as above for 'and' and 'or':
15400             # NOTE: This block of code is currently bypassed because
15401             # of a previous block but is retained for possible future use.
15402             elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
15403
15404                 # maybe looking at something like:
15405                 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
15406
15407                 next
15408                   unless (
15409                     $this_line_is_semicolon_terminated
15410
15411                     # previous line begins with an 'if' or 'unless' keyword
15412                     && $types_to_go[$if] eq 'k'
15413                     && $is_if_unless{ $tokens_to_go[$if] }
15414
15415                   );
15416             }
15417
15418             #----------------------------------------------------------
15419             # Section 3:
15420             # Combine the lines if we arrive here and it is possible
15421             #----------------------------------------------------------
15422
15423             # honor hard breakpoints
15424             next if ( $forced_breakpoint_to_go[$imid] > 0 );
15425
15426             my $bs = $bond_strength_to_go[$imid];
15427
15428             # combined line cannot be too long
15429             next
15430               if excess_line_length( $if, $il ) > 0;
15431
15432             # do not recombine if we would skip in indentation levels
15433             if ( $n < $nmax ) {
15434                 my $if_next = $$ri_first[ $n + 1 ];
15435                 next
15436                   if (
15437                        $levels_to_go[$if] < $levels_to_go[$imidr]
15438                     && $levels_to_go[$imidr] < $levels_to_go[$if_next]
15439
15440                     # but an isolated 'if (' is undesirable
15441                     && !(
15442                            $n == 1
15443                         && $imid - $if <= 2
15444                         && $types_to_go[$if]  eq 'k'
15445                         && $tokens_to_go[$if] eq 'if'
15446                         && $tokens_to_go[$imid] ne '('
15447                     )
15448                   );
15449             }
15450
15451             # honor no-break's
15452             next if ( $bs == NO_BREAK );
15453
15454             # remember the pair with the greatest bond strength
15455             if ( !$n_best ) {
15456                 $n_best  = $n;
15457                 $bs_best = $bs;
15458             }
15459             else {
15460
15461                 if ( $bs > $bs_best ) {
15462                     $n_best  = $n;
15463                     $bs_best = $bs;
15464                 }
15465
15466                 # we have 2 or more candidates, so need another pass
15467                 $more_to_do++;
15468             }
15469         }
15470
15471         # recombine the pair with the greatest bond strength
15472         if ($n_best) {
15473             splice @$ri_first, $n_best, 1;
15474             splice @$ri_last, $n_best - 1, 1;
15475         }
15476     }
15477     return ( $ri_first, $ri_last );
15478 }
15479
15480 sub set_continuation_breaks {
15481
15482     # Define an array of indexes for inserting newline characters to
15483     # keep the line lengths below the maximum desired length.  There is
15484     # an implied break after the last token, so it need not be included.
15485     # We'll break at points where the bond strength is lowest.
15486
15487     my $saw_good_break = shift;
15488     my @i_first        = ();      # the first index to output
15489     my @i_last         = ();      # the last index to output
15490     my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
15491     if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
15492
15493     set_bond_strengths();
15494
15495     my $imin = 0;
15496     my $imax = $max_index_to_go;
15497     if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
15498     if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
15499     my $i_begin = $imin;
15500
15501     my $leading_spaces          = leading_spaces_to_go($imin);
15502     my $line_count              = 0;
15503     my $last_break_strength     = NO_BREAK;
15504     my $i_last_break            = -1;
15505     my $max_bias                = 0.001;
15506     my $tiny_bias               = 0.0001;
15507     my $leading_alignment_token = "";
15508     my $leading_alignment_type  = "";
15509
15510     # see if any ?/:'s are in order
15511     my $colons_in_order = 1;
15512     my $last_tok        = "";
15513     my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
15514     foreach (@colon_list) {
15515         if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
15516         $last_tok = $_;
15517     }
15518
15519     # This is a sufficient but not necessary condition for colon chain
15520     my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
15521
15522     while ( $i_begin <= $imax ) {
15523         my $lowest_strength        = NO_BREAK;
15524         my $starting_sum           = $lengths_to_go[$i_begin];
15525         my $i_lowest               = -1;
15526         my $i_test                 = -1;
15527         my $lowest_next_token      = '';
15528         my $lowest_next_type       = 'b';
15529         my $i_lowest_next_nonblank = -1;
15530
15531         # loop to find next break point
15532         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
15533             my $type       = $types_to_go[$i_test];
15534             my $token      = $tokens_to_go[$i_test];
15535             my $next_type  = $types_to_go[ $i_test + 1 ];
15536             my $next_token = $tokens_to_go[ $i_test + 1 ];
15537             my $i_next_nonblank =
15538               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
15539             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
15540             my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
15541             my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15542             my $strength                 = $bond_strength_to_go[$i_test];
15543             my $must_break               = 0;
15544
15545             # FIXME: TESTING: Might want to be able to break after these
15546             # force an immediate break at certain operators
15547             # with lower level than the start of the line
15548             if (
15549                 (
15550                     $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
15551                     || (   $next_nonblank_type eq 'k'
15552                         && $next_nonblank_token =~ /^(and|or)$/ )
15553                 )
15554                 && ( $nesting_depth_to_go[$i_begin] >
15555                     $nesting_depth_to_go[$i_next_nonblank] )
15556               )
15557             {
15558                 set_forced_breakpoint($i_next_nonblank);
15559             }
15560
15561             if (
15562
15563                 # Try to put a break where requested by scan_list
15564                 $forced_breakpoint_to_go[$i_test]
15565
15566                 # break between ) { in a continued line so that the '{' can
15567                 # be outdented
15568                 # See similar logic in scan_list which catches instances
15569                 # where a line is just something like ') {'
15570                 || (   $line_count
15571                     && ( $token eq ')' )
15572                     && ( $next_nonblank_type eq '{' )
15573                     && ($next_nonblank_block_type)
15574                     && !$rOpts->{'opening-brace-always-on-right'} )
15575
15576                 # There is an implied forced break at a terminal opening brace
15577                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
15578               )
15579             {
15580
15581                 # Forced breakpoints must sometimes be overridden, for example
15582                 # because of a side comment causing a NO_BREAK.  It is easier
15583                 # to catch this here than when they are set.
15584                 if ( $strength < NO_BREAK ) {
15585                     $strength   = $lowest_strength - $tiny_bias;
15586                     $must_break = 1;
15587                 }
15588             }
15589
15590             # quit if a break here would put a good terminal token on
15591             # the next line and we already have a possible break
15592             if (
15593                    !$must_break
15594                 && ( $next_nonblank_type =~ /^[\;\,]$/ )
15595                 && (
15596                     (
15597                         $leading_spaces + $lengths_to_go[ $i_next_nonblank + 1 ]
15598                         - $starting_sum
15599                     ) > $rOpts_maximum_line_length
15600                 )
15601               )
15602             {
15603                 last if ( $i_lowest >= 0 );
15604             }
15605
15606             # Avoid a break which would strand a single punctuation
15607             # token.  For example, we do not want to strand a leading
15608             # '.' which is followed by a long quoted string.
15609             if (
15610                    !$must_break
15611                 && ( $i_test == $i_begin )
15612                 && ( $i_test < $imax )
15613                 && ( $token eq $type )
15614                 && (
15615                     (
15616                         $leading_spaces + $lengths_to_go[ $i_test + 1 ] -
15617                         $starting_sum
15618                     ) <= $rOpts_maximum_line_length
15619                 )
15620               )
15621             {
15622                 $i_test++;
15623
15624                 if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
15625                     $i_test++;
15626                 }
15627                 redo;
15628             }
15629
15630             if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
15631             {
15632
15633                 # break at previous best break if it would have produced
15634                 # a leading alignment of certain common tokens, and it
15635                 # is different from the latest candidate break
15636                 last
15637                   if ($leading_alignment_type);
15638
15639                 # Force at least one breakpoint if old code had good
15640                 # break It is only called if a breakpoint is required or
15641                 # desired.  This will probably need some adjustments
15642                 # over time.  A goal is to try to be sure that, if a new
15643                 # side comment is introduced into formated text, then
15644                 # the same breakpoints will occur.  scbreak.t
15645                 last
15646                   if (
15647                     $i_test == $imax                # we are at the end
15648                     && !$forced_breakpoint_count    #
15649                     && $saw_good_break              # old line had good break
15650                     && $type =~ /^[#;\{]$/          # and this line ends in
15651                                                     # ';' or side comment
15652                     && $i_last_break < 0        # and we haven't made a break
15653                     && $i_lowest > 0            # and we saw a possible break
15654                     && $i_lowest < $imax - 1    # (but not just before this ;)
15655                     && $strength - $lowest_strength < 0.5 * WEAK # and it's good
15656                   );
15657
15658                 $lowest_strength        = $strength;
15659                 $i_lowest               = $i_test;
15660                 $lowest_next_token      = $next_nonblank_token;
15661                 $lowest_next_type       = $next_nonblank_type;
15662                 $i_lowest_next_nonblank = $i_next_nonblank;
15663                 last if $must_break;
15664
15665                 # set flags to remember if a break here will produce a
15666                 # leading alignment of certain common tokens
15667                 if (   $line_count > 0
15668                     && $i_test < $imax
15669                     && ( $lowest_strength - $last_break_strength <= $max_bias )
15670                   )
15671                 {
15672                     my $i_last_end = $i_begin - 1;
15673                     if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
15674                     my $tok_beg  = $tokens_to_go[$i_begin];
15675                     my $type_beg = $types_to_go[$i_begin];
15676                     if (
15677
15678                         # check for leading alignment of certain tokens
15679                         (
15680                                $tok_beg eq $next_nonblank_token
15681                             && $is_chain_operator{$tok_beg}
15682                             && (   $type_beg eq 'k'
15683                                 || $type_beg eq $tok_beg )
15684                             && $nesting_depth_to_go[$i_begin] >=
15685                             $nesting_depth_to_go[$i_next_nonblank]
15686                         )
15687
15688                         || (   $tokens_to_go[$i_last_end] eq $token
15689                             && $is_chain_operator{$token}
15690                             && ( $type eq 'k' || $type eq $token )
15691                             && $nesting_depth_to_go[$i_last_end] >=
15692                             $nesting_depth_to_go[$i_test] )
15693                       )
15694                     {
15695                         $leading_alignment_token = $next_nonblank_token;
15696                         $leading_alignment_type  = $next_nonblank_type;
15697                     }
15698                 }
15699             }
15700
15701             my $too_long =
15702               ( $i_test >= $imax )
15703               ? 1
15704               : (
15705                 (
15706                     $leading_spaces + $lengths_to_go[ $i_test + 2 ] -
15707                       $starting_sum
15708                 ) > $rOpts_maximum_line_length
15709               );
15710
15711             FORMATTER_DEBUG_FLAG_BREAK
15712               && print
15713 "BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
15714
15715             # allow one extra terminal token after exceeding line length
15716             # if it would strand this token.
15717             if (   $rOpts_fuzzy_line_length
15718                 && $too_long
15719                 && ( $i_lowest == $i_test )
15720                 && ( length($token) > 1 )
15721                 && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
15722             {
15723                 $too_long = 0;
15724             }
15725
15726             last
15727               if (
15728                 ( $i_test == $imax )    # we're done if no more tokens,
15729                 || (
15730                     ( $i_lowest >= 0 )    # or no more space and we have a break
15731                     && $too_long
15732                 )
15733               );
15734         }
15735
15736         # it's always ok to break at imax if no other break was found
15737         if ( $i_lowest < 0 ) { $i_lowest = $imax }
15738
15739         # semi-final index calculation
15740         my $i_next_nonblank = (
15741             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
15742             ? $i_lowest + 2
15743             : $i_lowest + 1
15744         );
15745         my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
15746         my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15747
15748         #-------------------------------------------------------
15749         # ?/: rule 1 : if a break here will separate a '?' on this
15750         # line from its closing ':', then break at the '?' instead.
15751         #-------------------------------------------------------
15752         my $i;
15753         foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
15754             next unless ( $tokens_to_go[$i] eq '?' );
15755
15756             # do not break if probable sequence of ?/: statements
15757             next if ($is_colon_chain);
15758
15759             # do not break if statement is broken by side comment
15760             next
15761               if (
15762                 $tokens_to_go[$max_index_to_go] eq '#'
15763                 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
15764                     $max_index_to_go ) !~ /^[\;\}]$/
15765               );
15766
15767             # no break needed if matching : is also on the line
15768             next
15769               if ( $mate_index_to_go[$i] >= 0
15770                 && $mate_index_to_go[$i] <= $i_next_nonblank );
15771
15772             $i_lowest = $i;
15773             if ( $want_break_before{'?'} ) { $i_lowest-- }
15774             last;
15775         }
15776
15777         # final index calculation
15778         $i_next_nonblank = (
15779             ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
15780             ? $i_lowest + 2
15781             : $i_lowest + 1
15782         );
15783         $next_nonblank_type  = $types_to_go[$i_next_nonblank];
15784         $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15785
15786         FORMATTER_DEBUG_FLAG_BREAK
15787           && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
15788
15789         #-------------------------------------------------------
15790         # ?/: rule 2 : if we break at a '?', then break at its ':'
15791         #
15792         # Note: this rule is also in sub scan_list to handle a break
15793         # at the start and end of a line (in case breaks are dictated
15794         # by side comments).
15795         #-------------------------------------------------------
15796         if ( $next_nonblank_type eq '?' ) {
15797             set_closing_breakpoint($i_next_nonblank);
15798         }
15799         elsif ( $types_to_go[$i_lowest] eq '?' ) {
15800             set_closing_breakpoint($i_lowest);
15801         }
15802
15803         #-------------------------------------------------------
15804         # ?/: rule 3 : if we break at a ':' then we save
15805         # its location for further work below.  We may need to go
15806         # back and break at its '?'.
15807         #-------------------------------------------------------
15808         if ( $next_nonblank_type eq ':' ) {
15809             push @i_colon_breaks, $i_next_nonblank;
15810         }
15811         elsif ( $types_to_go[$i_lowest] eq ':' ) {
15812             push @i_colon_breaks, $i_lowest;
15813         }
15814
15815         # here we should set breaks for all '?'/':' pairs which are
15816         # separated by this line
15817
15818         $line_count++;
15819
15820         # save this line segment, after trimming blanks at the ends
15821         push( @i_first,
15822             ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
15823         push( @i_last,
15824             ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
15825
15826         # set a forced breakpoint at a container opening, if necessary, to
15827         # signal a break at a closing container.  Excepting '(' for now.
15828         if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
15829             && !$forced_breakpoint_to_go[$i_lowest] )
15830         {
15831             set_closing_breakpoint($i_lowest);
15832         }
15833
15834         # get ready to go again
15835         $i_begin                 = $i_lowest + 1;
15836         $last_break_strength     = $lowest_strength;
15837         $i_last_break            = $i_lowest;
15838         $leading_alignment_token = "";
15839         $leading_alignment_type  = "";
15840         $lowest_next_token       = '';
15841         $lowest_next_type        = 'b';
15842
15843         if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
15844             $i_begin++;
15845         }
15846
15847         # update indentation size
15848         if ( $i_begin <= $imax ) {
15849             $leading_spaces = leading_spaces_to_go($i_begin);
15850         }
15851     }
15852
15853     #-------------------------------------------------------
15854     # ?/: rule 4 -- if we broke at a ':', then break at
15855     # corresponding '?' unless this is a chain of ?: expressions
15856     #-------------------------------------------------------
15857     if (@i_colon_breaks) {
15858
15859         # using a simple method for deciding if we are in a ?/: chain --
15860         # this is a chain if it has multiple ?/: pairs all in order;
15861         # otherwise not.
15862         # Note that if line starts in a ':' we count that above as a break
15863         my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
15864
15865         unless ($is_chain) {
15866             my @insert_list = ();
15867             foreach (@i_colon_breaks) {
15868                 my $i_question = $mate_index_to_go[$_];
15869                 if ( $i_question >= 0 ) {
15870                     if ( $want_break_before{'?'} ) {
15871                         $i_question--;
15872                         if (   $i_question > 0
15873                             && $types_to_go[$i_question] eq 'b' )
15874                         {
15875                             $i_question--;
15876                         }
15877                     }
15878
15879                     if ( $i_question >= 0 ) {
15880                         push @insert_list, $i_question;
15881                     }
15882                 }
15883                 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
15884             }
15885         }
15886     }
15887     return \@i_first, \@i_last;
15888 }
15889
15890 sub insert_additional_breaks {
15891
15892     # this routine will add line breaks at requested locations after
15893     # sub set_continuation_breaks has made preliminary breaks.
15894
15895     my ( $ri_break_list, $ri_first, $ri_last ) = @_;
15896     my $i_f;
15897     my $i_l;
15898     my $line_number = 0;
15899     my $i_break_left;
15900     foreach $i_break_left ( sort @$ri_break_list ) {
15901
15902         $i_f = $$ri_first[$line_number];
15903         $i_l = $$ri_last[$line_number];
15904         while ( $i_break_left >= $i_l ) {
15905             $line_number++;
15906
15907             # shouldn't happen unless caller passes bad indexes
15908             if ( $line_number >= @$ri_last ) {
15909                 warning(
15910 "Non-fatal program bug: couldn't set break at $i_break_left\n"
15911                 );
15912                 report_definite_bug();
15913                 return;
15914             }
15915             $i_f = $$ri_first[$line_number];
15916             $i_l = $$ri_last[$line_number];
15917         }
15918
15919         my $i_break_right = $i_break_left + 1;
15920         if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
15921
15922         if (   $i_break_left >= $i_f
15923             && $i_break_left < $i_l
15924             && $i_break_right > $i_f
15925             && $i_break_right <= $i_l )
15926         {
15927             splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
15928             splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
15929         }
15930     }
15931 }
15932
15933 sub set_closing_breakpoint {
15934
15935     # set a breakpoint at a matching closing token
15936     # at present, this is only used to break at a ':' which matches a '?'
15937     my $i_break = shift;
15938
15939     if ( $mate_index_to_go[$i_break] >= 0 ) {
15940
15941         # CAUTION: infinite recursion possible here:
15942         #   set_closing_breakpoint calls set_forced_breakpoint, and
15943         #   set_forced_breakpoint call set_closing_breakpoint
15944         #   ( test files attrib.t, BasicLyx.pm.html).
15945         # Don't reduce the '2' in the statement below
15946         if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
15947
15948             # break before } ] and ), but sub set_forced_breakpoint will decide
15949             # to break before or after a ? and :
15950             my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
15951             set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
15952         }
15953     }
15954     else {
15955         my $type_sequence = $type_sequence_to_go[$i_break];
15956         if ($type_sequence) {
15957             my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
15958             $postponed_breakpoint{$type_sequence} = 1;
15959         }
15960     }
15961 }
15962
15963 # check to see if output line tabbing agrees with input line
15964 # this can be very useful for debugging a script which has an extra
15965 # or missing brace
15966 sub compare_indentation_levels {
15967
15968     my ( $python_indentation_level, $structural_indentation_level ) = @_;
15969     if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
15970         $last_tabbing_disagreement = $input_line_number;
15971
15972         if ($in_tabbing_disagreement) {
15973         }
15974         else {
15975             $tabbing_disagreement_count++;
15976
15977             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15978                 write_logfile_entry(
15979 "Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
15980                 );
15981             }
15982             $in_tabbing_disagreement    = $input_line_number;
15983             $first_tabbing_disagreement = $in_tabbing_disagreement
15984               unless ($first_tabbing_disagreement);
15985         }
15986     }
15987     else {
15988
15989         if ($in_tabbing_disagreement) {
15990
15991             if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
15992                 write_logfile_entry(
15993 "End indentation disagreement from input line $in_tabbing_disagreement\n"
15994                 );
15995
15996                 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
15997                     write_logfile_entry(
15998                         "No further tabbing disagreements will be noted\n");
15999                 }
16000             }
16001             $in_tabbing_disagreement = 0;
16002         }
16003     }
16004 }
16005
16006 #####################################################################
16007 #
16008 # the Perl::Tidy::IndentationItem class supplies items which contain
16009 # how much whitespace should be used at the start of a line
16010 #
16011 #####################################################################
16012
16013 package Perl::Tidy::IndentationItem;
16014
16015 # Indexes for indentation items
16016 use constant SPACES             => 0;     # total leading white spaces
16017 use constant LEVEL              => 1;     # the indentation 'level'
16018 use constant CI_LEVEL           => 2;     # the 'continuation level'
16019 use constant AVAILABLE_SPACES   => 3;     # how many left spaces available
16020                                           # for this level
16021 use constant CLOSED             => 4;     # index where we saw closing '}'
16022 use constant COMMA_COUNT        => 5;     # how many commas at this level?
16023 use constant SEQUENCE_NUMBER    => 6;     # output batch number
16024 use constant INDEX              => 7;     # index in output batch list
16025 use constant HAVE_CHILD         => 8;     # any dependents?
16026 use constant RECOVERABLE_SPACES => 9;     # how many spaces to the right
16027                                           # we would like to move to get
16028                                           # alignment (negative if left)
16029 use constant ALIGN_PAREN        => 10;    # do we want to try to align
16030                                           # with an opening structure?
16031 use constant MARKED             => 11;    # if visited by corrector logic
16032 use constant STACK_DEPTH        => 12;    # indentation nesting depth
16033 use constant STARTING_INDEX     => 13;    # first token index of this level
16034 use constant ARROW_COUNT        => 14;    # how many =>'s
16035
16036 sub new {
16037
16038     # Create an 'indentation_item' which describes one level of leading
16039     # whitespace when the '-lp' indentation is used.  We return
16040     # a reference to an anonymous array of associated variables.
16041     # See above constants for storage scheme.
16042     my (
16043         $class,               $spaces,           $level,
16044         $ci_level,            $available_spaces, $index,
16045         $gnu_sequence_number, $align_paren,      $stack_depth,
16046         $starting_index,
16047     ) = @_;
16048     my $closed            = -1;
16049     my $arrow_count       = 0;
16050     my $comma_count       = 0;
16051     my $have_child        = 0;
16052     my $want_right_spaces = 0;
16053     my $marked            = 0;
16054     bless [
16055         $spaces,              $level,          $ci_level,
16056         $available_spaces,    $closed,         $comma_count,
16057         $gnu_sequence_number, $index,          $have_child,
16058         $want_right_spaces,   $align_paren,    $marked,
16059         $stack_depth,         $starting_index, $arrow_count,
16060     ], $class;
16061 }
16062
16063 sub permanently_decrease_AVAILABLE_SPACES {
16064
16065     # make a permanent reduction in the available indentation spaces
16066     # at one indentation item.  NOTE: if there are child nodes, their
16067     # total SPACES must be reduced by the caller.
16068
16069     my ( $item, $spaces_needed ) = @_;
16070     my $available_spaces = $item->get_AVAILABLE_SPACES();
16071     my $deleted_spaces =
16072       ( $available_spaces > $spaces_needed )
16073       ? $spaces_needed
16074       : $available_spaces;
16075     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
16076     $item->decrease_SPACES($deleted_spaces);
16077     $item->set_RECOVERABLE_SPACES(0);
16078
16079     return $deleted_spaces;
16080 }
16081
16082 sub tentatively_decrease_AVAILABLE_SPACES {
16083
16084     # We are asked to tentatively delete $spaces_needed of indentation
16085     # for a indentation item.  We may want to undo this later.  NOTE: if
16086     # there are child nodes, their total SPACES must be reduced by the
16087     # caller.
16088     my ( $item, $spaces_needed ) = @_;
16089     my $available_spaces = $item->get_AVAILABLE_SPACES();
16090     my $deleted_spaces =
16091       ( $available_spaces > $spaces_needed )
16092       ? $spaces_needed
16093       : $available_spaces;
16094     $item->decrease_AVAILABLE_SPACES($deleted_spaces);
16095     $item->decrease_SPACES($deleted_spaces);
16096     $item->increase_RECOVERABLE_SPACES($deleted_spaces);
16097     return $deleted_spaces;
16098 }
16099
16100 sub get_STACK_DEPTH {
16101     my $self = shift;
16102     return $self->[STACK_DEPTH];
16103 }
16104
16105 sub get_SPACES {
16106     my $self = shift;
16107     return $self->[SPACES];
16108 }
16109
16110 sub get_MARKED {
16111     my $self = shift;
16112     return $self->[MARKED];
16113 }
16114
16115 sub set_MARKED {
16116     my ( $self, $value ) = @_;
16117     if ( defined($value) ) {
16118         $self->[MARKED] = $value;
16119     }
16120     return $self->[MARKED];
16121 }
16122
16123 sub get_AVAILABLE_SPACES {
16124     my $self = shift;
16125     return $self->[AVAILABLE_SPACES];
16126 }
16127
16128 sub decrease_SPACES {
16129     my ( $self, $value ) = @_;
16130     if ( defined($value) ) {
16131         $self->[SPACES] -= $value;
16132     }
16133     return $self->[SPACES];
16134 }
16135
16136 sub decrease_AVAILABLE_SPACES {
16137     my ( $self, $value ) = @_;
16138     if ( defined($value) ) {
16139         $self->[AVAILABLE_SPACES] -= $value;
16140     }
16141     return $self->[AVAILABLE_SPACES];
16142 }
16143
16144 sub get_ALIGN_PAREN {
16145     my $self = shift;
16146     return $self->[ALIGN_PAREN];
16147 }
16148
16149 sub get_RECOVERABLE_SPACES {
16150     my $self = shift;
16151     return $self->[RECOVERABLE_SPACES];
16152 }
16153
16154 sub set_RECOVERABLE_SPACES {
16155     my ( $self, $value ) = @_;
16156     if ( defined($value) ) {
16157         $self->[RECOVERABLE_SPACES] = $value;
16158     }
16159     return $self->[RECOVERABLE_SPACES];
16160 }
16161
16162 sub increase_RECOVERABLE_SPACES {
16163     my ( $self, $value ) = @_;
16164     if ( defined($value) ) {
16165         $self->[RECOVERABLE_SPACES] += $value;
16166     }
16167     return $self->[RECOVERABLE_SPACES];
16168 }
16169
16170 sub get_CI_LEVEL {
16171     my $self = shift;
16172     return $self->[CI_LEVEL];
16173 }
16174
16175 sub get_LEVEL {
16176     my $self = shift;
16177     return $self->[LEVEL];
16178 }
16179
16180 sub get_SEQUENCE_NUMBER {
16181     my $self = shift;
16182     return $self->[SEQUENCE_NUMBER];
16183 }
16184
16185 sub get_INDEX {
16186     my $self = shift;
16187     return $self->[INDEX];
16188 }
16189
16190 sub get_STARTING_INDEX {
16191     my $self = shift;
16192     return $self->[STARTING_INDEX];
16193 }
16194
16195 sub set_HAVE_CHILD {
16196     my ( $self, $value ) = @_;
16197     if ( defined($value) ) {
16198         $self->[HAVE_CHILD] = $value;
16199     }
16200     return $self->[HAVE_CHILD];
16201 }
16202
16203 sub get_HAVE_CHILD {
16204     my $self = shift;
16205     return $self->[HAVE_CHILD];
16206 }
16207
16208 sub set_ARROW_COUNT {
16209     my ( $self, $value ) = @_;
16210     if ( defined($value) ) {
16211         $self->[ARROW_COUNT] = $value;
16212     }
16213     return $self->[ARROW_COUNT];
16214 }
16215
16216 sub get_ARROW_COUNT {
16217     my $self = shift;
16218     return $self->[ARROW_COUNT];
16219 }
16220
16221 sub set_COMMA_COUNT {
16222     my ( $self, $value ) = @_;
16223     if ( defined($value) ) {
16224         $self->[COMMA_COUNT] = $value;
16225     }
16226     return $self->[COMMA_COUNT];
16227 }
16228
16229 sub get_COMMA_COUNT {
16230     my $self = shift;
16231     return $self->[COMMA_COUNT];
16232 }
16233
16234 sub set_CLOSED {
16235     my ( $self, $value ) = @_;
16236     if ( defined($value) ) {
16237         $self->[CLOSED] = $value;
16238     }
16239     return $self->[CLOSED];
16240 }
16241
16242 sub get_CLOSED {
16243     my $self = shift;
16244     return $self->[CLOSED];
16245 }
16246
16247 #####################################################################
16248 #
16249 # the Perl::Tidy::VerticalAligner::Line class supplies an object to
16250 # contain a single output line
16251 #
16252 #####################################################################
16253
16254 package Perl::Tidy::VerticalAligner::Line;
16255
16256 {
16257
16258     use strict;
16259     use Carp;
16260
16261     use constant JMAX                      => 0;
16262     use constant JMAX_ORIGINAL_LINE        => 1;
16263     use constant RTOKENS                   => 2;
16264     use constant RFIELDS                   => 3;
16265     use constant RPATTERNS                 => 4;
16266     use constant INDENTATION               => 5;
16267     use constant LEADING_SPACE_COUNT       => 6;
16268     use constant OUTDENT_LONG_LINES        => 7;
16269     use constant LIST_TYPE                 => 8;
16270     use constant IS_HANGING_SIDE_COMMENT   => 9;
16271     use constant RALIGNMENTS               => 10;
16272     use constant MAXIMUM_LINE_LENGTH       => 11;
16273     use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
16274
16275     my %_index_map;
16276     $_index_map{jmax}                      = JMAX;
16277     $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
16278     $_index_map{rtokens}                   = RTOKENS;
16279     $_index_map{rfields}                   = RFIELDS;
16280     $_index_map{rpatterns}                 = RPATTERNS;
16281     $_index_map{indentation}               = INDENTATION;
16282     $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
16283     $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
16284     $_index_map{list_type}                 = LIST_TYPE;
16285     $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
16286     $_index_map{ralignments}               = RALIGNMENTS;
16287     $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
16288     $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
16289
16290     my @_default_data = ();
16291     $_default_data[JMAX]                      = undef;
16292     $_default_data[JMAX_ORIGINAL_LINE]        = undef;
16293     $_default_data[RTOKENS]                   = undef;
16294     $_default_data[RFIELDS]                   = undef;
16295     $_default_data[RPATTERNS]                 = undef;
16296     $_default_data[INDENTATION]               = undef;
16297     $_default_data[LEADING_SPACE_COUNT]       = undef;
16298     $_default_data[OUTDENT_LONG_LINES]        = undef;
16299     $_default_data[LIST_TYPE]                 = undef;
16300     $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
16301     $_default_data[RALIGNMENTS]               = [];
16302     $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
16303     $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
16304
16305     {
16306
16307         # methods to count object population
16308         my $_count = 0;
16309         sub get_count        { $_count; }
16310         sub _increment_count { ++$_count }
16311         sub _decrement_count { --$_count }
16312     }
16313
16314     # Constructor may be called as a class method
16315     sub new {
16316         my ( $caller, %arg ) = @_;
16317         my $caller_is_obj = ref($caller);
16318         my $class = $caller_is_obj || $caller;
16319         no strict "refs";
16320         my $self = bless [], $class;
16321
16322         $self->[RALIGNMENTS] = [];
16323
16324         my $index;
16325         foreach ( keys %_index_map ) {
16326             $index = $_index_map{$_};
16327             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
16328             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
16329             else { $self->[$index] = $_default_data[$index] }
16330         }
16331
16332         $self->_increment_count();
16333         return $self;
16334     }
16335
16336     sub DESTROY {
16337         $_[0]->_decrement_count();
16338     }
16339
16340     sub get_jmax                      { $_[0]->[JMAX] }
16341     sub get_jmax_original_line        { $_[0]->[JMAX_ORIGINAL_LINE] }
16342     sub get_rtokens                   { $_[0]->[RTOKENS] }
16343     sub get_rfields                   { $_[0]->[RFIELDS] }
16344     sub get_rpatterns                 { $_[0]->[RPATTERNS] }
16345     sub get_indentation               { $_[0]->[INDENTATION] }
16346     sub get_leading_space_count       { $_[0]->[LEADING_SPACE_COUNT] }
16347     sub get_outdent_long_lines        { $_[0]->[OUTDENT_LONG_LINES] }
16348     sub get_list_type                 { $_[0]->[LIST_TYPE] }
16349     sub get_is_hanging_side_comment   { $_[0]->[IS_HANGING_SIDE_COMMENT] }
16350     sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
16351
16352     sub set_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
16353     sub get_alignment  { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
16354     sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
16355     sub get_column     { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
16356
16357     sub get_starting_column {
16358         $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
16359     }
16360
16361     sub increment_column {
16362         $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
16363     }
16364     sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
16365
16366     sub current_field_width {
16367         my $self = shift;
16368         my ($j) = @_;
16369         if ( $j == 0 ) {
16370             return $self->get_column($j);
16371         }
16372         else {
16373             return $self->get_column($j) - $self->get_column( $j - 1 );
16374         }
16375     }
16376
16377     sub field_width_growth {
16378         my $self = shift;
16379         my $j    = shift;
16380         return $self->get_column($j) - $self->get_starting_column($j);
16381     }
16382
16383     sub starting_field_width {
16384         my $self = shift;
16385         my $j    = shift;
16386         if ( $j == 0 ) {
16387             return $self->get_starting_column($j);
16388         }
16389         else {
16390             return $self->get_starting_column($j) -
16391               $self->get_starting_column( $j - 1 );
16392         }
16393     }
16394
16395     sub increase_field_width {
16396
16397         my $self = shift;
16398         my ( $j, $pad ) = @_;
16399         my $jmax = $self->get_jmax();
16400         for my $k ( $j .. $jmax ) {
16401             $self->increment_column( $k, $pad );
16402         }
16403     }
16404
16405     sub get_available_space_on_right {
16406         my $self = shift;
16407         my $jmax = $self->get_jmax();
16408         return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
16409     }
16410
16411     sub set_jmax                    { $_[0]->[JMAX]                    = $_[1] }
16412     sub set_jmax_original_line      { $_[0]->[JMAX_ORIGINAL_LINE]      = $_[1] }
16413     sub set_rtokens                 { $_[0]->[RTOKENS]                 = $_[1] }
16414     sub set_rfields                 { $_[0]->[RFIELDS]                 = $_[1] }
16415     sub set_rpatterns               { $_[0]->[RPATTERNS]               = $_[1] }
16416     sub set_indentation             { $_[0]->[INDENTATION]             = $_[1] }
16417     sub set_leading_space_count     { $_[0]->[LEADING_SPACE_COUNT]     = $_[1] }
16418     sub set_outdent_long_lines      { $_[0]->[OUTDENT_LONG_LINES]      = $_[1] }
16419     sub set_list_type               { $_[0]->[LIST_TYPE]               = $_[1] }
16420     sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
16421     sub set_alignment               { $_[0]->[RALIGNMENTS]->[ $_[1] ]  = $_[2] }
16422
16423 }
16424
16425 #####################################################################
16426 #
16427 # the Perl::Tidy::VerticalAligner::Alignment class holds information
16428 # on a single column being aligned
16429 #
16430 #####################################################################
16431 package Perl::Tidy::VerticalAligner::Alignment;
16432
16433 {
16434
16435     use strict;
16436
16437     #use Carp;
16438
16439     # Symbolic array indexes
16440     use constant COLUMN          => 0;    # the current column number
16441     use constant STARTING_COLUMN => 1;    # column number when created
16442     use constant MATCHING_TOKEN  => 2;    # what token we are matching
16443     use constant STARTING_LINE   => 3;    # the line index of creation
16444     use constant ENDING_LINE     => 4;    # the most recent line to use it
16445     use constant SAVED_COLUMN    => 5;    # the most recent line to use it
16446     use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
16447                                           # (just its index in an array)
16448
16449     # Correspondence between variables and array indexes
16450     my %_index_map;
16451     $_index_map{column}          = COLUMN;
16452     $_index_map{starting_column} = STARTING_COLUMN;
16453     $_index_map{matching_token}  = MATCHING_TOKEN;
16454     $_index_map{starting_line}   = STARTING_LINE;
16455     $_index_map{ending_line}     = ENDING_LINE;
16456     $_index_map{saved_column}    = SAVED_COLUMN;
16457     $_index_map{serial_number}   = SERIAL_NUMBER;
16458
16459     my @_default_data = ();
16460     $_default_data[COLUMN]          = undef;
16461     $_default_data[STARTING_COLUMN] = undef;
16462     $_default_data[MATCHING_TOKEN]  = undef;
16463     $_default_data[STARTING_LINE]   = undef;
16464     $_default_data[ENDING_LINE]     = undef;
16465     $_default_data[SAVED_COLUMN]    = undef;
16466     $_default_data[SERIAL_NUMBER]   = undef;
16467
16468     # class population count
16469     {
16470         my $_count = 0;
16471         sub get_count        { $_count; }
16472         sub _increment_count { ++$_count }
16473         sub _decrement_count { --$_count }
16474     }
16475
16476     # constructor
16477     sub new {
16478         my ( $caller, %arg ) = @_;
16479         my $caller_is_obj = ref($caller);
16480         my $class = $caller_is_obj || $caller;
16481         no strict "refs";
16482         my $self = bless [], $class;
16483
16484         foreach ( keys %_index_map ) {
16485             my $index = $_index_map{$_};
16486             if    ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
16487             elsif ($caller_is_obj)    { $self->[$index] = $caller->[$index] }
16488             else { $self->[$index] = $_default_data[$index] }
16489         }
16490         $self->_increment_count();
16491         return $self;
16492     }
16493
16494     sub DESTROY {
16495         $_[0]->_decrement_count();
16496     }
16497
16498     sub get_column          { return $_[0]->[COLUMN] }
16499     sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
16500     sub get_matching_token  { return $_[0]->[MATCHING_TOKEN] }
16501     sub get_starting_line   { return $_[0]->[STARTING_LINE] }
16502     sub get_ending_line     { return $_[0]->[ENDING_LINE] }
16503     sub get_serial_number   { return $_[0]->[SERIAL_NUMBER] }
16504
16505     sub set_column          { $_[0]->[COLUMN]          = $_[1] }
16506     sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
16507     sub set_matching_token  { $_[0]->[MATCHING_TOKEN]  = $_[1] }
16508     sub set_starting_line   { $_[0]->[STARTING_LINE]   = $_[1] }
16509     sub set_ending_line     { $_[0]->[ENDING_LINE]     = $_[1] }
16510     sub increment_column { $_[0]->[COLUMN] += $_[1] }
16511
16512     sub save_column    { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
16513     sub restore_column { $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }
16514
16515 }
16516
16517 package Perl::Tidy::VerticalAligner;
16518
16519 # The Perl::Tidy::VerticalAligner package collects output lines and
16520 # attempts to line up certain common tokens, such as => and #, which are
16521 # identified by the calling routine.
16522 #
16523 # There are two main routines: append_line and flush.  Append acts as a
16524 # storage buffer, collecting lines into a group which can be vertically
16525 # aligned.  When alignment is no longer possible or desirable, it dumps
16526 # the group to flush.
16527 #
16528 #     append_line -----> flush
16529 #
16530 #     collects          writes
16531 #     vertical          one
16532 #     groups            group
16533
16534 BEGIN {
16535
16536     # Caution: these debug flags produce a lot of output
16537     # They should all be 0 except when debugging small scripts
16538
16539     use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
16540     use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
16541     use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
16542
16543     my $debug_warning = sub {
16544         print "VALIGN_DEBUGGING with key $_[0]\n";
16545     };
16546
16547     VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
16548     VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
16549
16550 }
16551
16552 use vars qw(
16553   $vertical_aligner_self
16554   $current_line
16555   $maximum_alignment_index
16556   $ralignment_list
16557   $maximum_jmax_seen
16558   $minimum_jmax_seen
16559   $previous_minimum_jmax_seen
16560   $previous_maximum_jmax_seen
16561   $maximum_line_index
16562   $group_level
16563   $group_type
16564   $group_maximum_gap
16565   $marginal_match
16566   $last_group_level_written
16567   $last_leading_space_count
16568   $extra_indent_ok
16569   $zero_count
16570   @group_lines
16571   $last_comment_column
16572   $last_side_comment_line_number
16573   $last_side_comment_length
16574   $last_side_comment_level
16575   $outdented_line_count
16576   $first_outdented_line_at
16577   $last_outdented_line_at
16578   $diagnostics_object
16579   $logger_object
16580   $file_writer_object
16581   @side_comment_history
16582   $comment_leading_space_count
16583
16584   $cached_line_text
16585   $cached_line_type
16586   $cached_line_flag
16587   $cached_seqno
16588   $cached_line_valid
16589   $cached_line_leading_space_count
16590   $cached_seqno_string
16591
16592   $seqno_string
16593   $last_nonblank_seqno_string
16594
16595   $rOpts
16596
16597   $rOpts_maximum_line_length
16598   $rOpts_continuation_indentation
16599   $rOpts_indent_columns
16600   $rOpts_tabs
16601   $rOpts_entab_leading_whitespace
16602   $rOpts_valign
16603
16604   $rOpts_minimum_space_to_comment
16605
16606 );
16607
16608 sub initialize {
16609
16610     my $class;
16611
16612     ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
16613       = @_;
16614
16615     # variables describing the entire space group:
16616
16617     $ralignment_list            = [];
16618     $group_level                = 0;
16619     $last_group_level_written   = -1;
16620     $extra_indent_ok            = 0;    # can we move all lines to the right?
16621     $last_side_comment_length   = 0;
16622     $maximum_jmax_seen          = 0;
16623     $minimum_jmax_seen          = 0;
16624     $previous_minimum_jmax_seen = 0;
16625     $previous_maximum_jmax_seen = 0;
16626
16627     # variables describing each line of the group
16628     @group_lines = ();                  # list of all lines in group
16629
16630     $outdented_line_count          = 0;
16631     $first_outdented_line_at       = 0;
16632     $last_outdented_line_at        = 0;
16633     $last_side_comment_line_number = 0;
16634     $last_side_comment_level       = -1;
16635
16636     # most recent 3 side comments; [ line number, column ]
16637     $side_comment_history[0] = [ -300, 0 ];
16638     $side_comment_history[1] = [ -200, 0 ];
16639     $side_comment_history[2] = [ -100, 0 ];
16640
16641     # write_leader_and_string cache:
16642     $cached_line_text                = "";
16643     $cached_line_type                = 0;
16644     $cached_line_flag                = 0;
16645     $cached_seqno                    = 0;
16646     $cached_line_valid               = 0;
16647     $cached_line_leading_space_count = 0;
16648     $cached_seqno_string             = "";
16649
16650     # string of sequence numbers joined together
16651     $seqno_string               = "";
16652     $last_nonblank_seqno_string = "";
16653
16654     # frequently used parameters
16655     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
16656     $rOpts_tabs                     = $rOpts->{'tabs'};
16657     $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
16658     $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
16659     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
16660     $rOpts_valign                   = $rOpts->{'valign'};
16661
16662     forget_side_comment();
16663
16664     initialize_for_new_group();
16665
16666     $vertical_aligner_self = {};
16667     bless $vertical_aligner_self, $class;
16668     return $vertical_aligner_self;
16669 }
16670
16671 sub initialize_for_new_group {
16672     $maximum_line_index      = -1;      # lines in the current group
16673     $maximum_alignment_index = -1;      # alignments in current group
16674     $zero_count              = 0;       # count consecutive lines without tokens
16675     $current_line            = undef;   # line being matched for alignment
16676     $group_maximum_gap       = 0;       # largest gap introduced
16677     $group_type              = "";
16678     $marginal_match          = 0;
16679     $comment_leading_space_count = 0;
16680     $last_leading_space_count    = 0;
16681 }
16682
16683 # interface to Perl::Tidy::Diagnostics routines
16684 sub write_diagnostics {
16685     if ($diagnostics_object) {
16686         $diagnostics_object->write_diagnostics(@_);
16687     }
16688 }
16689
16690 # interface to Perl::Tidy::Logger routines
16691 sub warning {
16692     if ($logger_object) {
16693         $logger_object->warning(@_);
16694     }
16695 }
16696
16697 sub write_logfile_entry {
16698     if ($logger_object) {
16699         $logger_object->write_logfile_entry(@_);
16700     }
16701 }
16702
16703 sub report_definite_bug {
16704     if ($logger_object) {
16705         $logger_object->report_definite_bug();
16706     }
16707 }
16708
16709 sub get_SPACES {
16710
16711     # return the number of leading spaces associated with an indentation
16712     # variable $indentation is either a constant number of spaces or an
16713     # object with a get_SPACES method.
16714     my $indentation = shift;
16715     return ref($indentation) ? $indentation->get_SPACES() : $indentation;
16716 }
16717
16718 sub get_RECOVERABLE_SPACES {
16719
16720     # return the number of spaces (+ means shift right, - means shift left)
16721     # that we would like to shift a group of lines with the same indentation
16722     # to get them to line up with their opening parens
16723     my $indentation = shift;
16724     return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
16725 }
16726
16727 sub get_STACK_DEPTH {
16728
16729     my $indentation = shift;
16730     return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
16731 }
16732
16733 sub make_alignment {
16734     my ( $col, $token ) = @_;
16735
16736     # make one new alignment at column $col which aligns token $token
16737     ++$maximum_alignment_index;
16738     my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
16739         column          => $col,
16740         starting_column => $col,
16741         matching_token  => $token,
16742         starting_line   => $maximum_line_index,
16743         ending_line     => $maximum_line_index,
16744         serial_number   => $maximum_alignment_index,
16745     );
16746     $ralignment_list->[$maximum_alignment_index] = $alignment;
16747     return $alignment;
16748 }
16749
16750 sub dump_alignments {
16751     print
16752 "Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
16753     for my $i ( 0 .. $maximum_alignment_index ) {
16754         my $column          = $ralignment_list->[$i]->get_column();
16755         my $starting_column = $ralignment_list->[$i]->get_starting_column();
16756         my $matching_token  = $ralignment_list->[$i]->get_matching_token();
16757         my $starting_line   = $ralignment_list->[$i]->get_starting_line();
16758         my $ending_line     = $ralignment_list->[$i]->get_ending_line();
16759         print
16760 "$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
16761     }
16762 }
16763
16764 sub save_alignment_columns {
16765     for my $i ( 0 .. $maximum_alignment_index ) {
16766         $ralignment_list->[$i]->save_column();
16767     }
16768 }
16769
16770 sub restore_alignment_columns {
16771     for my $i ( 0 .. $maximum_alignment_index ) {
16772         $ralignment_list->[$i]->restore_column();
16773     }
16774 }
16775
16776 sub forget_side_comment {
16777     $last_comment_column = 0;
16778 }
16779
16780 sub append_line {
16781
16782     # sub append is called to place one line in the current vertical group.
16783     #
16784     # The input parameters are:
16785     #     $level = indentation level of this line
16786     #     $rfields = reference to array of fields
16787     #     $rpatterns = reference to array of patterns, one per field
16788     #     $rtokens   = reference to array of tokens starting fields 1,2,..
16789     #
16790     # Here is an example of what this package does.  In this example,
16791     # we are trying to line up both the '=>' and the '#'.
16792     #
16793     #         '18' => 'grave',    #   \`
16794     #         '19' => 'acute',    #   `'
16795     #         '20' => 'caron',    #   \v
16796     # <-tabs-><f1-><--field 2 ---><-f3->
16797     # |            |              |    |
16798     # |            |              |    |
16799     # col1        col2         col3 col4
16800     #
16801     # The calling routine has already broken the entire line into 3 fields as
16802     # indicated.  (So the work of identifying promising common tokens has
16803     # already been done).
16804     #
16805     # In this example, there will be 2 tokens being matched: '=>' and '#'.
16806     # They are the leading parts of fields 2 and 3, but we do need to know
16807     # what they are so that we can dump a group of lines when these tokens
16808     # change.
16809     #
16810     # The fields contain the actual characters of each field.  The patterns
16811     # are like the fields, but they contain mainly token types instead
16812     # of tokens, so they have fewer characters.  They are used to be
16813     # sure we are matching fields of similar type.
16814     #
16815     # In this example, there will be 4 column indexes being adjusted.  The
16816     # first one is always at zero.  The interior columns are at the start of
16817     # the matching tokens, and the last one tracks the maximum line length.
16818     #
16819     # Basically, each time a new line comes in, it joins the current vertical
16820     # group if possible.  Otherwise it causes the current group to be dumped
16821     # and a new group is started.
16822     #
16823     # For each new group member, the column locations are increased, as
16824     # necessary, to make room for the new fields.  When the group is finally
16825     # output, these column numbers are used to compute the amount of spaces of
16826     # padding needed for each field.
16827     #
16828     # Programming note: the fields are assumed not to have any tab characters.
16829     # Tabs have been previously removed except for tabs in quoted strings and
16830     # side comments.  Tabs in these fields can mess up the column counting.
16831     # The log file warns the user if there are any such tabs.
16832
16833     my (
16834         $level,               $level_end,
16835         $indentation,         $rfields,
16836         $rtokens,             $rpatterns,
16837         $is_forced_break,     $outdent_long_lines,
16838         $is_terminal_ternary, $is_terminal_statement,
16839         $do_not_pad,          $rvertical_tightness_flags,
16840         $level_jump,
16841     ) = @_;
16842
16843     # number of fields is $jmax
16844     # number of tokens between fields is $jmax-1
16845     my $jmax = $#{$rfields};
16846
16847     my $leading_space_count = get_SPACES($indentation);
16848
16849     # set outdented flag to be sure we either align within statements or
16850     # across statement boundaries, but not both.
16851     my $is_outdented = $last_leading_space_count > $leading_space_count;
16852     $last_leading_space_count = $leading_space_count;
16853
16854     # Patch: undo for hanging side comment
16855     my $is_hanging_side_comment =
16856       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
16857     $is_outdented = 0 if $is_hanging_side_comment;
16858
16859     VALIGN_DEBUG_FLAG_APPEND0 && do {
16860         print
16861 "APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
16862     };
16863
16864     # Validate cached line if necessary: If we can produce a container
16865     # with just 2 lines total by combining an existing cached opening
16866     # token with the closing token to follow, then we will mark both
16867     # cached flags as valid.
16868     if ($rvertical_tightness_flags) {
16869         if (   $maximum_line_index <= 0
16870             && $cached_line_type
16871             && $cached_seqno
16872             && $rvertical_tightness_flags->[2]
16873             && $rvertical_tightness_flags->[2] == $cached_seqno )
16874         {
16875             $rvertical_tightness_flags->[3] ||= 1;
16876             $cached_line_valid              ||= 1;
16877         }
16878     }
16879
16880     # do not join an opening block brace with an unbalanced line
16881     # unless requested with a flag value of 2
16882     if (   $cached_line_type == 3
16883         && $maximum_line_index < 0
16884         && $cached_line_flag < 2
16885         && $level_jump != 0 )
16886     {
16887         $cached_line_valid = 0;
16888     }
16889
16890     # patch until new aligner is finished
16891     if ($do_not_pad) { my_flush() }
16892
16893     # shouldn't happen:
16894     if ( $level < 0 ) { $level = 0 }
16895
16896     # do not align code across indentation level changes
16897     # or if vertical alignment is turned off for debugging
16898     if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
16899
16900         # we are allowed to shift a group of lines to the right if its
16901         # level is greater than the previous and next group
16902         $extra_indent_ok =
16903           ( $level < $group_level && $last_group_level_written < $group_level );
16904
16905         my_flush();
16906
16907         # If we know that this line will get flushed out by itself because
16908         # of level changes, we can leave the extra_indent_ok flag set.
16909         # That way, if we get an external flush call, we will still be
16910         # able to do some -lp alignment if necessary.
16911         $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
16912
16913         $group_level = $level;
16914
16915         # wait until after the above flush to get the leading space
16916         # count because it may have been changed if the -icp flag is in
16917         # effect
16918         $leading_space_count = get_SPACES($indentation);
16919
16920     }
16921
16922     # --------------------------------------------------------------------
16923     # Patch to collect outdentable block COMMENTS
16924     # --------------------------------------------------------------------
16925     my $is_blank_line = "";
16926     my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
16927     if ( $group_type eq 'COMMENT' ) {
16928         if (
16929             (
16930                    $is_block_comment
16931                 && $outdent_long_lines
16932                 && $leading_space_count == $comment_leading_space_count
16933             )
16934             || $is_blank_line
16935           )
16936         {
16937             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
16938             return;
16939         }
16940         else {
16941             my_flush();
16942         }
16943     }
16944
16945     # --------------------------------------------------------------------
16946     # add dummy fields for terminal ternary
16947     # --------------------------------------------------------------------
16948     if ( $is_terminal_ternary && $current_line ) {
16949         fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
16950         $jmax = @{$rfields} - 1;
16951     }
16952
16953     # --------------------------------------------------------------------
16954     # add dummy fields for else statement
16955     # --------------------------------------------------------------------
16956     if (   $rfields->[0] =~ /^else\s*$/
16957         && $current_line
16958         && $level_jump == 0 )
16959     {
16960         fix_terminal_else( $rfields, $rtokens, $rpatterns );
16961         $jmax = @{$rfields} - 1;
16962     }
16963
16964     # --------------------------------------------------------------------
16965     # Step 1. Handle simple line of code with no fields to match.
16966     # --------------------------------------------------------------------
16967     if ( $jmax <= 0 ) {
16968         $zero_count++;
16969
16970         if ( $maximum_line_index >= 0
16971             && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
16972         {
16973
16974             # flush the current group if it has some aligned columns..
16975             if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
16976
16977             # flush current group if we are just collecting side comments..
16978             elsif (
16979
16980                 # ...and we haven't seen a comment lately
16981                 ( $zero_count > 3 )
16982
16983                 # ..or if this new line doesn't fit to the left of the comments
16984                 || ( ( $leading_space_count + length( $$rfields[0] ) ) >
16985                     $group_lines[0]->get_column(0) )
16986               )
16987             {
16988                 my_flush();
16989             }
16990         }
16991
16992         # patch to start new COMMENT group if this comment may be outdented
16993         if (   $is_block_comment
16994             && $outdent_long_lines
16995             && $maximum_line_index < 0 )
16996         {
16997             $group_type                           = 'COMMENT';
16998             $comment_leading_space_count          = $leading_space_count;
16999             $group_lines[ ++$maximum_line_index ] = $rfields->[0];
17000             return;
17001         }
17002
17003         # just write this line directly if no current group, no side comment,
17004         # and no space recovery is needed.
17005         if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
17006         {
17007             write_leader_and_string( $leading_space_count, $$rfields[0], 0,
17008                 $outdent_long_lines, $rvertical_tightness_flags );
17009             return;
17010         }
17011     }
17012     else {
17013         $zero_count = 0;
17014     }
17015
17016     # programming check: (shouldn't happen)
17017     # an error here implies an incorrect call was made
17018     if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
17019         warning(
17020 "Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
17021         );
17022         report_definite_bug();
17023     }
17024
17025     # --------------------------------------------------------------------
17026     # create an object to hold this line
17027     # --------------------------------------------------------------------
17028     my $new_line = new Perl::Tidy::VerticalAligner::Line(
17029         jmax                      => $jmax,
17030         jmax_original_line        => $jmax,
17031         rtokens                   => $rtokens,
17032         rfields                   => $rfields,
17033         rpatterns                 => $rpatterns,
17034         indentation               => $indentation,
17035         leading_space_count       => $leading_space_count,
17036         outdent_long_lines        => $outdent_long_lines,
17037         list_type                 => "",
17038         is_hanging_side_comment   => $is_hanging_side_comment,
17039         maximum_line_length       => $rOpts->{'maximum-line-length'},
17040         rvertical_tightness_flags => $rvertical_tightness_flags,
17041     );
17042
17043     # --------------------------------------------------------------------
17044     # It simplifies things to create a zero length side comment
17045     # if none exists.
17046     # --------------------------------------------------------------------
17047     make_side_comment( $new_line, $level_end );
17048
17049     # --------------------------------------------------------------------
17050     # Decide if this is a simple list of items.
17051     # There are 3 list types: none, comma, comma-arrow.
17052     # We use this below to be less restrictive in deciding what to align.
17053     # --------------------------------------------------------------------
17054     if ($is_forced_break) {
17055         decide_if_list($new_line);
17056     }
17057
17058     if ($current_line) {
17059
17060         # --------------------------------------------------------------------
17061         # Allow hanging side comment to join current group, if any
17062         # This will help keep side comments aligned, because otherwise we
17063         # will have to start a new group, making alignment less likely.
17064         # --------------------------------------------------------------------
17065         join_hanging_comment( $new_line, $current_line )
17066           if $is_hanging_side_comment;
17067
17068         # --------------------------------------------------------------------
17069         # If there is just one previous line, and it has more fields
17070         # than the new line, try to join fields together to get a match with
17071         # the new line.  At the present time, only a single leading '=' is
17072         # allowed to be compressed out.  This is useful in rare cases where
17073         # a table is forced to use old breakpoints because of side comments,
17074         # and the table starts out something like this:
17075         #   my %MonthChars = ('0', 'Jan',   # side comment
17076         #                     '1', 'Feb',
17077         #                     '2', 'Mar',
17078         # Eliminating the '=' field will allow the remaining fields to line up.
17079         # This situation does not occur if there are no side comments
17080         # because scan_list would put a break after the opening '('.
17081         # --------------------------------------------------------------------
17082         eliminate_old_fields( $new_line, $current_line );
17083
17084         # --------------------------------------------------------------------
17085         # If the new line has more fields than the current group,
17086         # see if we can match the first fields and combine the remaining
17087         # fields of the new line.
17088         # --------------------------------------------------------------------
17089         eliminate_new_fields( $new_line, $current_line );
17090
17091         # --------------------------------------------------------------------
17092         # Flush previous group unless all common tokens and patterns match..
17093         # --------------------------------------------------------------------
17094         check_match( $new_line, $current_line );
17095
17096         # --------------------------------------------------------------------
17097         # See if there is space for this line in the current group (if any)
17098         # --------------------------------------------------------------------
17099         if ($current_line) {
17100             check_fit( $new_line, $current_line );
17101         }
17102     }
17103
17104     # --------------------------------------------------------------------
17105     # Append this line to the current group (or start new group)
17106     # --------------------------------------------------------------------
17107     accept_line($new_line);
17108
17109     # Future update to allow this to vary:
17110     $current_line = $new_line if ( $maximum_line_index == 0 );
17111
17112     my_flush() if ( $group_type eq "TERMINAL" );
17113
17114     # --------------------------------------------------------------------
17115     # Step 8. Some old debugging stuff
17116     # --------------------------------------------------------------------
17117     VALIGN_DEBUG_FLAG_APPEND && do {
17118         print "APPEND fields:";
17119         dump_array(@$rfields);
17120         print "APPEND tokens:";
17121         dump_array(@$rtokens);
17122         print "APPEND patterns:";
17123         dump_array(@$rpatterns);
17124         dump_alignments();
17125     };
17126 }
17127
17128 sub join_hanging_comment {
17129
17130     my $line = shift;
17131     my $jmax = $line->get_jmax();
17132     return 0 unless $jmax == 1;    # must be 2 fields
17133     my $rtokens = $line->get_rtokens();
17134     return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
17135     my $rfields = $line->get_rfields();
17136     return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
17137     my $old_line            = shift;
17138     my $maximum_field_index = $old_line->get_jmax();
17139     return 0
17140       unless $maximum_field_index > $jmax;    # the current line has more fields
17141     my $rpatterns = $line->get_rpatterns();
17142
17143     $line->set_is_hanging_side_comment(1);
17144     $jmax = $maximum_field_index;
17145     $line->set_jmax($jmax);
17146     $$rfields[$jmax]         = $$rfields[1];
17147     $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
17148     $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
17149     for ( my $j = 1 ; $j < $jmax ; $j++ ) {
17150         $$rfields[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
17151         $$rtokens[ $j - 1 ]   = "";
17152         $$rpatterns[ $j - 1 ] = "";
17153     }
17154     return 1;
17155 }
17156
17157 sub eliminate_old_fields {
17158
17159     my $new_line = shift;
17160     my $jmax     = $new_line->get_jmax();
17161     if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
17162     if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
17163
17164     # there must be one previous line
17165     return unless ( $maximum_line_index == 0 );
17166
17167     my $old_line            = shift;
17168     my $maximum_field_index = $old_line->get_jmax();
17169
17170     # this line must have fewer fields
17171     return unless $maximum_field_index > $jmax;
17172
17173     # Identify specific cases where field elimination is allowed:
17174     # case=1: both lines have comma-separated lists, and the first
17175     #         line has an equals
17176     # case=2: both lines have leading equals
17177
17178     # case 1 is the default
17179     my $case = 1;
17180
17181     # See if case 2: both lines have leading '='
17182     # We'll require smiliar leading patterns in this case
17183     my $old_rtokens   = $old_line->get_rtokens();
17184     my $rtokens       = $new_line->get_rtokens();
17185     my $rpatterns     = $new_line->get_rpatterns();
17186     my $old_rpatterns = $old_line->get_rpatterns();
17187     if (   $rtokens->[0] =~ /^=\d*$/
17188         && $old_rtokens->[0]   eq $rtokens->[0]
17189         && $old_rpatterns->[0] eq $rpatterns->[0] )
17190     {
17191         $case = 2;
17192     }
17193
17194     # not too many fewer fields in new line for case 1
17195     return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
17196
17197     # case 1 must have side comment
17198     my $old_rfields = $old_line->get_rfields();
17199     return
17200       if ( $case == 1
17201         && length( $$old_rfields[$maximum_field_index] ) == 0 );
17202
17203     my $rfields = $new_line->get_rfields();
17204
17205     my $hid_equals = 0;
17206
17207     my @new_alignments        = ();
17208     my @new_fields            = ();
17209     my @new_matching_patterns = ();
17210     my @new_matching_tokens   = ();
17211
17212     my $j = 0;
17213     my $k;
17214     my $current_field   = '';
17215     my $current_pattern = '';
17216
17217     # loop over all old tokens
17218     my $in_match = 0;
17219     for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
17220         $current_field   .= $$old_rfields[$k];
17221         $current_pattern .= $$old_rpatterns[$k];
17222         last if ( $j > $jmax - 1 );
17223
17224         if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
17225             $in_match                  = 1;
17226             $new_fields[$j]            = $current_field;
17227             $new_matching_patterns[$j] = $current_pattern;
17228             $current_field             = '';
17229             $current_pattern           = '';
17230             $new_matching_tokens[$j]   = $$old_rtokens[$k];
17231             $new_alignments[$j]        = $old_line->get_alignment($k);
17232             $j++;
17233         }
17234         else {
17235
17236             if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
17237                 last if ( $case == 2 );    # avoid problems with stuff
17238                                            # like:   $a=$b=$c=$d;
17239                 $hid_equals = 1;
17240             }
17241             last
17242               if ( $in_match && $case == 1 )
17243               ;    # disallow gaps in matching field types in case 1
17244         }
17245     }
17246
17247     # Modify the current state if we are successful.
17248     # We must exactly reach the ends of both lists for success.
17249     if (   ( $j == $jmax )
17250         && ( $current_field eq '' )
17251         && ( $case != 1 || $hid_equals ) )
17252     {
17253         $k = $maximum_field_index;
17254         $current_field   .= $$old_rfields[$k];
17255         $current_pattern .= $$old_rpatterns[$k];
17256         $new_fields[$j]            = $current_field;
17257         $new_matching_patterns[$j] = $current_pattern;
17258
17259         $new_alignments[$j] = $old_line->get_alignment($k);
17260         $maximum_field_index = $j;
17261
17262         $old_line->set_alignments(@new_alignments);
17263         $old_line->set_jmax($jmax);
17264         $old_line->set_rtokens( \@new_matching_tokens );
17265         $old_line->set_rfields( \@new_fields );
17266         $old_line->set_rpatterns( \@$rpatterns );
17267     }
17268 }
17269
17270 # create an empty side comment if none exists
17271 sub make_side_comment {
17272     my $new_line  = shift;
17273     my $level_end = shift;
17274     my $jmax      = $new_line->get_jmax();
17275     my $rtokens   = $new_line->get_rtokens();
17276
17277     # if line does not have a side comment...
17278     if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
17279         my $rfields   = $new_line->get_rfields();
17280         my $rpatterns = $new_line->get_rpatterns();
17281         $$rtokens[$jmax]     = '#';
17282         $$rfields[ ++$jmax ] = '';
17283         $$rpatterns[$jmax]   = '#';
17284         $new_line->set_jmax($jmax);
17285         $new_line->set_jmax_original_line($jmax);
17286     }
17287
17288     # line has a side comment..
17289     else {
17290
17291         # don't remember old side comment location for very long
17292         my $line_number = $vertical_aligner_self->get_output_line_number();
17293         my $rfields     = $new_line->get_rfields();
17294         if (
17295             $line_number - $last_side_comment_line_number > 12
17296
17297             # and don't remember comment location across block level changes
17298             || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
17299           )
17300         {
17301             forget_side_comment();
17302         }
17303         $last_side_comment_line_number = $line_number;
17304         $last_side_comment_level       = $level_end;
17305     }
17306 }
17307
17308 sub decide_if_list {
17309
17310     my $line = shift;
17311
17312     # A list will be taken to be a line with a forced break in which all
17313     # of the field separators are commas or comma-arrows (except for the
17314     # trailing #)
17315
17316     # List separator tokens are things like ',3'   or '=>2',
17317     # where the trailing digit is the nesting depth.  Allow braces
17318     # to allow nested list items.
17319     my $rtokens    = $line->get_rtokens();
17320     my $test_token = $$rtokens[0];
17321     if ( $test_token =~ /^(\,|=>)/ ) {
17322         my $list_type = $test_token;
17323         my $jmax      = $line->get_jmax();
17324
17325         foreach ( 1 .. $jmax - 2 ) {
17326             if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
17327                 $list_type = "";
17328                 last;
17329             }
17330         }
17331         $line->set_list_type($list_type);
17332     }
17333 }
17334
17335 sub eliminate_new_fields {
17336
17337     return unless ( $maximum_line_index >= 0 );
17338     my $new_line = shift;
17339     my $old_line = shift;
17340     my $jmax     = $new_line->get_jmax();
17341
17342     my $old_rtokens = $old_line->get_rtokens();
17343     my $rtokens     = $new_line->get_rtokens();
17344     my $is_assignment =
17345       (      $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] )
17346           || $group_type eq "TERMINAL" );
17347
17348     # must be monotonic variation
17349     return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
17350
17351     # must be more fields in the new line
17352     my $maximum_field_index = $old_line->get_jmax();
17353     return unless ( $maximum_field_index < $jmax );
17354
17355     unless ($is_assignment) {
17356         return
17357           unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
17358           ;    # only if monotonic
17359
17360         # never combine fields of a comma list
17361         return
17362           unless ( $maximum_field_index > 1 )
17363           && ( $new_line->get_list_type() !~ /^,/ );
17364     }
17365
17366     my $rfields       = $new_line->get_rfields();
17367     my $rpatterns     = $new_line->get_rpatterns();
17368     my $old_rpatterns = $old_line->get_rpatterns();
17369
17370     # loop over all OLD tokens except comment and check match
17371     my $match = 1;
17372     my $k;
17373     for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
17374         if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
17375             || ( $$old_rpatterns[$k] ne $$rpatterns[$k] )
17376             && $group_type ne "TERMINAL" )
17377         {
17378             $match = 0;
17379             last;
17380         }
17381     }
17382
17383     # first tokens agree, so combine extra new tokens
17384     if ($match) {
17385         for $k ( $maximum_field_index .. $jmax - 1 ) {
17386
17387             $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
17388             $$rfields[$k] = "";
17389             $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
17390             $$rpatterns[$k] = "";
17391         }
17392
17393         $$rtokens[ $maximum_field_index - 1 ] = '#';
17394         $$rfields[$maximum_field_index]       = $$rfields[$jmax];
17395         $$rpatterns[$maximum_field_index]     = $$rpatterns[$jmax];
17396         $jmax                                 = $maximum_field_index;
17397     }
17398     $new_line->set_jmax($jmax);
17399 }
17400
17401 sub fix_terminal_ternary {
17402
17403     # Add empty fields as necessary to align a ternary term
17404     # like this:
17405     #
17406     #  my $leapyear =
17407     #      $year % 4   ? 0
17408     #    : $year % 100 ? 1
17409     #    : $year % 400 ? 0
17410     #    :               1;
17411     #
17412     my ( $rfields, $rtokens, $rpatterns ) = @_;
17413
17414     my $jmax        = @{$rfields} - 1;
17415     my $old_line    = $group_lines[$maximum_line_index];
17416     my $rfields_old = $old_line->get_rfields();
17417
17418     my $rpatterns_old       = $old_line->get_rpatterns();
17419     my $rtokens_old         = $old_line->get_rtokens();
17420     my $maximum_field_index = $old_line->get_jmax();
17421
17422     # look for the question mark after the :
17423     my ($jquestion);
17424     my $depth_question;
17425     my $pad = "";
17426     for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
17427         my $tok = $rtokens_old->[$j];
17428         if ( $tok =~ /^\?(\d+)$/ ) {
17429             $depth_question = $1;
17430
17431             # depth must be correct
17432             next unless ( $depth_question eq $group_level );
17433
17434             $jquestion = $j;
17435             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
17436                 $pad = " " x length($1);
17437             }
17438             else {
17439                 return;    # shouldn't happen
17440             }
17441             last;
17442         }
17443     }
17444     return unless ( defined($jquestion) );    # shouldn't happen
17445
17446     # Now splice the tokens and patterns of the previous line
17447     # into the else line to insure a match.  Add empty fields
17448     # as necessary.
17449     my $jadd = $jquestion;
17450
17451     # Work on copies of the actual arrays in case we have
17452     # to return due to an error
17453     my @fields   = @{$rfields};
17454     my @patterns = @{$rpatterns};
17455     my @tokens   = @{$rtokens};
17456
17457     VALIGN_DEBUG_FLAG_TERNARY && do {
17458         local $" = '><';
17459         print "CURRENT FIELDS=<@{$rfields_old}>\n";
17460         print "CURRENT TOKENS=<@{$rtokens_old}>\n";
17461         print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
17462         print "UNMODIFIED FIELDS=<@{$rfields}>\n";
17463         print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
17464         print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
17465     };
17466
17467     # handle cases of leading colon on this line
17468     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
17469
17470         my ( $colon, $therest ) = ( $1, $2 );
17471
17472         # Handle sub-case of first field with leading colon plus additional code
17473         # This is the usual situation as at the '1' below:
17474         #  ...
17475         #  : $year % 400 ? 0
17476         #  :               1;
17477         if ($therest) {
17478
17479             # Split the first field after the leading colon and insert padding.
17480             # Note that this padding will remain even if the terminal value goes
17481             # out on a separate line.  This does not seem to look to bad, so no
17482             # mechanism has been included to undo it.
17483             my $field1 = shift @fields;
17484             unshift @fields, ( $colon, $pad . $therest );
17485
17486             # change the leading pattern from : to ?
17487             return unless ( $patterns[0] =~ s/^\:/?/ );
17488
17489             # install leading tokens and patterns of existing line
17490             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
17491             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
17492
17493             # insert appropriate number of empty fields
17494             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
17495         }
17496
17497         # handle sub-case of first field just equal to leading colon.
17498         # This can happen for example in the example below where
17499         # the leading '(' would create a new alignment token
17500         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
17501         # :                        ( $mname = $name . '->' );
17502         else {
17503
17504             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
17505
17506             # prepend a leading ? onto the second pattern
17507             $patterns[1] = "?b" . $patterns[1];
17508
17509             # pad the second field
17510             $fields[1] = $pad . $fields[1];
17511
17512             # install leading tokens and patterns of existing line, replacing
17513             # leading token and inserting appropriate number of empty fields
17514             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
17515             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
17516             splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
17517         }
17518     }
17519
17520     # Handle case of no leading colon on this line.  This will
17521     # be the case when -wba=':' is used.  For example,
17522     #  $year % 400 ? 0 :
17523     #                1;
17524     else {
17525
17526         # install leading tokens and patterns of existing line
17527         $patterns[0] = '?' . 'b' . $patterns[0];
17528         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
17529         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
17530
17531         # insert appropriate number of empty fields
17532         $jadd = $jquestion + 1;
17533         $fields[0] = $pad . $fields[0];
17534         splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
17535     }
17536
17537     VALIGN_DEBUG_FLAG_TERNARY && do {
17538         local $" = '><';
17539         print "MODIFIED TOKENS=<@tokens>\n";
17540         print "MODIFIED PATTERNS=<@patterns>\n";
17541         print "MODIFIED FIELDS=<@fields>\n";
17542     };
17543
17544     # all ok .. update the arrays
17545     @{$rfields}   = @fields;
17546     @{$rtokens}   = @tokens;
17547     @{$rpatterns} = @patterns;
17548
17549     # force a flush after this line
17550     $group_type = "TERMINAL";
17551     return;
17552 }
17553
17554 sub fix_terminal_else {
17555
17556     # Add empty fields as necessary to align a balanced terminal
17557     # else block to a previous if/elsif/unless block,
17558     # like this:
17559     #
17560     #  if   ( 1 || $x ) { print "ok 13\n"; }
17561     #  else             { print "not ok 13\n"; }
17562     #
17563     my ( $rfields, $rtokens, $rpatterns ) = @_;
17564     my $jmax = @{$rfields} - 1;
17565     return unless ( $jmax > 0 );
17566
17567     # check for balanced else block following if/elsif/unless
17568     my $rfields_old = $current_line->get_rfields();
17569
17570     # TBD: add handling for 'case'
17571     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
17572
17573     # look for the opening brace after the else, and extrace the depth
17574     my $tok_brace = $rtokens->[0];
17575     my $depth_brace;
17576     if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
17577
17578     # probably:  "else # side_comment"
17579     else { return }
17580
17581     my $rpatterns_old       = $current_line->get_rpatterns();
17582     my $rtokens_old         = $current_line->get_rtokens();
17583     my $maximum_field_index = $current_line->get_jmax();
17584
17585     # be sure the previous if/elsif is followed by an opening paren
17586     my $jparen    = 0;
17587     my $tok_paren = '(' . $depth_brace;
17588     my $tok_test  = $rtokens_old->[$jparen];
17589     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
17590
17591     # Now find the opening block brace
17592     my ($jbrace);
17593     for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
17594         my $tok = $rtokens_old->[$j];
17595         if ( $tok eq $tok_brace ) {
17596             $jbrace = $j;
17597             last;
17598         }
17599     }
17600     return unless ( defined($jbrace) );           # shouldn't happen
17601
17602     # Now splice the tokens and patterns of the previous line
17603     # into the else line to insure a match.  Add empty fields
17604     # as necessary.
17605     my $jadd = $jbrace - $jparen;
17606     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
17607     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
17608     splice( @{$rfields}, 1, 0, ('') x $jadd );
17609
17610     # force a flush after this line if it does not follow a case
17611     $group_type = "TERMINAL"
17612       unless ( $rfields_old->[0] =~ /^case\s*$/ );
17613     return;
17614 }
17615
17616 sub check_match {
17617
17618     my $new_line = shift;
17619     my $old_line = shift;
17620
17621     my $jmax                = $new_line->get_jmax();
17622     my $maximum_field_index = $old_line->get_jmax();
17623
17624     # flush if this line has too many fields
17625     if ( $jmax > $maximum_field_index ) { my_flush(); return }
17626
17627     # flush if adding this line would make a non-monotonic field count
17628     if (
17629         ( $maximum_field_index > $jmax )    # this has too few fields
17630         && (
17631             ( $previous_minimum_jmax_seen < $jmax )  # and wouldn't be monotonic
17632             || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
17633         )
17634       )
17635     {
17636         my_flush();
17637         return;
17638     }
17639
17640     # otherwise append this line if everything matches
17641     my $jmax_original_line      = $new_line->get_jmax_original_line();
17642     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
17643     my $rtokens                 = $new_line->get_rtokens();
17644     my $rfields                 = $new_line->get_rfields();
17645     my $rpatterns               = $new_line->get_rpatterns();
17646     my $list_type               = $new_line->get_list_type();
17647
17648     my $group_list_type = $old_line->get_list_type();
17649     my $old_rpatterns   = $old_line->get_rpatterns();
17650     my $old_rtokens     = $old_line->get_rtokens();
17651
17652     my $jlimit = $jmax - 1;
17653     if ( $maximum_field_index > $jmax ) {
17654         $jlimit = $jmax_original_line;
17655         --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
17656     }
17657
17658     my $everything_matches = 1;
17659
17660     # common list types always match
17661     unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
17662         || $is_hanging_side_comment )
17663     {
17664
17665         my $leading_space_count = $new_line->get_leading_space_count();
17666         my $saw_equals          = 0;
17667         for my $j ( 0 .. $jlimit ) {
17668             my $match = 1;
17669
17670             my $old_tok = $$old_rtokens[$j];
17671             my $new_tok = $$rtokens[$j];
17672
17673             # Dumb down the match AFTER an equals and
17674             # also dumb down after seeing a ? ternary operator ...
17675             # Everything after a + is the token which preceded the previous
17676             # opening paren (container name).  We won't require them to match.
17677             if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
17678                 $new_tok = $1;
17679                 $old_tok =~ s/\+.*$//;
17680             }
17681
17682             if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
17683
17684             # we never match if the matching tokens differ
17685             if (   $j < $jlimit
17686                 && $old_tok ne $new_tok )
17687             {
17688                 $match = 0;
17689             }
17690
17691             # otherwise, if patterns match, we always have a match.
17692             # However, if patterns don't match, we have to be careful...
17693             elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
17694
17695                 # We have to be very careful about aligning commas when the
17696                 # pattern's don't match, because it can be worse to create an
17697                 # alignment where none is needed than to omit one.  The current
17698                 # rule: if we are within a matching sub call (indicated by '+'
17699                 # in the matching token), we'll allow a marginal match, but
17700                 # otherwise not.
17701                 #
17702                 # Here's an example where we'd like to align the '='
17703                 #  my $cfile = File::Spec->catfile( 't',    'callext.c' );
17704                 #  my $inc   = File::Spec->catdir( 'Basic', 'Core' );
17705                 # because the function names differ.
17706                 # Future alignment logic should make this unnecessary.
17707                 #
17708                 # Here's an example where the ','s are not contained in a call.
17709                 # The first line below should probably not match the next two:
17710                 #   ( $a, $b ) = ( $b, $r );
17711                 #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
17712                 #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
17713                 if ( $new_tok =~ /^,/ ) {
17714                     if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
17715                         $marginal_match = 1;
17716                     }
17717                     else {
17718                         $match = 0;
17719                     }
17720                 }
17721
17722                 # parens don't align well unless patterns match
17723                 elsif ( $new_tok =~ /^\(/ ) {
17724                     $match = 0;
17725                 }
17726
17727                 # Handle an '=' alignment with different patterns to
17728                 # the left.
17729                 elsif ( $new_tok =~ /^=\d*$/ ) {
17730
17731                     $saw_equals = 1;
17732
17733                     # It is best to be a little restrictive when
17734                     # aligning '=' tokens.  Here is an example of
17735                     # two lines that we will not align:
17736                     #       my $variable=6;
17737                     #       $bb=4;
17738                     # The problem is that one is a 'my' declaration,
17739                     # and the other isn't, so they're not very similar.
17740                     # We will filter these out by comparing the first
17741                     # letter of the pattern.  This is crude, but works
17742                     # well enough.
17743                     if (
17744                         substr( $$old_rpatterns[$j], 0, 1 ) ne
17745                         substr( $$rpatterns[$j], 0, 1 ) )
17746                     {
17747                         $match = 0;
17748                     }
17749
17750                     # If we pass that test, we'll call it a marginal match.
17751                     # Here is an example of a marginal match:
17752                     #       $done{$$op} = 1;
17753                     #       $op         = compile_bblock($op);
17754                     # The left tokens are both identifiers, but
17755                     # one accesses a hash and the other doesn't.
17756                     # We'll let this be a tentative match and undo
17757                     # it later if we don't find more than 2 lines
17758                     # in the group.
17759                     elsif ( $maximum_line_index == 0 ) {
17760                         $marginal_match = 1;
17761                     }
17762                 }
17763             }
17764
17765             # Don't let line with fewer fields increase column widths
17766             # ( align3.t )
17767             if ( $maximum_field_index > $jmax ) {
17768                 my $pad =
17769                   length( $$rfields[$j] ) - $old_line->current_field_width($j);
17770
17771                 if ( $j == 0 ) {
17772                     $pad += $leading_space_count;
17773                 }
17774
17775                 # TESTING: suspend this rule to allow last lines to join
17776                 if ( $pad > 0 ) { $match = 0; }
17777             }
17778
17779             unless ($match) {
17780                 $everything_matches = 0;
17781                 last;
17782             }
17783         }
17784     }
17785
17786     if ( $maximum_field_index > $jmax ) {
17787
17788         if ($everything_matches) {
17789
17790             my $comment = $$rfields[$jmax];
17791             for $jmax ( $jlimit .. $maximum_field_index ) {
17792                 $$rtokens[$jmax]     = $$old_rtokens[$jmax];
17793                 $$rfields[ ++$jmax ] = '';
17794                 $$rpatterns[$jmax]   = $$old_rpatterns[$jmax];
17795             }
17796             $$rfields[$jmax] = $comment;
17797             $new_line->set_jmax($jmax);
17798         }
17799     }
17800
17801     my_flush() unless ($everything_matches);
17802 }
17803
17804 sub check_fit {
17805
17806     return unless ( $maximum_line_index >= 0 );
17807     my $new_line = shift;
17808     my $old_line = shift;
17809
17810     my $jmax                    = $new_line->get_jmax();
17811     my $leading_space_count     = $new_line->get_leading_space_count();
17812     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
17813     my $rtokens                 = $new_line->get_rtokens();
17814     my $rfields                 = $new_line->get_rfields();
17815     my $rpatterns               = $new_line->get_rpatterns();
17816
17817     my $group_list_type = $group_lines[0]->get_list_type();
17818
17819     my $padding_so_far    = 0;
17820     my $padding_available = $old_line->get_available_space_on_right();
17821
17822     # save current columns in case this doesn't work
17823     save_alignment_columns();
17824
17825     my ( $j, $pad, $eight );
17826     my $maximum_field_index = $old_line->get_jmax();
17827     for $j ( 0 .. $jmax ) {
17828
17829         $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
17830
17831         if ( $j == 0 ) {
17832             $pad += $leading_space_count;
17833         }
17834
17835         # remember largest gap of the group, excluding gap to side comment
17836         if (   $pad < 0
17837             && $group_maximum_gap < -$pad
17838             && $j > 0
17839             && $j < $jmax - 1 )
17840         {
17841             $group_maximum_gap = -$pad;
17842         }
17843
17844         next if $pad < 0;
17845
17846         ## This patch helps sometimes, but it doesn't check to see if
17847         ## the line is too long even without the side comment.  It needs
17848         ## to be reworked.
17849         ##don't let a long token with no trailing side comment push
17850         ##side comments out, or end a group.  (sidecmt1.t)
17851         ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
17852
17853         # This line will need space; lets see if we want to accept it..
17854         if (
17855
17856             # not if this won't fit
17857             ( $pad > $padding_available )
17858
17859             # previously, there were upper bounds placed on padding here
17860             # (maximum_whitespace_columns), but they were not really helpful
17861
17862           )
17863         {
17864
17865             # revert to starting state then flush; things didn't work out
17866             restore_alignment_columns();
17867             my_flush();
17868             last;
17869         }
17870
17871         # patch to avoid excessive gaps in previous lines,
17872         # due to a line of fewer fields.
17873         #   return join( ".",
17874         #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
17875         #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
17876         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
17877
17878         # looks ok, squeeze this field in
17879         $old_line->increase_field_width( $j, $pad );
17880         $padding_available -= $pad;
17881
17882         # remember largest gap of the group, excluding gap to side comment
17883         if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
17884             $group_maximum_gap = $pad;
17885         }
17886     }
17887 }
17888
17889 sub accept_line {
17890
17891     # The current line either starts a new alignment group or is
17892     # accepted into the current alignment group.
17893     my $new_line = shift;
17894     $group_lines[ ++$maximum_line_index ] = $new_line;
17895
17896     # initialize field lengths if starting new group
17897     if ( $maximum_line_index == 0 ) {
17898
17899         my $jmax    = $new_line->get_jmax();
17900         my $rfields = $new_line->get_rfields();
17901         my $rtokens = $new_line->get_rtokens();
17902         my $j;
17903         my $col = $new_line->get_leading_space_count();
17904
17905         for $j ( 0 .. $jmax ) {
17906             $col += length( $$rfields[$j] );
17907
17908             # create initial alignments for the new group
17909             my $token = "";
17910             if ( $j < $jmax ) { $token = $$rtokens[$j] }
17911             my $alignment = make_alignment( $col, $token );
17912             $new_line->set_alignment( $j, $alignment );
17913         }
17914
17915         $maximum_jmax_seen = $jmax;
17916         $minimum_jmax_seen = $jmax;
17917     }
17918
17919     # use previous alignments otherwise
17920     else {
17921         my @new_alignments =
17922           $group_lines[ $maximum_line_index - 1 ]->get_alignments();
17923         $new_line->set_alignments(@new_alignments);
17924     }
17925
17926     # remember group jmax extremes for next call to append_line
17927     $previous_minimum_jmax_seen = $minimum_jmax_seen;
17928     $previous_maximum_jmax_seen = $maximum_jmax_seen;
17929 }
17930
17931 sub dump_array {
17932
17933     # debug routine to dump array contents
17934     local $" = ')(';
17935     print "(@_)\n";
17936 }
17937
17938 # flush() sends the current Perl::Tidy::VerticalAligner group down the
17939 # pipeline to Perl::Tidy::FileWriter.
17940
17941 # This is the external flush, which also empties the cache
17942 sub flush {
17943
17944     if ( $maximum_line_index < 0 ) {
17945         if ($cached_line_type) {
17946             $seqno_string = $cached_seqno_string;
17947             entab_and_output( $cached_line_text,
17948                 $cached_line_leading_space_count,
17949                 $last_group_level_written );
17950             $cached_line_type    = 0;
17951             $cached_line_text    = "";
17952             $cached_seqno_string = "";
17953         }
17954     }
17955     else {
17956         my_flush();
17957     }
17958 }
17959
17960 # This is the internal flush, which leaves the cache intact
17961 sub my_flush {
17962
17963     return if ( $maximum_line_index < 0 );
17964
17965     # handle a group of comment lines
17966     if ( $group_type eq 'COMMENT' ) {
17967
17968         VALIGN_DEBUG_FLAG_APPEND0 && do {
17969             my ( $a, $b, $c ) = caller();
17970             print
17971 "APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
17972
17973         };
17974         my $leading_space_count = $comment_leading_space_count;
17975         my $leading_string      = get_leading_string($leading_space_count);
17976
17977         # zero leading space count if any lines are too long
17978         my $max_excess = 0;
17979         for my $i ( 0 .. $maximum_line_index ) {
17980             my $str = $group_lines[$i];
17981             my $excess =
17982               length($str) + $leading_space_count - $rOpts_maximum_line_length;
17983             if ( $excess > $max_excess ) {
17984                 $max_excess = $excess;
17985             }
17986         }
17987
17988         if ( $max_excess > 0 ) {
17989             $leading_space_count -= $max_excess;
17990             if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
17991             $last_outdented_line_at =
17992               $file_writer_object->get_output_line_number();
17993             unless ($outdented_line_count) {
17994                 $first_outdented_line_at = $last_outdented_line_at;
17995             }
17996             $outdented_line_count += ( $maximum_line_index + 1 );
17997         }
17998
17999         # write the group of lines
18000         my $outdent_long_lines = 0;
18001         for my $i ( 0 .. $maximum_line_index ) {
18002             write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
18003                 $outdent_long_lines, "" );
18004         }
18005     }
18006
18007     # handle a group of code lines
18008     else {
18009
18010         VALIGN_DEBUG_FLAG_APPEND0 && do {
18011             my $group_list_type = $group_lines[0]->get_list_type();
18012             my ( $a, $b, $c ) = caller();
18013             my $maximum_field_index = $group_lines[0]->get_jmax();
18014             print
18015 "APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
18016
18017         };
18018
18019         # some small groups are best left unaligned
18020         my $do_not_align = decide_if_aligned();
18021
18022         # optimize side comment location
18023         $do_not_align = adjust_side_comment($do_not_align);
18024
18025         # recover spaces for -lp option if possible
18026         my $extra_leading_spaces = get_extra_leading_spaces();
18027
18028         # all lines of this group have the same basic leading spacing
18029         my $group_leader_length = $group_lines[0]->get_leading_space_count();
18030
18031         # add extra leading spaces if helpful
18032         my $min_ci_gap =
18033           improve_continuation_indentation( $do_not_align,
18034             $group_leader_length );
18035
18036         # loop to output all lines
18037         for my $i ( 0 .. $maximum_line_index ) {
18038             my $line = $group_lines[$i];
18039             write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
18040                 $group_leader_length, $extra_leading_spaces );
18041         }
18042     }
18043     initialize_for_new_group();
18044 }
18045
18046 sub decide_if_aligned {
18047
18048     # Do not try to align two lines which are not really similar
18049     return unless $maximum_line_index == 1;
18050     return if ( $group_type eq "TERMINAL" );
18051
18052     my $group_list_type = $group_lines[0]->get_list_type();
18053
18054     my $do_not_align = (
18055
18056         # always align lists
18057         !$group_list_type
18058
18059           && (
18060
18061             # don't align if it was just a marginal match
18062             $marginal_match
18063
18064             # don't align two lines with big gap
18065             || $group_maximum_gap > 12
18066
18067             # or lines with differing number of alignment tokens
18068             # TODO: this could be improved.  It occasionally rejects
18069             # good matches.
18070             || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
18071           )
18072     );
18073
18074     # But try to convert them into a simple comment group if the first line
18075     # a has side comment
18076     my $rfields             = $group_lines[0]->get_rfields();
18077     my $maximum_field_index = $group_lines[0]->get_jmax();
18078     if (   $do_not_align
18079         && ( $maximum_line_index > 0 )
18080         && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
18081     {
18082         combine_fields();
18083         $do_not_align = 0;
18084     }
18085     return $do_not_align;
18086 }
18087
18088 sub adjust_side_comment {
18089
18090     my $do_not_align = shift;
18091
18092     # let's see if we can move the side comment field out a little
18093     # to improve readability (the last field is always a side comment field)
18094     my $have_side_comment       = 0;
18095     my $first_side_comment_line = -1;
18096     my $maximum_field_index     = $group_lines[0]->get_jmax();
18097     for my $i ( 0 .. $maximum_line_index ) {
18098         my $line = $group_lines[$i];
18099
18100         if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
18101             $have_side_comment       = 1;
18102             $first_side_comment_line = $i;
18103             last;
18104         }
18105     }
18106
18107     my $kmax = $maximum_field_index + 1;
18108
18109     if ($have_side_comment) {
18110
18111         my $line = $group_lines[0];
18112
18113         # the maximum space without exceeding the line length:
18114         my $avail = $line->get_available_space_on_right();
18115
18116         # try to use the previous comment column
18117         my $side_comment_column = $line->get_column( $kmax - 2 );
18118         my $move                = $last_comment_column - $side_comment_column;
18119
18120 ##        my $sc_line0 = $side_comment_history[0]->[0];
18121 ##        my $sc_col0  = $side_comment_history[0]->[1];
18122 ##        my $sc_line1 = $side_comment_history[1]->[0];
18123 ##        my $sc_col1  = $side_comment_history[1]->[1];
18124 ##        my $sc_line2 = $side_comment_history[2]->[0];
18125 ##        my $sc_col2  = $side_comment_history[2]->[1];
18126 ##
18127 ##        # FUTURE UPDATES:
18128 ##        # Be sure to ignore 'do not align' and  '} # end comments'
18129 ##        # Find first $move > 0 and $move <= $avail as follows:
18130 ##        # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
18131 ##        # 2. try sc_col2 if (line-sc_line2) < 12
18132 ##        # 3. try min possible space, plus up to 8,
18133 ##        # 4. try min possible space
18134
18135         if ( $kmax > 0 && !$do_not_align ) {
18136
18137             # but if this doesn't work, give up and use the minimum space
18138             if ( $move > $avail ) {
18139                 $move = $rOpts_minimum_space_to_comment - 1;
18140             }
18141
18142             # but we want some minimum space to the comment
18143             my $min_move = $rOpts_minimum_space_to_comment - 1;
18144             if (   $move >= 0
18145                 && $last_side_comment_length > 0
18146                 && ( $first_side_comment_line == 0 )
18147                 && $group_level == $last_group_level_written )
18148             {
18149                 $min_move = 0;
18150             }
18151
18152             if ( $move < $min_move ) {
18153                 $move = $min_move;
18154             }
18155
18156             # prevously, an upper bound was placed on $move here,
18157             # (maximum_space_to_comment), but it was not helpful
18158
18159             # don't exceed the available space
18160             if ( $move > $avail ) { $move = $avail }
18161
18162             # we can only increase space, never decrease
18163             if ( $move > 0 ) {
18164                 $line->increase_field_width( $maximum_field_index - 1, $move );
18165             }
18166
18167             # remember this column for the next group
18168             $last_comment_column = $line->get_column( $kmax - 2 );
18169         }
18170         else {
18171
18172             # try to at least line up the existing side comment location
18173             if ( $kmax > 0 && $move > 0 && $move < $avail ) {
18174                 $line->increase_field_width( $maximum_field_index - 1, $move );
18175                 $do_not_align = 0;
18176             }
18177
18178             # reset side comment column if we can't align
18179             else {
18180                 forget_side_comment();
18181             }
18182         }
18183     }
18184     return $do_not_align;
18185 }
18186
18187 sub improve_continuation_indentation {
18188     my ( $do_not_align, $group_leader_length ) = @_;
18189
18190     # See if we can increase the continuation indentation
18191     # to move all continuation lines closer to the next field
18192     # (unless it is a comment).
18193     #
18194     # '$min_ci_gap'is the extra indentation that we may need to introduce.
18195     # We will only introduce this to fields which already have some ci.
18196     # Without this variable, we would occasionally get something like this
18197     # (Complex.pm):
18198     #
18199     # use overload '+' => \&plus,
18200     #   '-'            => \&minus,
18201     #   '*'            => \&multiply,
18202     #   ...
18203     #   'tan'          => \&tan,
18204     #   'atan2'        => \&atan2,
18205     #
18206     # Whereas with this variable, we can shift variables over to get this:
18207     #
18208     # use overload '+' => \&plus,
18209     #          '-'     => \&minus,
18210     #          '*'     => \&multiply,
18211     #          ...
18212     #          'tan'   => \&tan,
18213     #          'atan2' => \&atan2,
18214
18215     ## BUB: Deactivated####################
18216     # The trouble with this patch is that it may, for example,
18217     # move in some 'or's  or ':'s, and leave some out, so that the
18218     # left edge alignment suffers.
18219     return 0;
18220     ###########################################
18221
18222     my $maximum_field_index = $group_lines[0]->get_jmax();
18223
18224     my $min_ci_gap = $rOpts_maximum_line_length;
18225     if ( $maximum_field_index > 1 && !$do_not_align ) {
18226
18227         for my $i ( 0 .. $maximum_line_index ) {
18228             my $line                = $group_lines[$i];
18229             my $leading_space_count = $line->get_leading_space_count();
18230             my $rfields             = $line->get_rfields();
18231
18232             my $gap = $line->get_column(0) - $leading_space_count -
18233               length( $$rfields[0] );
18234
18235             if ( $leading_space_count > $group_leader_length ) {
18236                 if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
18237             }
18238         }
18239
18240         if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
18241             $min_ci_gap = 0;
18242         }
18243     }
18244     else {
18245         $min_ci_gap = 0;
18246     }
18247     return $min_ci_gap;
18248 }
18249
18250 sub write_vertically_aligned_line {
18251
18252     my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
18253         $extra_leading_spaces )
18254       = @_;
18255     my $rfields                   = $line->get_rfields();
18256     my $leading_space_count       = $line->get_leading_space_count();
18257     my $outdent_long_lines        = $line->get_outdent_long_lines();
18258     my $maximum_field_index       = $line->get_jmax();
18259     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
18260
18261     # add any extra spaces
18262     if ( $leading_space_count > $group_leader_length ) {
18263         $leading_space_count += $min_ci_gap;
18264     }
18265
18266     my $str = $$rfields[0];
18267
18268     # loop to concatenate all fields of this line and needed padding
18269     my $total_pad_count = 0;
18270     my ( $j, $pad );
18271     for $j ( 1 .. $maximum_field_index ) {
18272
18273         # skip zero-length side comments
18274         last
18275           if ( ( $j == $maximum_field_index )
18276             && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
18277           );
18278
18279         # compute spaces of padding before this field
18280         my $col = $line->get_column( $j - 1 );
18281         $pad = $col - ( length($str) + $leading_space_count );
18282
18283         if ($do_not_align) {
18284             $pad =
18285               ( $j < $maximum_field_index )
18286               ? 0
18287               : $rOpts_minimum_space_to_comment - 1;
18288         }
18289
18290         # accumulate the padding
18291         if ( $pad > 0 ) { $total_pad_count += $pad; }
18292
18293         # add this field
18294         if ( !defined $$rfields[$j] ) {
18295             write_diagnostics("UNDEFined field at j=$j\n");
18296         }
18297
18298         # only add padding when we have a finite field;
18299         # this avoids extra terminal spaces if we have empty fields
18300         if ( length( $$rfields[$j] ) > 0 ) {
18301             $str .= ' ' x $total_pad_count;
18302             $total_pad_count = 0;
18303             $str .= $$rfields[$j];
18304         }
18305         else {
18306             $total_pad_count = 0;
18307         }
18308
18309         # update side comment history buffer
18310         if ( $j == $maximum_field_index ) {
18311             my $lineno = $file_writer_object->get_output_line_number();
18312             shift @side_comment_history;
18313             push @side_comment_history, [ $lineno, $col ];
18314         }
18315     }
18316
18317     my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
18318
18319     # ship this line off
18320     write_leader_and_string( $leading_space_count + $extra_leading_spaces,
18321         $str, $side_comment_length, $outdent_long_lines,
18322         $rvertical_tightness_flags );
18323 }
18324
18325 sub get_extra_leading_spaces {
18326
18327     #----------------------------------------------------------
18328     # Define any extra indentation space (for the -lp option).
18329     # Here is why:
18330     # If a list has side comments, sub scan_list must dump the
18331     # list before it sees everything.  When this happens, it sets
18332     # the indentation to the standard scheme, but notes how
18333     # many spaces it would have liked to use.  We may be able
18334     # to recover that space here in the event that that all of the
18335     # lines of a list are back together again.
18336     #----------------------------------------------------------
18337
18338     my $extra_leading_spaces = 0;
18339     if ($extra_indent_ok) {
18340         my $object = $group_lines[0]->get_indentation();
18341         if ( ref($object) ) {
18342             my $extra_indentation_spaces_wanted =
18343               get_RECOVERABLE_SPACES($object);
18344
18345             # all indentation objects must be the same
18346             my $i;
18347             for $i ( 1 .. $maximum_line_index ) {
18348                 if ( $object != $group_lines[$i]->get_indentation() ) {
18349                     $extra_indentation_spaces_wanted = 0;
18350                     last;
18351                 }
18352             }
18353
18354             if ($extra_indentation_spaces_wanted) {
18355
18356                 # the maximum space without exceeding the line length:
18357                 my $avail = $group_lines[0]->get_available_space_on_right();
18358                 $extra_leading_spaces =
18359                   ( $avail > $extra_indentation_spaces_wanted )
18360                   ? $extra_indentation_spaces_wanted
18361                   : $avail;
18362
18363                 # update the indentation object because with -icp the terminal
18364                 # ');' will use the same adjustment.
18365                 $object->permanently_decrease_AVAILABLE_SPACES(
18366                     -$extra_leading_spaces );
18367             }
18368         }
18369     }
18370     return $extra_leading_spaces;
18371 }
18372
18373 sub combine_fields {
18374
18375     # combine all fields except for the comment field  ( sidecmt.t )
18376     my ( $j, $k );
18377     my $maximum_field_index = $group_lines[0]->get_jmax();
18378     for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
18379         my $line    = $group_lines[$j];
18380         my $rfields = $line->get_rfields();
18381         foreach ( 1 .. $maximum_field_index - 1 ) {
18382             $$rfields[0] .= $$rfields[$_];
18383         }
18384         $$rfields[1] = $$rfields[$maximum_field_index];
18385
18386         $line->set_jmax(1);
18387         $line->set_column( 0, 0 );
18388         $line->set_column( 1, 0 );
18389
18390     }
18391     $maximum_field_index = 1;
18392
18393     for $j ( 0 .. $maximum_line_index ) {
18394         my $line    = $group_lines[$j];
18395         my $rfields = $line->get_rfields();
18396         for $k ( 0 .. $maximum_field_index ) {
18397             my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
18398             if ( $k == 0 ) {
18399                 $pad += $group_lines[$j]->get_leading_space_count();
18400             }
18401
18402             if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
18403
18404         }
18405     }
18406 }
18407
18408 sub get_output_line_number {
18409
18410     # the output line number reported to a caller is the number of items
18411     # written plus the number of items in the buffer
18412     my $self = shift;
18413     1 + $maximum_line_index + $file_writer_object->get_output_line_number();
18414 }
18415
18416 sub write_leader_and_string {
18417
18418     my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
18419         $rvertical_tightness_flags )
18420       = @_;
18421
18422     # handle outdenting of long lines:
18423     if ($outdent_long_lines) {
18424         my $excess =
18425           length($str) - $side_comment_length + $leading_space_count -
18426           $rOpts_maximum_line_length;
18427         if ( $excess > 0 ) {
18428             $leading_space_count = 0;
18429             $last_outdented_line_at =
18430               $file_writer_object->get_output_line_number();
18431
18432             unless ($outdented_line_count) {
18433                 $first_outdented_line_at = $last_outdented_line_at;
18434             }
18435             $outdented_line_count++;
18436         }
18437     }
18438
18439     # Make preliminary leading whitespace.  It could get changed
18440     # later by entabbing, so we have to keep track of any changes
18441     # to the leading_space_count from here on.
18442     my $leading_string =
18443       $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
18444
18445     # Unpack any recombination data; it was packed by
18446     # sub send_lines_to_vertical_aligner. Contents:
18447     #
18448     #   [0] type: 1=opening  2=closing  3=opening block brace
18449     #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
18450     #             if closing: spaces of padding to use
18451     #   [2] sequence number of container
18452     #   [3] valid flag: do not append if this flag is false
18453     #
18454     my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
18455         $seqno_end );
18456     if ($rvertical_tightness_flags) {
18457         (
18458             $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
18459             $seqno_end
18460         ) = @{$rvertical_tightness_flags};
18461     }
18462
18463     $seqno_string = $seqno_end;
18464
18465     # handle any cached line ..
18466     # either append this line to it or write it out
18467     if ( length($cached_line_text) ) {
18468
18469         if ( !$cached_line_valid ) {
18470             entab_and_output( $cached_line_text,
18471                 $cached_line_leading_space_count,
18472                 $last_group_level_written );
18473         }
18474
18475         # handle cached line with opening container token
18476         elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
18477
18478             my $gap = $leading_space_count - length($cached_line_text);
18479
18480             # handle option of just one tight opening per line:
18481             if ( $cached_line_flag == 1 ) {
18482                 if ( defined($open_or_close) && $open_or_close == 1 ) {
18483                     $gap = -1;
18484                 }
18485             }
18486
18487             if ( $gap >= 0 ) {
18488                 $leading_string      = $cached_line_text . ' ' x $gap;
18489                 $leading_space_count = $cached_line_leading_space_count;
18490                 $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
18491             }
18492             else {
18493                 entab_and_output( $cached_line_text,
18494                     $cached_line_leading_space_count,
18495                     $last_group_level_written );
18496             }
18497         }
18498
18499         # handle cached line to place before this closing container token
18500         else {
18501             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
18502
18503             if ( length($test_line) <= $rOpts_maximum_line_length ) {
18504
18505                 $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
18506
18507                 # Patch to outdent closing tokens ending # in ');'
18508                 # If we are joining a line like ');' to a previous stacked
18509                 # set of closing tokens, then decide if we may outdent the
18510                 # combined stack to the indentation of the ');'.  Since we
18511                 # should not normally outdent any of the other tokens more than
18512                 # the indentation of the lines that contained them, we will
18513                 # only do this if all of the corresponding opening
18514                 # tokens were on the same line.  This can happen with
18515                 # -sot and -sct.  For example, it is ok here:
18516                 #   __PACKAGE__->load_components( qw(
18517                 #         PK::Auto
18518                 #         Core
18519                 #   ));
18520                 #
18521                 #   But, for example, we do not outdent in this example because
18522                 #   that would put the closing sub brace out farther than the
18523                 #   opening sub brace:
18524                 #
18525                 #   perltidy -sot -sct
18526                 #   $c->Tk::bind(
18527                 #       '<Control-f>' => sub {
18528                 #           my ($c) = @_;
18529                 #           my $e = $c->XEvent;
18530                 #           itemsUnderArea $c;
18531                 #       } );
18532                 #
18533                 if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
18534
18535                     # The way to tell this is if the stacked sequence numbers
18536                     # of this output line are the reverse of the stacked
18537                     # sequence numbers of the previous non-blank line of
18538                     # sequence numbers.  So we can join if the previous
18539                     # nonblank string of tokens is the mirror image.  For
18540                     # example if stack )}] is 13:8:6 then we are looking for a
18541                     # leading stack like [{( which is 6:8:13 We only need to
18542                     # check the two ends, because the intermediate tokens must
18543                     # fall in order.  Note on speed: having to split on colons
18544                     # and eliminate multiple colons might appear to be slow,
18545                     # but it's not an issue because we almost never come
18546                     # through here.  In a typical file we don't.
18547                     $seqno_string               =~ s/^:+//;
18548                     $last_nonblank_seqno_string =~ s/^:+//;
18549                     $seqno_string               =~ s/:+/:/g;
18550                     $last_nonblank_seqno_string =~ s/:+/:/g;
18551
18552                     # how many spaces can we outdent?
18553                     my $diff =
18554                       $cached_line_leading_space_count - $leading_space_count;
18555                     if (   $diff > 0
18556                         && length($seqno_string)
18557                         && length($last_nonblank_seqno_string) ==
18558                         length($seqno_string) )
18559                     {
18560                         my @seqno_last =
18561                           ( split ':', $last_nonblank_seqno_string );
18562                         my @seqno_now = ( split ':', $seqno_string );
18563                         if (   $seqno_now[-1] == $seqno_last[0]
18564                             && $seqno_now[0] == $seqno_last[-1] )
18565                         {
18566
18567                             # OK to outdent ..
18568                             # for absolute safety, be sure we only remove
18569                             # whitespace
18570                             my $ws = substr( $test_line, 0, $diff );
18571                             if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
18572
18573                                 $test_line = substr( $test_line, $diff );
18574                                 $cached_line_leading_space_count -= $diff;
18575                             }
18576
18577                             # shouldn't happen, but not critical:
18578                             ##else {
18579                             ## ERROR transferring indentation here
18580                             ##}
18581                         }
18582                     }
18583                 }
18584
18585                 $str                 = $test_line;
18586                 $leading_string      = "";
18587                 $leading_space_count = $cached_line_leading_space_count;
18588             }
18589             else {
18590                 entab_and_output( $cached_line_text,
18591                     $cached_line_leading_space_count,
18592                     $last_group_level_written );
18593             }
18594         }
18595     }
18596     $cached_line_type = 0;
18597     $cached_line_text = "";
18598
18599     # make the line to be written
18600     my $line = $leading_string . $str;
18601
18602     # write or cache this line
18603     if ( !$open_or_close || $side_comment_length > 0 ) {
18604         entab_and_output( $line, $leading_space_count, $group_level );
18605     }
18606     else {
18607         $cached_line_text                = $line;
18608         $cached_line_type                = $open_or_close;
18609         $cached_line_flag                = $tightness_flag;
18610         $cached_seqno                    = $seqno;
18611         $cached_line_valid               = $valid;
18612         $cached_line_leading_space_count = $leading_space_count;
18613         $cached_seqno_string             = $seqno_string;
18614     }
18615
18616     $last_group_level_written = $group_level;
18617     $last_side_comment_length = $side_comment_length;
18618     $extra_indent_ok          = 0;
18619 }
18620
18621 sub entab_and_output {
18622     my ( $line, $leading_space_count, $level ) = @_;
18623
18624     # The line is currently correct if there is no tabbing (recommended!)
18625     # We may have to lop off some leading spaces and replace with tabs.
18626     if ( $leading_space_count > 0 ) {
18627
18628         # Nothing to do if no tabs
18629         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
18630             || $rOpts_indent_columns <= 0 )
18631         {
18632
18633             # nothing to do
18634         }
18635
18636         # Handle entab option
18637         elsif ($rOpts_entab_leading_whitespace) {
18638             my $space_count =
18639               $leading_space_count % $rOpts_entab_leading_whitespace;
18640             my $tab_count =
18641               int( $leading_space_count / $rOpts_entab_leading_whitespace );
18642             my $leading_string = "\t" x $tab_count . ' ' x $space_count;
18643             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
18644                 substr( $line, 0, $leading_space_count ) = $leading_string;
18645             }
18646             else {
18647
18648                 # REMOVE AFTER TESTING
18649                 # shouldn't happen - program error counting whitespace
18650                 # we'll skip entabbing
18651                 warning(
18652 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
18653                 );
18654             }
18655         }
18656
18657         # Handle option of one tab per level
18658         else {
18659             my $leading_string = ( "\t" x $level );
18660             my $space_count =
18661               $leading_space_count - $level * $rOpts_indent_columns;
18662
18663             # shouldn't happen:
18664             if ( $space_count < 0 ) {
18665                 warning(
18666 "Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
18667                 );
18668                 $leading_string = ( ' ' x $leading_space_count );
18669             }
18670             else {
18671                 $leading_string .= ( ' ' x $space_count );
18672             }
18673             if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
18674                 substr( $line, 0, $leading_space_count ) = $leading_string;
18675             }
18676             else {
18677
18678                 # REMOVE AFTER TESTING
18679                 # shouldn't happen - program error counting whitespace
18680                 # we'll skip entabbing
18681                 warning(
18682 "Error entabbing in entab_and_output: expected count=$leading_space_count\n"
18683                 );
18684             }
18685         }
18686     }
18687     $file_writer_object->write_code_line( $line . "\n" );
18688     if ($seqno_string) {
18689         $last_nonblank_seqno_string = $seqno_string;
18690     }
18691 }
18692
18693 {    # begin get_leading_string
18694
18695     my @leading_string_cache;
18696
18697     sub get_leading_string {
18698
18699         # define the leading whitespace string for this line..
18700         my $leading_whitespace_count = shift;
18701
18702         # Handle case of zero whitespace, which includes multi-line quotes
18703         # (which may have a finite level; this prevents tab problems)
18704         if ( $leading_whitespace_count <= 0 ) {
18705             return "";
18706         }
18707
18708         # look for previous result
18709         elsif ( $leading_string_cache[$leading_whitespace_count] ) {
18710             return $leading_string_cache[$leading_whitespace_count];
18711         }
18712
18713         # must compute a string for this number of spaces
18714         my $leading_string;
18715
18716         # Handle simple case of no tabs
18717         if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
18718             || $rOpts_indent_columns <= 0 )
18719         {
18720             $leading_string = ( ' ' x $leading_whitespace_count );
18721         }
18722
18723         # Handle entab option
18724         elsif ($rOpts_entab_leading_whitespace) {
18725             my $space_count =
18726               $leading_whitespace_count % $rOpts_entab_leading_whitespace;
18727             my $tab_count =
18728               int(
18729                 $leading_whitespace_count / $rOpts_entab_leading_whitespace );
18730             $leading_string = "\t" x $tab_count . ' ' x $space_count;
18731         }
18732
18733         # Handle option of one tab per level
18734         else {
18735             $leading_string = ( "\t" x $group_level );
18736             my $space_count =
18737               $leading_whitespace_count - $group_level * $rOpts_indent_columns;
18738
18739             # shouldn't happen:
18740             if ( $space_count < 0 ) {
18741                 warning(
18742 "Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
18743                 );
18744                 $leading_string = ( ' ' x $leading_whitespace_count );
18745             }
18746             else {
18747                 $leading_string .= ( ' ' x $space_count );
18748             }
18749         }
18750         $leading_string_cache[$leading_whitespace_count] = $leading_string;
18751         return $leading_string;
18752     }
18753 }    # end get_leading_string
18754
18755 sub report_anything_unusual {
18756     my $self = shift;
18757     if ( $outdented_line_count > 0 ) {
18758         write_logfile_entry(
18759             "$outdented_line_count long lines were outdented:\n");
18760         write_logfile_entry(
18761             "  First at output line $first_outdented_line_at\n");
18762
18763         if ( $outdented_line_count > 1 ) {
18764             write_logfile_entry(
18765                 "   Last at output line $last_outdented_line_at\n");
18766         }
18767         write_logfile_entry(
18768             "  use -noll to prevent outdenting, -l=n to increase line length\n"
18769         );
18770         write_logfile_entry("\n");
18771     }
18772 }
18773
18774 #####################################################################
18775 #
18776 # the Perl::Tidy::FileWriter class writes the output file
18777 #
18778 #####################################################################
18779
18780 package Perl::Tidy::FileWriter;
18781
18782 # Maximum number of little messages; probably need not be changed.
18783 use constant MAX_NAG_MESSAGES => 6;
18784
18785 sub write_logfile_entry {
18786     my $self          = shift;
18787     my $logger_object = $self->{_logger_object};
18788     if ($logger_object) {
18789         $logger_object->write_logfile_entry(@_);
18790     }
18791 }
18792
18793 sub new {
18794     my $class = shift;
18795     my ( $line_sink_object, $rOpts, $logger_object ) = @_;
18796
18797     bless {
18798         _line_sink_object           => $line_sink_object,
18799         _logger_object              => $logger_object,
18800         _rOpts                      => $rOpts,
18801         _output_line_number         => 1,
18802         _consecutive_blank_lines    => 0,
18803         _consecutive_nonblank_lines => 0,
18804         _first_line_length_error    => 0,
18805         _max_line_length_error      => 0,
18806         _last_line_length_error     => 0,
18807         _first_line_length_error_at => 0,
18808         _max_line_length_error_at   => 0,
18809         _last_line_length_error_at  => 0,
18810         _line_length_error_count    => 0,
18811         _max_output_line_length     => 0,
18812         _max_output_line_length_at  => 0,
18813     }, $class;
18814 }
18815
18816 sub tee_on {
18817     my $self = shift;
18818     $self->{_line_sink_object}->tee_on();
18819 }
18820
18821 sub tee_off {
18822     my $self = shift;
18823     $self->{_line_sink_object}->tee_off();
18824 }
18825
18826 sub get_output_line_number {
18827     my $self = shift;
18828     return $self->{_output_line_number};
18829 }
18830
18831 sub decrement_output_line_number {
18832     my $self = shift;
18833     $self->{_output_line_number}--;
18834 }
18835
18836 sub get_consecutive_nonblank_lines {
18837     my $self = shift;
18838     return $self->{_consecutive_nonblank_lines};
18839 }
18840
18841 sub reset_consecutive_blank_lines {
18842     my $self = shift;
18843     $self->{_consecutive_blank_lines} = 0;
18844 }
18845
18846 sub want_blank_line {
18847     my $self = shift;
18848     unless ( $self->{_consecutive_blank_lines} ) {
18849         $self->write_blank_code_line();
18850     }
18851 }
18852
18853 sub write_blank_code_line {
18854     my $self  = shift;
18855     my $rOpts = $self->{_rOpts};
18856     return
18857       if ( $self->{_consecutive_blank_lines} >=
18858         $rOpts->{'maximum-consecutive-blank-lines'} );
18859     $self->{_consecutive_blank_lines}++;
18860     $self->{_consecutive_nonblank_lines} = 0;
18861     $self->write_line("\n");
18862 }
18863
18864 sub write_code_line {
18865     my $self = shift;
18866     my $a    = shift;
18867
18868     if ( $a =~ /^\s*$/ ) {
18869         my $rOpts = $self->{_rOpts};
18870         return
18871           if ( $self->{_consecutive_blank_lines} >=
18872             $rOpts->{'maximum-consecutive-blank-lines'} );
18873         $self->{_consecutive_blank_lines}++;
18874         $self->{_consecutive_nonblank_lines} = 0;
18875     }
18876     else {
18877         $self->{_consecutive_blank_lines} = 0;
18878         $self->{_consecutive_nonblank_lines}++;
18879     }
18880     $self->write_line($a);
18881 }
18882
18883 sub write_line {
18884     my $self = shift;
18885     my $a    = shift;
18886
18887     # TODO: go through and see if the test is necessary here
18888     if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
18889
18890     $self->{_line_sink_object}->write_line($a);
18891
18892     # This calculation of excess line length ignores any internal tabs
18893     my $rOpts  = $self->{_rOpts};
18894     my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
18895     if ( $a =~ /^\t+/g ) {
18896         $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
18897     }
18898
18899     # Note that we just incremented output line number to future value
18900     # so we must subtract 1 for current line number
18901     if ( length($a) > 1 + $self->{_max_output_line_length} ) {
18902         $self->{_max_output_line_length}    = length($a) - 1;
18903         $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
18904     }
18905
18906     if ( $exceed > 0 ) {
18907         my $output_line_number = $self->{_output_line_number};
18908         $self->{_last_line_length_error}    = $exceed;
18909         $self->{_last_line_length_error_at} = $output_line_number - 1;
18910         if ( $self->{_line_length_error_count} == 0 ) {
18911             $self->{_first_line_length_error}    = $exceed;
18912             $self->{_first_line_length_error_at} = $output_line_number - 1;
18913         }
18914
18915         if (
18916             $self->{_last_line_length_error} > $self->{_max_line_length_error} )
18917         {
18918             $self->{_max_line_length_error}    = $exceed;
18919             $self->{_max_line_length_error_at} = $output_line_number - 1;
18920         }
18921
18922         if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
18923             $self->write_logfile_entry(
18924                 "Line length exceeded by $exceed characters\n");
18925         }
18926         $self->{_line_length_error_count}++;
18927     }
18928
18929 }
18930
18931 sub report_line_length_errors {
18932     my $self                    = shift;
18933     my $rOpts                   = $self->{_rOpts};
18934     my $line_length_error_count = $self->{_line_length_error_count};
18935     if ( $line_length_error_count == 0 ) {
18936         $self->write_logfile_entry(
18937             "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
18938         my $max_output_line_length    = $self->{_max_output_line_length};
18939         my $max_output_line_length_at = $self->{_max_output_line_length_at};
18940         $self->write_logfile_entry(
18941 "  Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
18942         );
18943
18944     }
18945     else {
18946
18947         my $word = ( $line_length_error_count > 1 ) ? "s" : "";
18948         $self->write_logfile_entry(
18949 "$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
18950         );
18951
18952         $word = ( $line_length_error_count > 1 ) ? "First" : "";
18953         my $first_line_length_error    = $self->{_first_line_length_error};
18954         my $first_line_length_error_at = $self->{_first_line_length_error_at};
18955         $self->write_logfile_entry(
18956 " $word at line $first_line_length_error_at by $first_line_length_error characters\n"
18957         );
18958
18959         if ( $line_length_error_count > 1 ) {
18960             my $max_line_length_error     = $self->{_max_line_length_error};
18961             my $max_line_length_error_at  = $self->{_max_line_length_error_at};
18962             my $last_line_length_error    = $self->{_last_line_length_error};
18963             my $last_line_length_error_at = $self->{_last_line_length_error_at};
18964             $self->write_logfile_entry(
18965 " Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
18966             );
18967             $self->write_logfile_entry(
18968 " Last at line $last_line_length_error_at by $last_line_length_error characters\n"
18969             );
18970         }
18971     }
18972 }
18973
18974 #####################################################################
18975 #
18976 # The Perl::Tidy::Debugger class shows line tokenization
18977 #
18978 #####################################################################
18979
18980 package Perl::Tidy::Debugger;
18981
18982 sub new {
18983
18984     my ( $class, $filename ) = @_;
18985
18986     bless {
18987         _debug_file        => $filename,
18988         _debug_file_opened => 0,
18989         _fh                => undef,
18990     }, $class;
18991 }
18992
18993 sub really_open_debug_file {
18994
18995     my $self       = shift;
18996     my $debug_file = $self->{_debug_file};
18997     my $fh;
18998     unless ( $fh = IO::File->new("> $debug_file") ) {
18999         warn("can't open $debug_file: $!\n");
19000     }
19001     $self->{_debug_file_opened} = 1;
19002     $self->{_fh}                = $fh;
19003     print $fh
19004       "Use -dump-token-types (-dtt) to get a list of token type codes\n";
19005 }
19006
19007 sub close_debug_file {
19008
19009     my $self = shift;
19010     my $fh   = $self->{_fh};
19011     if ( $self->{_debug_file_opened} ) {
19012
19013         eval { $self->{_fh}->close() };
19014     }
19015 }
19016
19017 sub write_debug_entry {
19018
19019     # This is a debug dump routine which may be modified as necessary
19020     # to dump tokens on a line-by-line basis.  The output will be written
19021     # to the .DEBUG file when the -D flag is entered.
19022     my $self           = shift;
19023     my $line_of_tokens = shift;
19024
19025     my $input_line        = $line_of_tokens->{_line_text};
19026     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
19027     my $rtokens           = $line_of_tokens->{_rtokens};
19028     my $rlevels           = $line_of_tokens->{_rlevels};
19029     my $rslevels          = $line_of_tokens->{_rslevels};
19030     my $rblock_type       = $line_of_tokens->{_rblock_type};
19031     my $input_line_number = $line_of_tokens->{_line_number};
19032     my $line_type         = $line_of_tokens->{_line_type};
19033
19034     my ( $j, $num );
19035
19036     my $token_str              = "$input_line_number: ";
19037     my $reconstructed_original = "$input_line_number: ";
19038     my $block_str              = "$input_line_number: ";
19039
19040     #$token_str .= "$line_type: ";
19041     #$reconstructed_original .= "$line_type: ";
19042
19043     my $pattern   = "";
19044     my @next_char = ( '"', '"' );
19045     my $i_next    = 0;
19046     unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
19047     my $fh = $self->{_fh};
19048
19049     for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
19050
19051         # testing patterns
19052         if ( $$rtoken_type[$j] eq 'k' ) {
19053             $pattern .= $$rtokens[$j];
19054         }
19055         else {
19056             $pattern .= $$rtoken_type[$j];
19057         }
19058         $reconstructed_original .= $$rtokens[$j];
19059         $block_str              .= "($$rblock_type[$j])";
19060         $num = length( $$rtokens[$j] );
19061         my $type_str = $$rtoken_type[$j];
19062
19063         # be sure there are no blank tokens (shouldn't happen)
19064         # This can only happen if a programming error has been made
19065         # because all valid tokens are non-blank
19066         if ( $type_str eq ' ' ) {
19067             print $fh "BLANK TOKEN on the next line\n";
19068             $type_str = $next_char[$i_next];
19069             $i_next   = 1 - $i_next;
19070         }
19071
19072         if ( length($type_str) == 1 ) {
19073             $type_str = $type_str x $num;
19074         }
19075         $token_str .= $type_str;
19076     }
19077
19078     # Write what you want here ...
19079     # print $fh "$input_line\n";
19080     # print $fh "$pattern\n";
19081     print $fh "$reconstructed_original\n";
19082     print $fh "$token_str\n";
19083
19084     #print $fh "$block_str\n";
19085 }
19086
19087 #####################################################################
19088 #
19089 # The Perl::Tidy::LineBuffer class supplies a 'get_line()'
19090 # method for returning the next line to be parsed, as well as a
19091 # 'peek_ahead()' method
19092 #
19093 # The input parameter is an object with a 'get_line()' method
19094 # which returns the next line to be parsed
19095 #
19096 #####################################################################
19097
19098 package Perl::Tidy::LineBuffer;
19099
19100 sub new {
19101
19102     my $class              = shift;
19103     my $line_source_object = shift;
19104
19105     return bless {
19106         _line_source_object => $line_source_object,
19107         _rlookahead_buffer  => [],
19108     }, $class;
19109 }
19110
19111 sub peek_ahead {
19112     my $self               = shift;
19113     my $buffer_index       = shift;
19114     my $line               = undef;
19115     my $line_source_object = $self->{_line_source_object};
19116     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
19117     if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
19118         $line = $$rlookahead_buffer[$buffer_index];
19119     }
19120     else {
19121         $line = $line_source_object->get_line();
19122         push( @$rlookahead_buffer, $line );
19123     }
19124     return $line;
19125 }
19126
19127 sub get_line {
19128     my $self               = shift;
19129     my $line               = undef;
19130     my $line_source_object = $self->{_line_source_object};
19131     my $rlookahead_buffer  = $self->{_rlookahead_buffer};
19132
19133     if ( scalar(@$rlookahead_buffer) ) {
19134         $line = shift @$rlookahead_buffer;
19135     }
19136     else {
19137         $line = $line_source_object->get_line();
19138     }
19139     return $line;
19140 }
19141
19142 ########################################################################
19143 #
19144 # the Perl::Tidy::Tokenizer package is essentially a filter which
19145 # reads lines of perl source code from a source object and provides
19146 # corresponding tokenized lines through its get_line() method.  Lines
19147 # flow from the source_object to the caller like this:
19148 #
19149 # source_object --> LineBuffer_object --> Tokenizer -->  calling routine
19150 #   get_line()         get_line()           get_line()     line_of_tokens
19151 #
19152 # The source object can be any object with a get_line() method which
19153 # supplies one line (a character string) perl call.
19154 # The LineBuffer object is created by the Tokenizer.
19155 # The Tokenizer returns a reference to a data structure 'line_of_tokens'
19156 # containing one tokenized line for each call to its get_line() method.
19157 #
19158 # WARNING: This is not a real class yet.  Only one tokenizer my be used.
19159 #
19160 ########################################################################
19161
19162 package Perl::Tidy::Tokenizer;
19163
19164 BEGIN {
19165
19166     # Caution: these debug flags produce a lot of output
19167     # They should all be 0 except when debugging small scripts
19168
19169     use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
19170     use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
19171     use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
19172     use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
19173     use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
19174
19175     my $debug_warning = sub {
19176         print "TOKENIZER_DEBUGGING with key $_[0]\n";
19177     };
19178
19179     TOKENIZER_DEBUG_FLAG_EXPECT   && $debug_warning->('EXPECT');
19180     TOKENIZER_DEBUG_FLAG_NSCAN    && $debug_warning->('NSCAN');
19181     TOKENIZER_DEBUG_FLAG_QUOTE    && $debug_warning->('QUOTE');
19182     TOKENIZER_DEBUG_FLAG_SCAN_ID  && $debug_warning->('SCAN_ID');
19183     TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
19184
19185 }
19186
19187 use Carp;
19188
19189 # PACKAGE VARIABLES for for processing an entire FILE.
19190 use vars qw{
19191   $tokenizer_self
19192
19193   $last_nonblank_token
19194   $last_nonblank_type
19195   $last_nonblank_block_type
19196   $statement_type
19197   $in_attribute_list
19198   $current_package
19199   $context
19200
19201   %is_constant
19202   %is_user_function
19203   %user_function_prototype
19204   %is_block_function
19205   %is_block_list_function
19206   %saw_function_definition
19207
19208   $brace_depth
19209   $paren_depth
19210   $square_bracket_depth
19211
19212   @current_depth
19213   @nesting_sequence_number
19214   @current_sequence_number
19215   @paren_type
19216   @paren_semicolon_count
19217   @paren_structural_type
19218   @brace_type
19219   @brace_structural_type
19220   @brace_statement_type
19221   @brace_context
19222   @brace_package
19223   @square_bracket_type
19224   @square_bracket_structural_type
19225   @depth_array
19226   @starting_line_of_current_depth
19227 };
19228
19229 # GLOBAL CONSTANTS for routines in this package
19230 use vars qw{
19231   %is_indirect_object_taker
19232   %is_block_operator
19233   %expecting_operator_token
19234   %expecting_operator_types
19235   %expecting_term_types
19236   %expecting_term_token
19237   %is_digraph
19238   %is_file_test_operator
19239   %is_trigraph
19240   %is_valid_token_type
19241   %is_keyword
19242   %is_code_block_token
19243   %really_want_term
19244   @opening_brace_names
19245   @closing_brace_names
19246   %is_keyword_taking_list
19247   %is_q_qq_qw_qx_qr_s_y_tr_m
19248 };
19249
19250 # possible values of operator_expected()
19251 use constant TERM     => -1;
19252 use constant UNKNOWN  => 0;
19253 use constant OPERATOR => 1;
19254
19255 # possible values of context
19256 use constant SCALAR_CONTEXT  => -1;
19257 use constant UNKNOWN_CONTEXT => 0;
19258 use constant LIST_CONTEXT    => 1;
19259
19260 # Maximum number of little messages; probably need not be changed.
19261 use constant MAX_NAG_MESSAGES => 6;
19262
19263 {
19264
19265     # methods to count instances
19266     my $_count = 0;
19267     sub get_count        { $_count; }
19268     sub _increment_count { ++$_count }
19269     sub _decrement_count { --$_count }
19270 }
19271
19272 sub DESTROY {
19273     $_[0]->_decrement_count();
19274 }
19275
19276 sub new {
19277
19278     my $class = shift;
19279
19280     # Note: 'tabs' and 'indent_columns' are temporary and should be
19281     # removed asap
19282     my %defaults = (
19283         source_object        => undef,
19284         debugger_object      => undef,
19285         diagnostics_object   => undef,
19286         logger_object        => undef,
19287         starting_level       => undef,
19288         indent_columns       => 4,
19289         tabs                 => 0,
19290         look_for_hash_bang   => 0,
19291         trim_qw              => 1,
19292         look_for_autoloader  => 1,
19293         look_for_selfloader  => 1,
19294         starting_line_number => 1,
19295     );
19296     my %args = ( %defaults, @_ );
19297
19298     # we are given an object with a get_line() method to supply source lines
19299     my $source_object = $args{source_object};
19300
19301     # we create another object with a get_line() and peek_ahead() method
19302     my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
19303
19304     # Tokenizer state data is as follows:
19305     # _rhere_target_list    reference to list of here-doc targets
19306     # _here_doc_target      the target string for a here document
19307     # _here_quote_character the type of here-doc quoting (" ' ` or none)
19308     #                       to determine if interpolation is done
19309     # _quote_target         character we seek if chasing a quote
19310     # _line_start_quote     line where we started looking for a long quote
19311     # _in_here_doc          flag indicating if we are in a here-doc
19312     # _in_pod               flag set if we are in pod documentation
19313     # _in_error             flag set if we saw severe error (binary in script)
19314     # _in_data              flag set if we are in __DATA__ section
19315     # _in_end               flag set if we are in __END__ section
19316     # _in_format            flag set if we are in a format description
19317     # _in_attribute_list    flag telling if we are looking for attributes
19318     # _in_quote             flag telling if we are chasing a quote
19319     # _starting_level       indentation level of first line
19320     # _input_tabstr         string denoting one indentation level of input file
19321     # _know_input_tabstr    flag indicating if we know _input_tabstr
19322     # _line_buffer_object   object with get_line() method to supply source code
19323     # _diagnostics_object   place to write debugging information
19324     # _unexpected_error_count  error count used to limit output
19325     # _lower_case_labels_at  line numbers where lower case labels seen
19326     $tokenizer_self = {
19327         _rhere_target_list                  => [],
19328         _in_here_doc                        => 0,
19329         _here_doc_target                    => "",
19330         _here_quote_character               => "",
19331         _in_data                            => 0,
19332         _in_end                             => 0,
19333         _in_format                          => 0,
19334         _in_error                           => 0,
19335         _in_pod                             => 0,
19336         _in_attribute_list                  => 0,
19337         _in_quote                           => 0,
19338         _quote_target                       => "",
19339         _line_start_quote                   => -1,
19340         _starting_level                     => $args{starting_level},
19341         _know_starting_level                => defined( $args{starting_level} ),
19342         _tabs                               => $args{tabs},
19343         _indent_columns                     => $args{indent_columns},
19344         _look_for_hash_bang                 => $args{look_for_hash_bang},
19345         _trim_qw                            => $args{trim_qw},
19346         _input_tabstr                       => "",
19347         _know_input_tabstr                  => -1,
19348         _last_line_number                   => $args{starting_line_number} - 1,
19349         _saw_perl_dash_P                    => 0,
19350         _saw_perl_dash_w                    => 0,
19351         _saw_use_strict                     => 0,
19352         _saw_v_string                       => 0,
19353         _look_for_autoloader                => $args{look_for_autoloader},
19354         _look_for_selfloader                => $args{look_for_selfloader},
19355         _saw_autoloader                     => 0,
19356         _saw_selfloader                     => 0,
19357         _saw_hash_bang                      => 0,
19358         _saw_end                            => 0,
19359         _saw_data                           => 0,
19360         _saw_negative_indentation           => 0,
19361         _started_tokenizing                 => 0,
19362         _line_buffer_object                 => $line_buffer_object,
19363         _debugger_object                    => $args{debugger_object},
19364         _diagnostics_object                 => $args{diagnostics_object},
19365         _logger_object                      => $args{logger_object},
19366         _unexpected_error_count             => 0,
19367         _started_looking_for_here_target_at => 0,
19368         _nearly_matched_here_target_at      => undef,
19369         _line_text                          => "",
19370         _rlower_case_labels_at              => undef,
19371     };
19372
19373     prepare_for_a_new_file();
19374     find_starting_indentation_level();
19375
19376     bless $tokenizer_self, $class;
19377
19378     # This is not a full class yet, so die if an attempt is made to
19379     # create more than one object.
19380
19381     if ( _increment_count() > 1 ) {
19382         confess
19383 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
19384     }
19385
19386     return $tokenizer_self;
19387
19388 }
19389
19390 # interface to Perl::Tidy::Logger routines
19391 sub warning {
19392     my $logger_object = $tokenizer_self->{_logger_object};
19393     if ($logger_object) {
19394         $logger_object->warning(@_);
19395     }
19396 }
19397
19398 sub complain {
19399     my $logger_object = $tokenizer_self->{_logger_object};
19400     if ($logger_object) {
19401         $logger_object->complain(@_);
19402     }
19403 }
19404
19405 sub write_logfile_entry {
19406     my $logger_object = $tokenizer_self->{_logger_object};
19407     if ($logger_object) {
19408         $logger_object->write_logfile_entry(@_);
19409     }
19410 }
19411
19412 sub interrupt_logfile {
19413     my $logger_object = $tokenizer_self->{_logger_object};
19414     if ($logger_object) {
19415         $logger_object->interrupt_logfile();
19416     }
19417 }
19418
19419 sub resume_logfile {
19420     my $logger_object = $tokenizer_self->{_logger_object};
19421     if ($logger_object) {
19422         $logger_object->resume_logfile();
19423     }
19424 }
19425
19426 sub increment_brace_error {
19427     my $logger_object = $tokenizer_self->{_logger_object};
19428     if ($logger_object) {
19429         $logger_object->increment_brace_error();
19430     }
19431 }
19432
19433 sub report_definite_bug {
19434     my $logger_object = $tokenizer_self->{_logger_object};
19435     if ($logger_object) {
19436         $logger_object->report_definite_bug();
19437     }
19438 }
19439
19440 sub brace_warning {
19441     my $logger_object = $tokenizer_self->{_logger_object};
19442     if ($logger_object) {
19443         $logger_object->brace_warning(@_);
19444     }
19445 }
19446
19447 sub get_saw_brace_error {
19448     my $logger_object = $tokenizer_self->{_logger_object};
19449     if ($logger_object) {
19450         $logger_object->get_saw_brace_error();
19451     }
19452     else {
19453         0;
19454     }
19455 }
19456
19457 # interface to Perl::Tidy::Diagnostics routines
19458 sub write_diagnostics {
19459     if ( $tokenizer_self->{_diagnostics_object} ) {
19460         $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
19461     }
19462 }
19463
19464 sub report_tokenization_errors {
19465
19466     my $self = shift;
19467
19468     my $level = get_indentation_level();
19469     if ( $level != $tokenizer_self->{_starting_level} ) {
19470         warning("final indentation level: $level\n");
19471     }
19472
19473     check_final_nesting_depths();
19474
19475     if ( $tokenizer_self->{_look_for_hash_bang}
19476         && !$tokenizer_self->{_saw_hash_bang} )
19477     {
19478         warning(
19479             "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
19480     }
19481
19482     if ( $tokenizer_self->{_in_format} ) {
19483         warning("hit EOF while in format description\n");
19484     }
19485
19486     if ( $tokenizer_self->{_in_pod} ) {
19487
19488         # Just write log entry if this is after __END__ or __DATA__
19489         # because this happens to often, and it is not likely to be
19490         # a parsing error.
19491         if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
19492             write_logfile_entry(
19493 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
19494             );
19495         }
19496
19497         else {
19498             complain(
19499 "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
19500             );
19501         }
19502
19503     }
19504
19505     if ( $tokenizer_self->{_in_here_doc} ) {
19506         my $here_doc_target = $tokenizer_self->{_here_doc_target};
19507         my $started_looking_for_here_target_at =
19508           $tokenizer_self->{_started_looking_for_here_target_at};
19509         if ($here_doc_target) {
19510             warning(
19511 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
19512             );
19513         }
19514         else {
19515             warning(
19516 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
19517             );
19518         }
19519         my $nearly_matched_here_target_at =
19520           $tokenizer_self->{_nearly_matched_here_target_at};
19521         if ($nearly_matched_here_target_at) {
19522             warning(
19523 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
19524             );
19525         }
19526     }
19527
19528     if ( $tokenizer_self->{_in_quote} ) {
19529         my $line_start_quote = $tokenizer_self->{_line_start_quote};
19530         my $quote_target     = $tokenizer_self->{_quote_target};
19531         my $what =
19532           ( $tokenizer_self->{_in_attribute_list} )
19533           ? "attribute list"
19534           : "quote/pattern";
19535         warning(
19536 "hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
19537         );
19538     }
19539
19540     unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
19541         if ( $] < 5.006 ) {
19542             write_logfile_entry("Suggest including '-w parameter'\n");
19543         }
19544         else {
19545             write_logfile_entry("Suggest including 'use warnings;'\n");
19546         }
19547     }
19548
19549     if ( $tokenizer_self->{_saw_perl_dash_P} ) {
19550         write_logfile_entry("Use of -P parameter for defines is discouraged\n");
19551     }
19552
19553     unless ( $tokenizer_self->{_saw_use_strict} ) {
19554         write_logfile_entry("Suggest including 'use strict;'\n");
19555     }
19556
19557     # it is suggested that lables have at least one upper case character
19558     # for legibility and to avoid code breakage as new keywords are introduced
19559     if ( $tokenizer_self->{_rlower_case_labels_at} ) {
19560         my @lower_case_labels_at =
19561           @{ $tokenizer_self->{_rlower_case_labels_at} };
19562         write_logfile_entry(
19563             "Suggest using upper case characters in label(s)\n");
19564         local $" = ')(';
19565         write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
19566     }
19567 }
19568
19569 sub report_v_string {
19570
19571     # warn if this version can't handle v-strings
19572     my $tok = shift;
19573     unless ( $tokenizer_self->{_saw_v_string} ) {
19574         $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
19575     }
19576     if ( $] < 5.006 ) {
19577         warning(
19578 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
19579         );
19580     }
19581 }
19582
19583 sub get_input_line_number {
19584     return $tokenizer_self->{_last_line_number};
19585 }
19586
19587 # returns the next tokenized line
19588 sub get_line {
19589
19590     my $self = shift;
19591
19592     # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
19593     # $square_bracket_depth, $paren_depth
19594
19595     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
19596     $tokenizer_self->{_line_text} = $input_line;
19597
19598     return undef unless ($input_line);
19599
19600     my $input_line_number = ++$tokenizer_self->{_last_line_number};
19601
19602     # Find and remove what characters terminate this line, including any
19603     # control r
19604     my $input_line_separator = "";
19605     if ( chomp($input_line) ) { $input_line_separator = $/ }
19606
19607     # TODO: what other characters should be included here?
19608     if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
19609         $input_line_separator = $2 . $input_line_separator;
19610     }
19611
19612     # for backwards compatability we keep the line text terminated with
19613     # a newline character
19614     $input_line .= "\n";
19615     $tokenizer_self->{_line_text} = $input_line;    # update
19616
19617     # create a data structure describing this line which will be
19618     # returned to the caller.
19619
19620     # _line_type codes are:
19621     #   SYSTEM         - system-specific code before hash-bang line
19622     #   CODE           - line of perl code (including comments)
19623     #   POD_START      - line starting pod, such as '=head'
19624     #   POD            - pod documentation text
19625     #   POD_END        - last line of pod section, '=cut'
19626     #   HERE           - text of here-document
19627     #   HERE_END       - last line of here-doc (target word)
19628     #   FORMAT         - format section
19629     #   FORMAT_END     - last line of format section, '.'
19630     #   DATA_START     - __DATA__ line
19631     #   DATA           - unidentified text following __DATA__
19632     #   END_START      - __END__ line
19633     #   END            - unidentified text following __END__
19634     #   ERROR          - we are in big trouble, probably not a perl script
19635
19636     # Other variables:
19637     #   _curly_brace_depth     - depth of curly braces at start of line
19638     #   _square_bracket_depth  - depth of square brackets at start of line
19639     #   _paren_depth           - depth of parens at start of line
19640     #   _starting_in_quote     - this line continues a multi-line quote
19641     #                            (so don't trim leading blanks!)
19642     #   _ending_in_quote       - this line ends in a multi-line quote
19643     #                            (so don't trim trailing blanks!)
19644     my $line_of_tokens = {
19645         _line_type                => 'EOF',
19646         _line_text                => $input_line,
19647         _line_number              => $input_line_number,
19648         _rtoken_type              => undef,
19649         _rtokens                  => undef,
19650         _rlevels                  => undef,
19651         _rslevels                 => undef,
19652         _rblock_type              => undef,
19653         _rcontainer_type          => undef,
19654         _rcontainer_environment   => undef,
19655         _rtype_sequence           => undef,
19656         _rnesting_tokens          => undef,
19657         _rci_levels               => undef,
19658         _rnesting_blocks          => undef,
19659         _python_indentation_level => -1,                   ## 0,
19660         _starting_in_quote    => 0,                    # to be set by subroutine
19661         _ending_in_quote      => 0,
19662         _curly_brace_depth    => $brace_depth,
19663         _square_bracket_depth => $square_bracket_depth,
19664         _paren_depth          => $paren_depth,
19665         _quote_character      => '',
19666     };
19667
19668     # must print line unchanged if we are in a here document
19669     if ( $tokenizer_self->{_in_here_doc} ) {
19670
19671         $line_of_tokens->{_line_type} = 'HERE';
19672         my $here_doc_target      = $tokenizer_self->{_here_doc_target};
19673         my $here_quote_character = $tokenizer_self->{_here_quote_character};
19674         my $candidate_target     = $input_line;
19675         chomp $candidate_target;
19676         if ( $candidate_target eq $here_doc_target ) {
19677             $tokenizer_self->{_nearly_matched_here_target_at} = undef;
19678             $line_of_tokens->{_line_type}                     = 'HERE_END';
19679             write_logfile_entry("Exiting HERE document $here_doc_target\n");
19680
19681             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
19682             if (@$rhere_target_list) {    # there can be multiple here targets
19683                 ( $here_doc_target, $here_quote_character ) =
19684                   @{ shift @$rhere_target_list };
19685                 $tokenizer_self->{_here_doc_target} = $here_doc_target;
19686                 $tokenizer_self->{_here_quote_character} =
19687                   $here_quote_character;
19688                 write_logfile_entry(
19689                     "Entering HERE document $here_doc_target\n");
19690                 $tokenizer_self->{_nearly_matched_here_target_at} = undef;
19691                 $tokenizer_self->{_started_looking_for_here_target_at} =
19692                   $input_line_number;
19693             }
19694             else {
19695                 $tokenizer_self->{_in_here_doc}          = 0;
19696                 $tokenizer_self->{_here_doc_target}      = "";
19697                 $tokenizer_self->{_here_quote_character} = "";
19698             }
19699         }
19700
19701         # check for error of extra whitespace
19702         # note for PERL6: leading whitespace is allowed
19703         else {
19704             $candidate_target =~ s/\s*$//;
19705             $candidate_target =~ s/^\s*//;
19706             if ( $candidate_target eq $here_doc_target ) {
19707                 $tokenizer_self->{_nearly_matched_here_target_at} =
19708                   $input_line_number;
19709             }
19710         }
19711         return $line_of_tokens;
19712     }
19713
19714     # must print line unchanged if we are in a format section
19715     elsif ( $tokenizer_self->{_in_format} ) {
19716
19717         if ( $input_line =~ /^\.[\s#]*$/ ) {
19718             write_logfile_entry("Exiting format section\n");
19719             $tokenizer_self->{_in_format} = 0;
19720             $line_of_tokens->{_line_type} = 'FORMAT_END';
19721         }
19722         else {
19723             $line_of_tokens->{_line_type} = 'FORMAT';
19724         }
19725         return $line_of_tokens;
19726     }
19727
19728     # must print line unchanged if we are in pod documentation
19729     elsif ( $tokenizer_self->{_in_pod} ) {
19730
19731         $line_of_tokens->{_line_type} = 'POD';
19732         if ( $input_line =~ /^=cut/ ) {
19733             $line_of_tokens->{_line_type} = 'POD_END';
19734             write_logfile_entry("Exiting POD section\n");
19735             $tokenizer_self->{_in_pod} = 0;
19736         }
19737         if ( $input_line =~ /^\#\!.*perl\b/ ) {
19738             warning(
19739                 "Hash-bang in pod can cause older versions of perl to fail! \n"
19740             );
19741         }
19742
19743         return $line_of_tokens;
19744     }
19745
19746     # must print line unchanged if we have seen a severe error (i.e., we
19747     # are seeing illegal tokens and connot continue.  Syntax errors do
19748     # not pass this route).  Calling routine can decide what to do, but
19749     # the default can be to just pass all lines as if they were after __END__
19750     elsif ( $tokenizer_self->{_in_error} ) {
19751         $line_of_tokens->{_line_type} = 'ERROR';
19752         return $line_of_tokens;
19753     }
19754
19755     # print line unchanged if we are __DATA__ section
19756     elsif ( $tokenizer_self->{_in_data} ) {
19757
19758         # ...but look for POD
19759         # Note that the _in_data and _in_end flags remain set
19760         # so that we return to that state after seeing the
19761         # end of a pod section
19762         if ( $input_line =~ /^=(?!cut)/ ) {
19763             $line_of_tokens->{_line_type} = 'POD_START';
19764             write_logfile_entry("Entering POD section\n");
19765             $tokenizer_self->{_in_pod} = 1;
19766             return $line_of_tokens;
19767         }
19768         else {
19769             $line_of_tokens->{_line_type} = 'DATA';
19770             return $line_of_tokens;
19771         }
19772     }
19773
19774     # print line unchanged if we are in __END__ section
19775     elsif ( $tokenizer_self->{_in_end} ) {
19776
19777         # ...but look for POD
19778         # Note that the _in_data and _in_end flags remain set
19779         # so that we return to that state after seeing the
19780         # end of a pod section
19781         if ( $input_line =~ /^=(?!cut)/ ) {
19782             $line_of_tokens->{_line_type} = 'POD_START';
19783             write_logfile_entry("Entering POD section\n");
19784             $tokenizer_self->{_in_pod} = 1;
19785             return $line_of_tokens;
19786         }
19787         else {
19788             $line_of_tokens->{_line_type} = 'END';
19789             return $line_of_tokens;
19790         }
19791     }
19792
19793     # check for a hash-bang line if we haven't seen one
19794     if ( !$tokenizer_self->{_saw_hash_bang} ) {
19795         if ( $input_line =~ /^\#\!.*perl\b/ ) {
19796             $tokenizer_self->{_saw_hash_bang} = $input_line_number;
19797
19798             # check for -w and -P flags
19799             if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
19800                 $tokenizer_self->{_saw_perl_dash_P} = 1;
19801             }
19802
19803             if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
19804                 $tokenizer_self->{_saw_perl_dash_w} = 1;
19805             }
19806
19807             if (   ( $input_line_number > 1 )
19808                 && ( !$tokenizer_self->{_look_for_hash_bang} ) )
19809             {
19810
19811                 # this is helpful for VMS systems; we may have accidentally
19812                 # tokenized some DCL commands
19813                 if ( $tokenizer_self->{_started_tokenizing} ) {
19814                     warning(
19815 "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
19816                     );
19817                 }
19818                 else {
19819                     complain("Useless hash-bang after line 1\n");
19820                 }
19821             }
19822
19823             # Report the leading hash-bang as a system line
19824             # This will prevent -dac from deleting it
19825             else {
19826                 $line_of_tokens->{_line_type} = 'SYSTEM';
19827                 return $line_of_tokens;
19828             }
19829         }
19830     }
19831
19832     # wait for a hash-bang before parsing if the user invoked us with -x
19833     if ( $tokenizer_self->{_look_for_hash_bang}
19834         && !$tokenizer_self->{_saw_hash_bang} )
19835     {
19836         $line_of_tokens->{_line_type} = 'SYSTEM';
19837         return $line_of_tokens;
19838     }
19839
19840     # a first line of the form ': #' will be marked as SYSTEM
19841     # since lines of this form may be used by tcsh
19842     if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
19843         $line_of_tokens->{_line_type} = 'SYSTEM';
19844         return $line_of_tokens;
19845     }
19846
19847     # now we know that it is ok to tokenize the line...
19848     # the line tokenizer will modify any of these private variables:
19849     #        _rhere_target_list
19850     #        _in_data
19851     #        _in_end
19852     #        _in_format
19853     #        _in_error
19854     #        _in_pod
19855     #        _in_quote
19856     my $ending_in_quote_last = $tokenizer_self->{_in_quote};
19857     tokenize_this_line($line_of_tokens);
19858
19859     # Now finish defining the return structure and return it
19860     $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
19861
19862     # handle severe error (binary data in script)
19863     if ( $tokenizer_self->{_in_error} ) {
19864         $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
19865         warning("Giving up after error\n");
19866         $line_of_tokens->{_line_type} = 'ERROR';
19867         reset_indentation_level(0);          # avoid error messages
19868         return $line_of_tokens;
19869     }
19870
19871     # handle start of pod documentation
19872     if ( $tokenizer_self->{_in_pod} ) {
19873
19874         # This gets tricky..above a __DATA__ or __END__ section, perl
19875         # accepts '=cut' as the start of pod section. But afterwards,
19876         # only pod utilities see it and they may ignore an =cut without
19877         # leading =head.  In any case, this isn't good.
19878         if ( $input_line =~ /^=cut\b/ ) {
19879             if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
19880                 complain("=cut while not in pod ignored\n");
19881                 $tokenizer_self->{_in_pod}    = 0;
19882                 $line_of_tokens->{_line_type} = 'POD_STOP';
19883             }
19884             else {
19885                 $line_of_tokens->{_line_type} = 'POD_END';
19886                 complain(
19887 "=cut starts a pod section .. this can fool pod utilities.\n"
19888                 );
19889                 write_logfile_entry("Entering POD section\n");
19890             }
19891         }
19892
19893         else {
19894             $line_of_tokens->{_line_type} = 'POD_START';
19895             write_logfile_entry("Entering POD section\n");
19896         }
19897
19898         return $line_of_tokens;
19899     }
19900
19901     # update indentation levels for log messages
19902     if ( $input_line !~ /^\s*$/ ) {
19903         my $rlevels                      = $line_of_tokens->{_rlevels};
19904         my $structural_indentation_level = $$rlevels[0];
19905         my ( $python_indentation_level, $msg ) =
19906           find_indentation_level( $input_line, $structural_indentation_level );
19907         if ($msg) { write_logfile_entry("$msg") }
19908         if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
19909             $line_of_tokens->{_python_indentation_level} =
19910               $python_indentation_level;
19911         }
19912     }
19913
19914     # see if this line contains here doc targets
19915     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
19916     if (@$rhere_target_list) {
19917
19918         my ( $here_doc_target, $here_quote_character ) =
19919           @{ shift @$rhere_target_list };
19920         $tokenizer_self->{_in_here_doc}          = 1;
19921         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
19922         $tokenizer_self->{_here_quote_character} = $here_quote_character;
19923         write_logfile_entry("Entering HERE document $here_doc_target\n");
19924         $tokenizer_self->{_started_looking_for_here_target_at} =
19925           $input_line_number;
19926     }
19927
19928     # NOTE: __END__ and __DATA__ statements are written unformatted
19929     # because they can theoretically contain additional characters
19930     # which are not tokenized (and cannot be read with <DATA> either!).
19931     if ( $tokenizer_self->{_in_data} ) {
19932         $line_of_tokens->{_line_type} = 'DATA_START';
19933         write_logfile_entry("Starting __DATA__ section\n");
19934         $tokenizer_self->{_saw_data} = 1;
19935
19936         # keep parsing after __DATA__ if use SelfLoader was seen
19937         if ( $tokenizer_self->{_saw_selfloader} ) {
19938             $tokenizer_self->{_in_data} = 0;
19939             write_logfile_entry(
19940                 "SelfLoader seen, continuing; -nlsl deactivates\n");
19941         }
19942
19943         return $line_of_tokens;
19944     }
19945
19946     elsif ( $tokenizer_self->{_in_end} ) {
19947         $line_of_tokens->{_line_type} = 'END_START';
19948         write_logfile_entry("Starting __END__ section\n");
19949         $tokenizer_self->{_saw_end} = 1;
19950
19951         # keep parsing after __END__ if use AutoLoader was seen
19952         if ( $tokenizer_self->{_saw_autoloader} ) {
19953             $tokenizer_self->{_in_end} = 0;
19954             write_logfile_entry(
19955                 "AutoLoader seen, continuing; -nlal deactivates\n");
19956         }
19957         return $line_of_tokens;
19958     }
19959
19960     # now, finally, we know that this line is type 'CODE'
19961     $line_of_tokens->{_line_type} = 'CODE';
19962
19963     # remember if we have seen any real code
19964     if (   !$tokenizer_self->{_started_tokenizing}
19965         && $input_line !~ /^\s*$/
19966         && $input_line !~ /^\s*#/ )
19967     {
19968         $tokenizer_self->{_started_tokenizing} = 1;
19969     }
19970
19971     if ( $tokenizer_self->{_debugger_object} ) {
19972         $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
19973     }
19974
19975     # Note: if keyword 'format' occurs in this line code, it is still CODE
19976     # (keyword 'format' need not start a line)
19977     if ( $tokenizer_self->{_in_format} ) {
19978         write_logfile_entry("Entering format section\n");
19979     }
19980
19981     if ( $tokenizer_self->{_in_quote}
19982         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
19983     {
19984
19985         #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
19986         if (
19987             ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
19988         {
19989             $tokenizer_self->{_line_start_quote} = $input_line_number;
19990             write_logfile_entry(
19991                 "Start multi-line quote or pattern ending in $quote_target\n");
19992         }
19993     }
19994     elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
19995         and !$tokenizer_self->{_in_quote} )
19996     {
19997         $tokenizer_self->{_line_start_quote} = -1;
19998         write_logfile_entry("End of multi-line quote or pattern\n");
19999     }
20000
20001     # we are returning a line of CODE
20002     return $line_of_tokens;
20003 }
20004
20005 sub find_starting_indentation_level {
20006
20007     # USES GLOBAL VARIABLES: $tokenizer_self
20008     my $starting_level    = 0;
20009     my $know_input_tabstr = -1;    # flag for find_indentation_level
20010
20011     # use value if given as parameter
20012     if ( $tokenizer_self->{_know_starting_level} ) {
20013         $starting_level = $tokenizer_self->{_starting_level};
20014     }
20015
20016     # if we know there is a hash_bang line, the level must be zero
20017     elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
20018         $tokenizer_self->{_know_starting_level} = 1;
20019     }
20020
20021     # otherwise figure it out from the input file
20022     else {
20023         my $line;
20024         my $i                            = 0;
20025         my $structural_indentation_level = -1; # flag for find_indentation_level
20026
20027         my $msg = "";
20028         while ( $line =
20029             $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
20030         {
20031
20032             # if first line is #! then assume starting level is zero
20033             if ( $i == 1 && $line =~ /^\#\!/ ) {
20034                 $starting_level = 0;
20035                 last;
20036             }
20037             next if ( $line =~ /^\s*#/ );      # must not be comment
20038             next if ( $line =~ /^\s*$/ );      # must not be blank
20039             ( $starting_level, $msg ) =
20040               find_indentation_level( $line, $structural_indentation_level );
20041             if ($msg) { write_logfile_entry("$msg") }
20042             last;
20043         }
20044         $msg = "Line $i implies starting-indentation-level = $starting_level\n";
20045
20046         if ( $starting_level > 0 ) {
20047
20048             my $input_tabstr = $tokenizer_self->{_input_tabstr};
20049             if ( $input_tabstr eq "\t" ) {
20050                 $msg .= "by guessing input tabbing uses 1 tab per level\n";
20051             }
20052             else {
20053                 my $cols = length($input_tabstr);
20054                 $msg .=
20055                   "by guessing input tabbing uses $cols blanks per level\n";
20056             }
20057         }
20058         write_logfile_entry("$msg");
20059     }
20060     $tokenizer_self->{_starting_level} = $starting_level;
20061     reset_indentation_level($starting_level);
20062 }
20063
20064 # Find indentation level given a input line.  At the same time, try to
20065 # figure out the input tabbing scheme.
20066 #
20067 # There are two types of calls:
20068 #
20069 # Type 1: $structural_indentation_level < 0
20070 #  In this case we have to guess $input_tabstr to figure out the level.
20071 #
20072 # Type 2: $structural_indentation_level >= 0
20073 #  In this case the level of this line is known, and this routine can
20074 #  update the tabbing string, if still unknown, to make the level correct.
20075
20076 sub find_indentation_level {
20077     my ( $line, $structural_indentation_level ) = @_;
20078
20079     # USES GLOBAL VARIABLES: $tokenizer_self
20080     my $level = 0;
20081     my $msg   = "";
20082
20083     my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
20084     my $input_tabstr      = $tokenizer_self->{_input_tabstr};
20085
20086     # find leading whitespace
20087     my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
20088
20089     # make first guess at input tabbing scheme if necessary
20090     if ( $know_input_tabstr < 0 ) {
20091
20092         $know_input_tabstr = 0;
20093
20094         if ( $tokenizer_self->{_tabs} ) {
20095             $input_tabstr = "\t";
20096             if ( length($leading_whitespace) > 0 ) {
20097                 if ( $leading_whitespace !~ /\t/ ) {
20098
20099                     my $cols = $tokenizer_self->{_indent_columns};
20100
20101                     if ( length($leading_whitespace) < $cols ) {
20102                         $cols = length($leading_whitespace);
20103                     }
20104                     $input_tabstr = " " x $cols;
20105                 }
20106             }
20107         }
20108         else {
20109             $input_tabstr = " " x $tokenizer_self->{_indent_columns};
20110
20111             if ( length($leading_whitespace) > 0 ) {
20112                 if ( $leading_whitespace =~ /^\t/ ) {
20113                     $input_tabstr = "\t";
20114                 }
20115             }
20116         }
20117         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
20118         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
20119     }
20120
20121     # determine the input tabbing scheme if possible
20122     if (   ( $know_input_tabstr == 0 )
20123         && ( length($leading_whitespace) > 0 )
20124         && ( $structural_indentation_level > 0 ) )
20125     {
20126         my $saved_input_tabstr = $input_tabstr;
20127
20128         # check for common case of one tab per indentation level
20129         if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
20130             if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
20131                 $input_tabstr = "\t";
20132                 $msg          = "Guessing old indentation was tab character\n";
20133             }
20134         }
20135
20136         else {
20137
20138             # detab any tabs based on 8 blanks per tab
20139             my $entabbed = "";
20140             if ( $leading_whitespace =~ s/^\t+/        /g ) {
20141                 $entabbed = "entabbed";
20142             }
20143
20144             # now compute tabbing from number of spaces
20145             my $columns =
20146               length($leading_whitespace) / $structural_indentation_level;
20147             if ( $columns == int $columns ) {
20148                 $msg =
20149                   "Guessing old indentation was $columns $entabbed spaces\n";
20150             }
20151             else {
20152                 $columns = int $columns;
20153                 $msg =
20154 "old indentation is unclear, using $columns $entabbed spaces\n";
20155             }
20156             $input_tabstr = " " x $columns;
20157         }
20158         $know_input_tabstr                    = 1;
20159         $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
20160         $tokenizer_self->{_input_tabstr}      = $input_tabstr;
20161
20162         # see if mistakes were made
20163         if ( ( $tokenizer_self->{_starting_level} > 0 )
20164             && !$tokenizer_self->{_know_starting_level} )
20165         {
20166
20167             if ( $input_tabstr ne $saved_input_tabstr ) {
20168                 complain(
20169 "I made a bad starting level guess; rerun with a value for -sil \n"
20170                 );
20171             }
20172         }
20173     }
20174
20175     # use current guess at input tabbing to get input indentation level
20176     #
20177     # Patch to handle a common case of entabbed leading whitespace
20178     # If the leading whitespace equals 4 spaces and we also have
20179     # tabs, detab the input whitespace assuming 8 spaces per tab.
20180     if ( length($input_tabstr) == 4 ) {
20181         $leading_whitespace =~ s/^\t+/        /g;
20182     }
20183
20184     if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
20185         my $pos = 0;
20186
20187         while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
20188         {
20189             $pos += $len_tab;
20190             $level++;
20191         }
20192     }
20193     return ( $level, $msg );
20194 }
20195
20196 # This is a currently unused debug routine
20197 sub dump_functions {
20198
20199     my $fh = *STDOUT;
20200     my ( $pkg, $sub );
20201     foreach $pkg ( keys %is_user_function ) {
20202         print $fh "\nnon-constant subs in package $pkg\n";
20203
20204         foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
20205             my $msg = "";
20206             if ( $is_block_list_function{$pkg}{$sub} ) {
20207                 $msg = 'block_list';
20208             }
20209
20210             if ( $is_block_function{$pkg}{$sub} ) {
20211                 $msg = 'block';
20212             }
20213             print $fh "$sub $msg\n";
20214         }
20215     }
20216
20217     foreach $pkg ( keys %is_constant ) {
20218         print $fh "\nconstants and constant subs in package $pkg\n";
20219
20220         foreach $sub ( keys %{ $is_constant{$pkg} } ) {
20221             print $fh "$sub\n";
20222         }
20223     }
20224 }
20225
20226 sub prepare_for_a_new_file {
20227
20228     # previous tokens needed to determine what to expect next
20229     $last_nonblank_token      = ';';    # the only possible starting state which
20230     $last_nonblank_type       = ';';    # will make a leading brace a code block
20231     $last_nonblank_block_type = '';
20232
20233     # scalars for remembering statement types across multiple lines
20234     $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
20235     $in_attribute_list = 0;
20236
20237     # scalars for remembering where we are in the file
20238     $current_package = "main";
20239     $context         = UNKNOWN_CONTEXT;
20240
20241     # hashes used to remember function information
20242     %is_constant             = ();      # user-defined constants
20243     %is_user_function        = ();      # user-defined functions
20244     %user_function_prototype = ();      # their prototypes
20245     %is_block_function       = ();
20246     %is_block_list_function  = ();
20247     %saw_function_definition = ();
20248
20249     # variables used to track depths of various containers
20250     # and report nesting errors
20251     $paren_depth          = 0;
20252     $brace_depth          = 0;
20253     $square_bracket_depth = 0;
20254     @current_depth[ 0 .. $#closing_brace_names ] =
20255       (0) x scalar @closing_brace_names;
20256     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
20257       ( 0 .. $#closing_brace_names );
20258     @current_sequence_number             = ();
20259     $paren_type[$paren_depth]            = '';
20260     $paren_semicolon_count[$paren_depth] = 0;
20261     $paren_structural_type[$brace_depth] = '';
20262     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
20263     $brace_structural_type[$brace_depth]                   = '';
20264     $brace_statement_type[$brace_depth]                    = "";
20265     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
20266     $brace_package[$paren_depth]                           = $current_package;
20267     $square_bracket_type[$square_bracket_depth]            = '';
20268     $square_bracket_structural_type[$square_bracket_depth] = '';
20269
20270     initialize_tokenizer_state();
20271 }
20272
20273 {                                       # begin tokenize_this_line
20274
20275     use constant BRACE          => 0;
20276     use constant SQUARE_BRACKET => 1;
20277     use constant PAREN          => 2;
20278     use constant QUESTION_COLON => 3;
20279
20280     # TV1: scalars for processing one LINE.
20281     # Re-initialized on each entry to sub tokenize_this_line.
20282     my (
20283         $block_type,        $container_type,    $expecting,
20284         $i,                 $i_tok,             $input_line,
20285         $input_line_number, $last_nonblank_i,   $max_token_index,
20286         $next_tok,          $next_type,         $peeked_ahead,
20287         $prototype,         $rhere_target_list, $rtoken_map,
20288         $rtoken_type,       $rtokens,           $tok,
20289         $type,              $type_sequence,
20290     );
20291
20292     # TV2: refs to ARRAYS for processing one LINE
20293     # Re-initialized on each call.
20294     my $routput_token_list     = [];    # stack of output token indexes
20295     my $routput_token_type     = [];    # token types
20296     my $routput_block_type     = [];    # types of code block
20297     my $routput_container_type = [];    # paren types, such as if, elsif, ..
20298     my $routput_type_sequence  = [];    # nesting sequential number
20299
20300     # TV3: SCALARS for quote variables.  These are initialized with a
20301     # subroutine call and continually updated as lines are processed.
20302     my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
20303         $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
20304
20305     # TV4: SCALARS for multi-line identifiers and
20306     # statements. These are initialized with a subroutine call
20307     # and continually updated as lines are processed.
20308     my ( $id_scan_state, $identifier, $want_paren, );
20309
20310     # TV5: SCALARS for tracking indentation level.
20311     # Initialized once and continually updated as lines are
20312     # processed.
20313     my (
20314         $nesting_token_string,      $nesting_type_string,
20315         $nesting_block_string,      $nesting_block_flag,
20316         $nesting_list_string,       $nesting_list_flag,
20317         $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
20318         $in_statement_continuation, $level_in_tokenizer,
20319         $slevel_in_tokenizer,       $rslevel_stack,
20320     );
20321
20322     # TV6: SCALARS for remembering several previous
20323     # tokens. Initialized once and continually updated as
20324     # lines are processed.
20325     my (
20326         $last_nonblank_container_type,     $last_nonblank_type_sequence,
20327         $last_last_nonblank_token,         $last_last_nonblank_type,
20328         $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
20329         $last_last_nonblank_type_sequence, $last_nonblank_prototype,
20330     );
20331
20332     # ----------------------------------------------------------------
20333     # beginning of tokenizer variable access and manipulation routines
20334     # ----------------------------------------------------------------
20335
20336     sub initialize_tokenizer_state {
20337
20338         # TV1: initialized on each call
20339         # TV2: initialized on each call
20340         # TV3:
20341         $in_quote                = 0;
20342         $quote_type              = 'Q';
20343         $quote_character         = "";
20344         $quote_pos               = 0;
20345         $quote_depth             = 0;
20346         $quoted_string_1         = "";
20347         $quoted_string_2         = "";
20348         $allowed_quote_modifiers = "";
20349
20350         # TV4:
20351         $id_scan_state = '';
20352         $identifier    = '';
20353         $want_paren    = "";
20354
20355         # TV5:
20356         $nesting_token_string             = "";
20357         $nesting_type_string              = "";
20358         $nesting_block_string             = '1';    # initially in a block
20359         $nesting_block_flag               = 1;
20360         $nesting_list_string              = '0';    # initially not in a list
20361         $nesting_list_flag                = 0;      # initially not in a list
20362         $ci_string_in_tokenizer           = "";
20363         $continuation_string_in_tokenizer = "0";
20364         $in_statement_continuation        = 0;
20365         $level_in_tokenizer               = 0;
20366         $slevel_in_tokenizer              = 0;
20367         $rslevel_stack                    = [];
20368
20369         # TV6:
20370         $last_nonblank_container_type      = '';
20371         $last_nonblank_type_sequence       = '';
20372         $last_last_nonblank_token          = ';';
20373         $last_last_nonblank_type           = ';';
20374         $last_last_nonblank_block_type     = '';
20375         $last_last_nonblank_container_type = '';
20376         $last_last_nonblank_type_sequence  = '';
20377         $last_nonblank_prototype           = "";
20378     }
20379
20380     sub save_tokenizer_state {
20381
20382         my $rTV1 = [
20383             $block_type,        $container_type,    $expecting,
20384             $i,                 $i_tok,             $input_line,
20385             $input_line_number, $last_nonblank_i,   $max_token_index,
20386             $next_tok,          $next_type,         $peeked_ahead,
20387             $prototype,         $rhere_target_list, $rtoken_map,
20388             $rtoken_type,       $rtokens,           $tok,
20389             $type,              $type_sequence,
20390         ];
20391
20392         my $rTV2 = [
20393             $routput_token_list, $routput_token_type,
20394             $routput_block_type, $routput_container_type,
20395             $routput_type_sequence,
20396         ];
20397
20398         my $rTV3 = [
20399             $in_quote,        $quote_type,
20400             $quote_character, $quote_pos,
20401             $quote_depth,     $quoted_string_1,
20402             $quoted_string_2, $allowed_quote_modifiers,
20403         ];
20404
20405         my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ];
20406
20407         my $rTV5 = [
20408             $nesting_token_string,      $nesting_type_string,
20409             $nesting_block_string,      $nesting_block_flag,
20410             $nesting_list_string,       $nesting_list_flag,
20411             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
20412             $in_statement_continuation, $level_in_tokenizer,
20413             $slevel_in_tokenizer,       $rslevel_stack,
20414         ];
20415
20416         my $rTV6 = [
20417             $last_nonblank_container_type,
20418             $last_nonblank_type_sequence,
20419             $last_last_nonblank_token,
20420             $last_last_nonblank_type,
20421             $last_last_nonblank_block_type,
20422             $last_last_nonblank_container_type,
20423             $last_last_nonblank_type_sequence,
20424             $last_nonblank_prototype,
20425         ];
20426         return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
20427     }
20428
20429     sub restore_tokenizer_state {
20430         my ($rstate) = @_;
20431         my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
20432         (
20433             $block_type,        $container_type,    $expecting,
20434             $i,                 $i_tok,             $input_line,
20435             $input_line_number, $last_nonblank_i,   $max_token_index,
20436             $next_tok,          $next_type,         $peeked_ahead,
20437             $prototype,         $rhere_target_list, $rtoken_map,
20438             $rtoken_type,       $rtokens,           $tok,
20439             $type,              $type_sequence,
20440         ) = @{$rTV1};
20441
20442         (
20443             $routput_token_list, $routput_token_type,
20444             $routput_block_type, $routput_container_type,
20445             $routput_type_sequence,
20446         ) = @{$rTV2};
20447
20448         (
20449             $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
20450             $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
20451         ) = @{$rTV3};
20452
20453         ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4};
20454
20455         (
20456             $nesting_token_string,      $nesting_type_string,
20457             $nesting_block_string,      $nesting_block_flag,
20458             $nesting_list_string,       $nesting_list_flag,
20459             $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
20460             $in_statement_continuation, $level_in_tokenizer,
20461             $slevel_in_tokenizer,       $rslevel_stack,
20462         ) = @{$rTV5};
20463
20464         (
20465             $last_nonblank_container_type,
20466             $last_nonblank_type_sequence,
20467             $last_last_nonblank_token,
20468             $last_last_nonblank_type,
20469             $last_last_nonblank_block_type,
20470             $last_last_nonblank_container_type,
20471             $last_last_nonblank_type_sequence,
20472             $last_nonblank_prototype,
20473         ) = @{$rTV6};
20474     }
20475
20476     sub get_indentation_level {
20477         return $level_in_tokenizer;
20478     }
20479
20480     sub reset_indentation_level {
20481         $level_in_tokenizer  = $_[0];
20482         $slevel_in_tokenizer = $_[0];
20483         push @{$rslevel_stack}, $slevel_in_tokenizer;
20484     }
20485
20486     sub peeked_ahead {
20487         $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
20488     }
20489
20490     # ------------------------------------------------------------
20491     # end of tokenizer variable access and manipulation routines
20492     # ------------------------------------------------------------
20493
20494     # ------------------------------------------------------------
20495     # beginning of various scanner interface routines
20496     # ------------------------------------------------------------
20497     sub scan_replacement_text {
20498
20499         # check for here-docs in replacement text invoked by
20500         # a substitution operator with executable modifier 'e'.
20501         #
20502         # given:
20503         #  $replacement_text
20504         # return:
20505         #  $rht = reference to any here-doc targets
20506         my ($replacement_text) = @_;
20507
20508         # quick check
20509         return undef unless ( $replacement_text =~ /<</ );
20510
20511         write_logfile_entry("scanning replacement text for here-doc targets\n");
20512
20513         # save the logger object for error messages
20514         my $logger_object = $tokenizer_self->{_logger_object};
20515
20516         # localize all package variables
20517         local (
20518             $tokenizer_self,          $last_nonblank_token,
20519             $last_nonblank_type,      $last_nonblank_block_type,
20520             $statement_type,          $in_attribute_list,
20521             $current_package,         $context,
20522             %is_constant,             %is_user_function,
20523             %user_function_prototype, %is_block_function,
20524             %is_block_list_function,  %saw_function_definition,
20525             $brace_depth,             $paren_depth,
20526             $square_bracket_depth,    @current_depth,
20527             @nesting_sequence_number, @current_sequence_number,
20528             @paren_type,              @paren_semicolon_count,
20529             @paren_structural_type,   @brace_type,
20530             @brace_structural_type,   @brace_statement_type,
20531             @brace_context,           @brace_package,
20532             @square_bracket_type,     @square_bracket_structural_type,
20533             @depth_array,             @starting_line_of_current_depth,
20534         );
20535
20536         # save all lexical variables
20537         my $rstate = save_tokenizer_state();
20538         _decrement_count();    # avoid error check for multiple tokenizers
20539
20540         # make a new tokenizer
20541         my $rOpts = {};
20542         my $rpending_logfile_message;
20543         my $source_object =
20544           Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
20545             $rpending_logfile_message );
20546         my $tokenizer = Perl::Tidy::Tokenizer->new(
20547             source_object        => $source_object,
20548             logger_object        => $logger_object,
20549             starting_line_number => $input_line_number,
20550         );
20551
20552         # scan the replacement text
20553         1 while ( $tokenizer->get_line() );
20554
20555         # remove any here doc targets
20556         my $rht = undef;
20557         if ( $tokenizer_self->{_in_here_doc} ) {
20558             $rht = [];
20559             push @{$rht},
20560               [
20561                 $tokenizer_self->{_here_doc_target},
20562                 $tokenizer_self->{_here_quote_character}
20563               ];
20564             if ( $tokenizer_self->{_rhere_target_list} ) {
20565                 push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
20566                 $tokenizer_self->{_rhere_target_list} = undef;
20567             }
20568             $tokenizer_self->{_in_here_doc} = undef;
20569         }
20570
20571         # now its safe to report errors
20572         $tokenizer->report_tokenization_errors();
20573
20574         # restore all tokenizer lexical variables
20575         restore_tokenizer_state($rstate);
20576
20577         # return the here doc targets
20578         return $rht;
20579     }
20580
20581     sub scan_bare_identifier {
20582         ( $i, $tok, $type, $prototype ) =
20583           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
20584             $rtoken_map, $max_token_index );
20585     }
20586
20587     sub scan_identifier {
20588         ( $i, $tok, $type, $id_scan_state, $identifier ) =
20589           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
20590             $max_token_index );
20591     }
20592
20593     sub scan_id {
20594         ( $i, $tok, $type, $id_scan_state ) =
20595           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
20596             $id_scan_state, $max_token_index );
20597     }
20598
20599     sub scan_number {
20600         my $number;
20601         ( $i, $type, $number ) =
20602           scan_number_do( $input_line, $i, $rtoken_map, $type,
20603             $max_token_index );
20604         return $number;
20605     }
20606
20607     # a sub to warn if token found where term expected
20608     sub error_if_expecting_TERM {
20609         if ( $expecting == TERM ) {
20610             if ( $really_want_term{$last_nonblank_type} ) {
20611                 unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
20612                     $rtoken_type, $input_line );
20613                 1;
20614             }
20615         }
20616     }
20617
20618     # a sub to warn if token found where operator expected
20619     sub error_if_expecting_OPERATOR {
20620         if ( $expecting == OPERATOR ) {
20621             my $thing = defined $_[0] ? $_[0] : $tok;
20622             unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
20623                 $rtoken_map, $rtoken_type, $input_line );
20624             if ( $i_tok == 0 ) {
20625                 interrupt_logfile();
20626                 warning("Missing ';' above?\n");
20627                 resume_logfile();
20628             }
20629             1;
20630         }
20631     }
20632
20633     # ------------------------------------------------------------
20634     # end scanner interfaces
20635     # ------------------------------------------------------------
20636
20637     my %is_for_foreach;
20638     @_ = qw(for foreach);
20639     @is_for_foreach{@_} = (1) x scalar(@_);
20640
20641     my %is_my_our;
20642     @_ = qw(my our);
20643     @is_my_our{@_} = (1) x scalar(@_);
20644
20645     # These keywords may introduce blocks after parenthesized expressions,
20646     # in the form:
20647     # keyword ( .... ) { BLOCK }
20648     # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
20649     my %is_blocktype_with_paren;
20650     @_ = qw(if elsif unless while until for foreach switch case given when);
20651     @is_blocktype_with_paren{@_} = (1) x scalar(@_);
20652
20653     # ------------------------------------------------------------
20654     # begin hash of code for handling most token types
20655     # ------------------------------------------------------------
20656     my $tokenization_code = {
20657
20658         # no special code for these types yet, but syntax checks
20659         # could be added
20660
20661 ##      '!'   => undef,
20662 ##      '!='  => undef,
20663 ##      '!~'  => undef,
20664 ##      '%='  => undef,
20665 ##      '&&=' => undef,
20666 ##      '&='  => undef,
20667 ##      '+='  => undef,
20668 ##      '-='  => undef,
20669 ##      '..'  => undef,
20670 ##      '..'  => undef,
20671 ##      '...' => undef,
20672 ##      '.='  => undef,
20673 ##      '<<=' => undef,
20674 ##      '<='  => undef,
20675 ##      '<=>' => undef,
20676 ##      '<>'  => undef,
20677 ##      '='   => undef,
20678 ##      '=='  => undef,
20679 ##      '=~'  => undef,
20680 ##      '>='  => undef,
20681 ##      '>>'  => undef,
20682 ##      '>>=' => undef,
20683 ##      '\\'  => undef,
20684 ##      '^='  => undef,
20685 ##      '|='  => undef,
20686 ##      '||=' => undef,
20687 ##      '//=' => undef,
20688 ##      '~'   => undef,
20689 ##      '~~'  => undef,
20690
20691         '>' => sub {
20692             error_if_expecting_TERM()
20693               if ( $expecting == TERM );
20694         },
20695         '|' => sub {
20696             error_if_expecting_TERM()
20697               if ( $expecting == TERM );
20698         },
20699         '$' => sub {
20700
20701             # start looking for a scalar
20702             error_if_expecting_OPERATOR("Scalar")
20703               if ( $expecting == OPERATOR );
20704             scan_identifier();
20705
20706             if ( $identifier eq '$^W' ) {
20707                 $tokenizer_self->{_saw_perl_dash_w} = 1;
20708             }
20709
20710             # Check for indentifier in indirect object slot
20711             # (vorboard.pl, sort.t).  Something like:
20712             #   /^(print|printf|sort|exec|system)$/
20713             if (
20714                 $is_indirect_object_taker{$last_nonblank_token}
20715
20716                 || ( ( $last_nonblank_token eq '(' )
20717                     && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
20718                 || ( $last_nonblank_type =~ /^[Uw]$/ )    # possible object
20719               )
20720             {
20721                 $type = 'Z';
20722             }
20723         },
20724         '(' => sub {
20725
20726             ++$paren_depth;
20727             $paren_semicolon_count[$paren_depth] = 0;
20728             if ($want_paren) {
20729                 $container_type = $want_paren;
20730                 $want_paren     = "";
20731             }
20732             else {
20733                 $container_type = $last_nonblank_token;
20734
20735                 # We can check for a syntax error here of unexpected '(',
20736                 # but this is going to get messy...
20737                 if (
20738                     $expecting == OPERATOR
20739
20740                     # be sure this is not a method call of the form
20741                     # &method(...), $method->(..), &{method}(...),
20742                     # $ref[2](list) is ok & short for $ref[2]->(list)
20743                     # NOTE: at present, braces in something like &{ xxx }
20744                     # are not marked as a block, we might have a method call
20745                     && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
20746
20747                   )
20748                 {
20749
20750                     # ref: camel 3 p 703.
20751                     if ( $last_last_nonblank_token eq 'do' ) {
20752                         complain(
20753 "do SUBROUTINE is deprecated; consider & or -> notation\n"
20754                         );
20755                     }
20756                     else {
20757
20758                         # if this is an empty list, (), then it is not an
20759                         # error; for example, we might have a constant pi and
20760                         # invoke it with pi() or just pi;
20761                         my ( $next_nonblank_token, $i_next ) =
20762                           find_next_nonblank_token( $i, $rtokens,
20763                             $max_token_index );
20764                         if ( $next_nonblank_token ne ')' ) {
20765                             my $hint;
20766                             error_if_expecting_OPERATOR('(');
20767
20768                             if ( $last_nonblank_type eq 'C' ) {
20769                                 $hint =
20770                                   "$last_nonblank_token has a void prototype\n";
20771                             }
20772                             elsif ( $last_nonblank_type eq 'i' ) {
20773                                 if (   $i_tok > 0
20774                                     && $last_nonblank_token =~ /^\$/ )
20775                                 {
20776                                     $hint =
20777 "Do you mean '$last_nonblank_token->(' ?\n";
20778                                 }
20779                             }
20780                             if ($hint) {
20781                                 interrupt_logfile();
20782                                 warning($hint);
20783                                 resume_logfile();
20784                             }
20785                         } ## end if ( $next_nonblank_token...
20786                     } ## end else [ if ( $last_last_nonblank_token...
20787                 } ## end if ( $expecting == OPERATOR...
20788             }
20789             $paren_type[$paren_depth] = $container_type;
20790             $type_sequence =
20791               increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
20792
20793             # propagate types down through nested parens
20794             # for example: the second paren in 'if ((' would be structural
20795             # since the first is.
20796
20797             if ( $last_nonblank_token eq '(' ) {
20798                 $type = $last_nonblank_type;
20799             }
20800
20801             #     We exclude parens as structural after a ',' because it
20802             #     causes subtle problems with continuation indentation for
20803             #     something like this, where the first 'or' will not get
20804             #     indented.
20805             #
20806             #         assert(
20807             #             __LINE__,
20808             #             ( not defined $check )
20809             #               or ref $check
20810             #               or $check eq "new"
20811             #               or $check eq "old",
20812             #         );
20813             #
20814             #     Likewise, we exclude parens where a statement can start
20815             #     because of problems with continuation indentation, like
20816             #     these:
20817             #
20818             #         ($firstline =~ /^#\!.*perl/)
20819             #         and (print $File::Find::name, "\n")
20820             #           and (return 1);
20821             #
20822             #         (ref($usage_fref) =~ /CODE/)
20823             #         ? &$usage_fref
20824             #           : (&blast_usage, &blast_params, &blast_general_params);
20825
20826             else {
20827                 $type = '{';
20828             }
20829
20830             if ( $last_nonblank_type eq ')' ) {
20831                 warning(
20832                     "Syntax error? found token '$last_nonblank_type' then '('\n"
20833                 );
20834             }
20835             $paren_structural_type[$paren_depth] = $type;
20836
20837         },
20838         ')' => sub {
20839             $type_sequence =
20840               decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
20841
20842             if ( $paren_structural_type[$paren_depth] eq '{' ) {
20843                 $type = '}';
20844             }
20845
20846             $container_type = $paren_type[$paren_depth];
20847
20848             #    /^(for|foreach)$/
20849             if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
20850                 my $num_sc = $paren_semicolon_count[$paren_depth];
20851                 if ( $num_sc > 0 && $num_sc != 2 ) {
20852                     warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
20853                 }
20854             }
20855
20856             if ( $paren_depth > 0 ) { $paren_depth-- }
20857         },
20858         ',' => sub {
20859             if ( $last_nonblank_type eq ',' ) {
20860                 complain("Repeated ','s \n");
20861             }
20862
20863             # patch for operator_expected: note if we are in the list (use.t)
20864             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
20865 ##                FIXME: need to move this elsewhere, perhaps check after a '('
20866 ##                elsif ($last_nonblank_token eq '(') {
20867 ##                    warning("Leading ','s illegal in some versions of perl\n");
20868 ##                }
20869         },
20870         ';' => sub {
20871             $context        = UNKNOWN_CONTEXT;
20872             $statement_type = '';
20873
20874             #    /^(for|foreach)$/
20875             if ( $is_for_foreach{ $paren_type[$paren_depth] } )
20876             {    # mark ; in for loop
20877
20878                 # Be careful: we do not want a semicolon such as the
20879                 # following to be included:
20880                 #
20881                 #    for (sort {strcoll($a,$b);} keys %investments) {
20882
20883                 if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
20884                     && $square_bracket_depth ==
20885                     $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
20886                 {
20887
20888                     $type = 'f';
20889                     $paren_semicolon_count[$paren_depth]++;
20890                 }
20891             }
20892
20893         },
20894         '"' => sub {
20895             error_if_expecting_OPERATOR("String")
20896               if ( $expecting == OPERATOR );
20897             $in_quote                = 1;
20898             $type                    = 'Q';
20899             $allowed_quote_modifiers = "";
20900         },
20901         "'" => sub {
20902             error_if_expecting_OPERATOR("String")
20903               if ( $expecting == OPERATOR );
20904             $in_quote                = 1;
20905             $type                    = 'Q';
20906             $allowed_quote_modifiers = "";
20907         },
20908         '`' => sub {
20909             error_if_expecting_OPERATOR("String")
20910               if ( $expecting == OPERATOR );
20911             $in_quote                = 1;
20912             $type                    = 'Q';
20913             $allowed_quote_modifiers = "";
20914         },
20915         '/' => sub {
20916             my $is_pattern;
20917
20918             if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
20919                 my $msg;
20920                 ( $is_pattern, $msg ) =
20921                   guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
20922                     $max_token_index );
20923
20924                 if ($msg) {
20925                     write_diagnostics("DIVIDE:$msg\n");
20926                     write_logfile_entry($msg);
20927                 }
20928             }
20929             else { $is_pattern = ( $expecting == TERM ) }
20930
20931             if ($is_pattern) {
20932                 $in_quote                = 1;
20933                 $type                    = 'Q';
20934                 $allowed_quote_modifiers = '[cgimosx]';
20935             }
20936             else {    # not a pattern; check for a /= token
20937
20938                 if ( $$rtokens[ $i + 1 ] eq '=' ) {    # form token /=
20939                     $i++;
20940                     $tok  = '/=';
20941                     $type = $tok;
20942                 }
20943
20944               #DEBUG - collecting info on what tokens follow a divide
20945               # for development of guessing algorithm
20946               #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
20947               #    #write_diagnostics( "DIVIDE? $input_line\n" );
20948               #}
20949             }
20950         },
20951         '{' => sub {
20952
20953             # if we just saw a ')', we will label this block with
20954             # its type.  We need to do this to allow sub
20955             # code_block_type to determine if this brace starts a
20956             # code block or anonymous hash.  (The type of a paren
20957             # pair is the preceding token, such as 'if', 'else',
20958             # etc).
20959             $container_type = "";
20960
20961             # ATTRS: for a '{' following an attribute list, reset
20962             # things to look like we just saw the sub name
20963             if ( $statement_type =~ /^sub/ ) {
20964                 $last_nonblank_token = $statement_type;
20965                 $last_nonblank_type  = 'i';
20966                 $statement_type      = "";
20967             }
20968
20969             # patch for SWITCH/CASE: hide these keywords from an immediately
20970             # following opening brace
20971             elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
20972                 && $statement_type eq $last_nonblank_token )
20973             {
20974                 $last_nonblank_token = ";";
20975             }
20976
20977             elsif ( $last_nonblank_token eq ')' ) {
20978                 $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
20979
20980                 # defensive move in case of a nesting error (pbug.t)
20981                 # in which this ')' had no previous '('
20982                 # this nesting error will have been caught
20983                 if ( !defined($last_nonblank_token) ) {
20984                     $last_nonblank_token = 'if';
20985                 }
20986
20987                 # check for syntax error here;
20988                 unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
20989                     my $list = join( ' ', sort keys %is_blocktype_with_paren );
20990                     warning(
20991                         "syntax error at ') {', didn't see one of: $list\n");
20992                 }
20993             }
20994
20995             # patch for paren-less for/foreach glitch, part 2.
20996             # see note below under 'qw'
20997             elsif ($last_nonblank_token eq 'qw'
20998                 && $is_for_foreach{$want_paren} )
20999             {
21000                 $last_nonblank_token = $want_paren;
21001                 if ( $last_last_nonblank_token eq $want_paren ) {
21002                     warning(
21003 "syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
21004                     );
21005
21006                 }
21007                 $want_paren = "";
21008             }
21009
21010             # now identify which of the three possible types of
21011             # curly braces we have: hash index container, anonymous
21012             # hash reference, or code block.
21013
21014             # non-structural (hash index) curly brace pair
21015             # get marked 'L' and 'R'
21016             if ( is_non_structural_brace() ) {
21017                 $type = 'L';
21018
21019                 # patch for SWITCH/CASE:
21020                 # allow paren-less identifier after 'when'
21021                 # if the brace is preceded by a space
21022                 if (   $statement_type eq 'when'
21023                     && $last_nonblank_type      eq 'i'
21024                     && $last_last_nonblank_type eq 'k'
21025                     && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
21026                 {
21027                     $type       = '{';
21028                     $block_type = $statement_type;
21029                 }
21030             }
21031
21032             # code and anonymous hash have the same type, '{', but are
21033             # distinguished by 'block_type',
21034             # which will be blank for an anonymous hash
21035             else {
21036
21037                 $block_type =
21038                   code_block_type( $i_tok, $rtokens, $rtoken_type,
21039                     $max_token_index );
21040
21041                 # patch to promote bareword type to function taking block
21042                 if (   $block_type
21043                     && $last_nonblank_type eq 'w'
21044                     && $last_nonblank_i >= 0 )
21045                 {
21046                     if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
21047                         $routput_token_type->[$last_nonblank_i] = 'G';
21048                     }
21049                 }
21050
21051                 # patch for SWITCH/CASE: if we find a stray opening block brace
21052                 # where we might accept a 'case' or 'when' block, then take it
21053                 if (   $statement_type eq 'case'
21054                     || $statement_type eq 'when' )
21055                 {
21056                     if ( !$block_type || $block_type eq '}' ) {
21057                         $block_type = $statement_type;
21058                     }
21059                 }
21060             }
21061             $brace_type[ ++$brace_depth ] = $block_type;
21062             $brace_package[$brace_depth] = $current_package;
21063             $type_sequence =
21064               increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
21065             $brace_structural_type[$brace_depth] = $type;
21066             $brace_context[$brace_depth]         = $context;
21067             $brace_statement_type[$brace_depth]  = $statement_type;
21068         },
21069         '}' => sub {
21070             $block_type = $brace_type[$brace_depth];
21071             if ($block_type) { $statement_type = '' }
21072             if ( defined( $brace_package[$brace_depth] ) ) {
21073                 $current_package = $brace_package[$brace_depth];
21074             }
21075
21076             # can happen on brace error (caught elsewhere)
21077             else {
21078             }
21079             $type_sequence =
21080               decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
21081
21082             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
21083                 $type = 'R';
21084             }
21085
21086             # propagate type information for 'do' and 'eval' blocks.
21087             # This is necessary to enable us to know if an operator
21088             # or term is expected next
21089             if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
21090                 $tok = $brace_type[$brace_depth];
21091             }
21092
21093             $context        = $brace_context[$brace_depth];
21094             $statement_type = $brace_statement_type[$brace_depth];
21095             if ( $brace_depth > 0 ) { $brace_depth--; }
21096         },
21097         '&' => sub {    # maybe sub call? start looking
21098
21099             # We have to check for sub call unless we are sure we
21100             # are expecting an operator.  This example from s2p
21101             # got mistaken as a q operator in an early version:
21102             #   print BODY &q(<<'EOT');
21103             if ( $expecting != OPERATOR ) {
21104                 scan_identifier();
21105             }
21106             else {
21107             }
21108         },
21109         '<' => sub {    # angle operator or less than?
21110
21111             if ( $expecting != OPERATOR ) {
21112                 ( $i, $type ) =
21113                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
21114                     $expecting, $max_token_index );
21115
21116             }
21117             else {
21118             }
21119         },
21120         '?' => sub {    # ?: conditional or starting pattern?
21121
21122             my $is_pattern;
21123
21124             if ( $expecting == UNKNOWN ) {
21125
21126                 my $msg;
21127                 ( $is_pattern, $msg ) =
21128                   guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
21129                     $max_token_index );
21130
21131                 if ($msg) { write_logfile_entry($msg) }
21132             }
21133             else { $is_pattern = ( $expecting == TERM ) }
21134
21135             if ($is_pattern) {
21136                 $in_quote                = 1;
21137                 $type                    = 'Q';
21138                 $allowed_quote_modifiers = '[cgimosx]';    # TBD:check this
21139             }
21140             else {
21141                 $type_sequence =
21142                   increase_nesting_depth( QUESTION_COLON,
21143                     $$rtoken_map[$i_tok] );
21144             }
21145         },
21146         '*' => sub {    # typeglob, or multiply?
21147
21148             if ( $expecting == TERM ) {
21149                 scan_identifier();
21150             }
21151             else {
21152
21153                 if ( $$rtokens[ $i + 1 ] eq '=' ) {
21154                     $tok  = '*=';
21155                     $type = $tok;
21156                     $i++;
21157                 }
21158                 elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
21159                     $tok  = '**';
21160                     $type = $tok;
21161                     $i++;
21162                     if ( $$rtokens[ $i + 1 ] eq '=' ) {
21163                         $tok  = '**=';
21164                         $type = $tok;
21165                         $i++;
21166                     }
21167                 }
21168             }
21169         },
21170         '.' => sub {    # what kind of . ?
21171
21172             if ( $expecting != OPERATOR ) {
21173                 scan_number();
21174                 if ( $type eq '.' ) {
21175                     error_if_expecting_TERM()
21176                       if ( $expecting == TERM );
21177                 }
21178             }
21179             else {
21180             }
21181         },
21182         ':' => sub {
21183
21184             # if this is the first nonblank character, call it a label
21185             # since perl seems to just swallow it
21186             if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
21187                 $type = 'J';
21188             }
21189
21190             # ATTRS: check for a ':' which introduces an attribute list
21191             # (this might eventually get its own token type)
21192             elsif ( $statement_type =~ /^sub/ ) {
21193                 $type              = 'A';
21194                 $in_attribute_list = 1;
21195             }
21196
21197             # check for scalar attribute, such as
21198             # my $foo : shared = 1;
21199             elsif ($is_my_our{$statement_type}
21200                 && $current_depth[QUESTION_COLON] == 0 )
21201             {
21202                 $type              = 'A';
21203                 $in_attribute_list = 1;
21204             }
21205
21206             # otherwise, it should be part of a ?/: operator
21207             else {
21208                 $type_sequence =
21209                   decrease_nesting_depth( QUESTION_COLON,
21210                     $$rtoken_map[$i_tok] );
21211                 if ( $last_nonblank_token eq '?' ) {
21212                     warning("Syntax error near ? :\n");
21213                 }
21214             }
21215         },
21216         '+' => sub {    # what kind of plus?
21217
21218             if ( $expecting == TERM ) {
21219                 my $number = scan_number();
21220
21221                 # unary plus is safest assumption if not a number
21222                 if ( !defined($number) ) { $type = 'p'; }
21223             }
21224             elsif ( $expecting == OPERATOR ) {
21225             }
21226             else {
21227                 if ( $next_type eq 'w' ) { $type = 'p' }
21228             }
21229         },
21230         '@' => sub {
21231
21232             error_if_expecting_OPERATOR("Array")
21233               if ( $expecting == OPERATOR );
21234             scan_identifier();
21235         },
21236         '%' => sub {    # hash or modulo?
21237
21238             # first guess is hash if no following blank
21239             if ( $expecting == UNKNOWN ) {
21240                 if ( $next_type ne 'b' ) { $expecting = TERM }
21241             }
21242             if ( $expecting == TERM ) {
21243                 scan_identifier();
21244             }
21245         },
21246         '[' => sub {
21247             $square_bracket_type[ ++$square_bracket_depth ] =
21248               $last_nonblank_token;
21249             $type_sequence =
21250               increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
21251
21252             # It may seem odd, but structural square brackets have
21253             # type '{' and '}'.  This simplifies the indentation logic.
21254             if ( !is_non_structural_brace() ) {
21255                 $type = '{';
21256             }
21257             $square_bracket_structural_type[$square_bracket_depth] = $type;
21258         },
21259         ']' => sub {
21260             $type_sequence =
21261               decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
21262
21263             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
21264             {
21265                 $type = '}';
21266             }
21267             if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
21268         },
21269         '-' => sub {    # what kind of minus?
21270
21271             if ( ( $expecting != OPERATOR )
21272                 && $is_file_test_operator{$next_tok} )
21273             {
21274                 $i++;
21275                 $tok .= $next_tok;
21276                 $type = 'F';
21277             }
21278             elsif ( $expecting == TERM ) {
21279                 my $number = scan_number();
21280
21281                 # maybe part of bareword token? unary is safest
21282                 if ( !defined($number) ) { $type = 'm'; }
21283
21284             }
21285             elsif ( $expecting == OPERATOR ) {
21286             }
21287             else {
21288
21289                 if ( $next_type eq 'w' ) {
21290                     $type = 'm';
21291                 }
21292             }
21293         },
21294
21295         '^' => sub {
21296
21297             # check for special variables like ${^WARNING_BITS}
21298             if ( $expecting == TERM ) {
21299
21300                 # FIXME: this should work but will not catch errors
21301                 # because we also have to be sure that previous token is
21302                 # a type character ($,@,%).
21303                 if ( $last_nonblank_token eq '{'
21304                     && ( $next_tok =~ /^[A-Za-z_]/ ) )
21305                 {
21306
21307                     if ( $next_tok eq 'W' ) {
21308                         $tokenizer_self->{_saw_perl_dash_w} = 1;
21309                     }
21310                     $tok  = $tok . $next_tok;
21311                     $i    = $i + 1;
21312                     $type = 'w';
21313                 }
21314
21315                 else {
21316                     unless ( error_if_expecting_TERM() ) {
21317
21318                         # Something like this is valid but strange:
21319                         # undef ^I;
21320                         complain("The '^' seems unusual here\n");
21321                     }
21322                 }
21323             }
21324         },
21325
21326         '::' => sub {    # probably a sub call
21327             scan_bare_identifier();
21328         },
21329         '<<' => sub {    # maybe a here-doc?
21330             return
21331               unless ( $i < $max_token_index )
21332               ;          # here-doc not possible if end of line
21333
21334             if ( $expecting != OPERATOR ) {
21335                 my ( $found_target, $here_doc_target, $here_quote_character,
21336                     $saw_error );
21337                 (
21338                     $found_target, $here_doc_target, $here_quote_character, $i,
21339                     $saw_error
21340                   )
21341                   = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
21342                     $max_token_index );
21343
21344                 if ($found_target) {
21345                     push @{$rhere_target_list},
21346                       [ $here_doc_target, $here_quote_character ];
21347                     $type = 'h';
21348                     if ( length($here_doc_target) > 80 ) {
21349                         my $truncated = substr( $here_doc_target, 0, 80 );
21350                         complain("Long here-target: '$truncated' ...\n");
21351                     }
21352                     elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
21353                         complain(
21354                             "Unconventional here-target: '$here_doc_target'\n"
21355                         );
21356                     }
21357                 }
21358                 elsif ( $expecting == TERM ) {
21359                     unless ($saw_error) {
21360
21361                         # shouldn't happen..
21362                         warning("Program bug; didn't find here doc target\n");
21363                         report_definite_bug();
21364                     }
21365                 }
21366             }
21367             else {
21368             }
21369         },
21370         '->' => sub {
21371
21372             # if -> points to a bare word, we must scan for an identifier,
21373             # otherwise something like ->y would look like the y operator
21374             scan_identifier();
21375         },
21376
21377         # type = 'pp' for pre-increment, '++' for post-increment
21378         '++' => sub {
21379             if ( $expecting == TERM ) { $type = 'pp' }
21380             elsif ( $expecting == UNKNOWN ) {
21381                 my ( $next_nonblank_token, $i_next ) =
21382                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
21383                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
21384             }
21385         },
21386
21387         '=>' => sub {
21388             if ( $last_nonblank_type eq $tok ) {
21389                 complain("Repeated '=>'s \n");
21390             }
21391
21392             # patch for operator_expected: note if we are in the list (use.t)
21393             # TODO: make version numbers a new token type
21394             if ( $statement_type eq 'use' ) { $statement_type = '_use' }
21395         },
21396
21397         # type = 'mm' for pre-decrement, '--' for post-decrement
21398         '--' => sub {
21399
21400             if ( $expecting == TERM ) { $type = 'mm' }
21401             elsif ( $expecting == UNKNOWN ) {
21402                 my ( $next_nonblank_token, $i_next ) =
21403                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
21404                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
21405             }
21406         },
21407
21408         '&&' => sub {
21409             error_if_expecting_TERM()
21410               if ( $expecting == TERM );
21411         },
21412
21413         '||' => sub {
21414             error_if_expecting_TERM()
21415               if ( $expecting == TERM );
21416         },
21417
21418         '//' => sub {
21419             error_if_expecting_TERM()
21420               if ( $expecting == TERM );
21421         },
21422     };
21423
21424     # ------------------------------------------------------------
21425     # end hash of code for handling individual token types
21426     # ------------------------------------------------------------
21427
21428     my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
21429
21430     # These block types terminate statements and do not need a trailing
21431     # semicolon
21432     # patched for SWITCH/CASE:
21433     my %is_zero_continuation_block_type;
21434     @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
21435       if elsif else unless while until for foreach switch case given when);
21436     @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
21437
21438     my %is_not_zero_continuation_block_type;
21439     @_ = qw(sort grep map do eval);
21440     @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
21441
21442     my %is_logical_container;
21443     @_ = qw(if elsif unless while and or err not && !  || for foreach);
21444     @is_logical_container{@_} = (1) x scalar(@_);
21445
21446     my %is_binary_type;
21447     @_ = qw(|| &&);
21448     @is_binary_type{@_} = (1) x scalar(@_);
21449
21450     my %is_binary_keyword;
21451     @_ = qw(and or err eq ne cmp);
21452     @is_binary_keyword{@_} = (1) x scalar(@_);
21453
21454     # 'L' is token for opening { at hash key
21455     my %is_opening_type;
21456     @_ = qw" L { ( [ ";
21457     @is_opening_type{@_} = (1) x scalar(@_);
21458
21459     # 'R' is token for closing } at hash key
21460     my %is_closing_type;
21461     @_ = qw" R } ) ] ";
21462     @is_closing_type{@_} = (1) x scalar(@_);
21463
21464     my %is_redo_last_next_goto;
21465     @_ = qw(redo last next goto);
21466     @is_redo_last_next_goto{@_} = (1) x scalar(@_);
21467
21468     my %is_use_require;
21469     @_ = qw(use require);
21470     @is_use_require{@_} = (1) x scalar(@_);
21471
21472     my %is_sub_package;
21473     @_ = qw(sub package);
21474     @is_sub_package{@_} = (1) x scalar(@_);
21475
21476     # This hash holds the hash key in $tokenizer_self for these keywords:
21477     my %is_format_END_DATA = (
21478         'format'   => '_in_format',
21479         '__END__'  => '_in_end',
21480         '__DATA__' => '_in_data',
21481     );
21482
21483     # ref: camel 3 p 147,
21484     # but perl may accept undocumented flags
21485     my %quote_modifiers = (
21486         's'  => '[cegimosx]',
21487         'y'  => '[cds]',
21488         'tr' => '[cds]',
21489         'm'  => '[cgimosx]',
21490         'qr' => '[imosx]',
21491         'q'  => "",
21492         'qq' => "",
21493         'qw' => "",
21494         'qx' => "",
21495     );
21496
21497     # table showing how many quoted things to look for after quote operator..
21498     # s, y, tr have 2 (pattern and replacement)
21499     # others have 1 (pattern only)
21500     my %quote_items = (
21501         's'  => 2,
21502         'y'  => 2,
21503         'tr' => 2,
21504         'm'  => 1,
21505         'qr' => 1,
21506         'q'  => 1,
21507         'qq' => 1,
21508         'qw' => 1,
21509         'qx' => 1,
21510     );
21511
21512     sub tokenize_this_line {
21513
21514   # This routine breaks a line of perl code into tokens which are of use in
21515   # indentation and reformatting.  One of my goals has been to define tokens
21516   # such that a newline may be inserted between any pair of tokens without
21517   # changing or invalidating the program. This version comes close to this,
21518   # although there are necessarily a few exceptions which must be caught by
21519   # the formatter.  Many of these involve the treatment of bare words.
21520   #
21521   # The tokens and their types are returned in arrays.  See previous
21522   # routine for their names.
21523   #
21524   # See also the array "valid_token_types" in the BEGIN section for an
21525   # up-to-date list.
21526   #
21527   # To simplify things, token types are either a single character, or they
21528   # are identical to the tokens themselves.
21529   #
21530   # As a debugging aid, the -D flag creates a file containing a side-by-side
21531   # comparison of the input string and its tokenization for each line of a file.
21532   # This is an invaluable debugging aid.
21533   #
21534   # In addition to tokens, and some associated quantities, the tokenizer
21535   # also returns flags indication any special line types.  These include
21536   # quotes, here_docs, formats.
21537   #
21538   # -----------------------------------------------------------------------
21539   #
21540   # How to add NEW_TOKENS:
21541   #
21542   # New token types will undoubtedly be needed in the future both to keep up
21543   # with changes in perl and to help adapt the tokenizer to other applications.
21544   #
21545   # Here are some notes on the minimal steps.  I wrote these notes while
21546   # adding the 'v' token type for v-strings, which are things like version
21547   # numbers 5.6.0, and ip addresses, and will use that as an example.  ( You
21548   # can use your editor to search for the string "NEW_TOKENS" to find the
21549   # appropriate sections to change):
21550   #
21551   # *. Try to talk somebody else into doing it!  If not, ..
21552   #
21553   # *. Make a backup of your current version in case things don't work out!
21554   #
21555   # *. Think of a new, unused character for the token type, and add to
21556   # the array @valid_token_types in the BEGIN section of this package.
21557   # For example, I used 'v' for v-strings.
21558   #
21559   # *. Implement coding to recognize the $type of the token in this routine.
21560   # This is the hardest part, and is best done by immitating or modifying
21561   # some of the existing coding.  For example, to recognize v-strings, I
21562   # patched 'sub scan_bare_identifier' to recognize v-strings beginning with
21563   # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
21564   #
21565   # *. Update sub operator_expected.  This update is critically important but
21566   # the coding is trivial.  Look at the comments in that routine for help.
21567   # For v-strings, which should behave like numbers, I just added 'v' to the
21568   # regex used to handle numbers and strings (types 'n' and 'Q').
21569   #
21570   # *. Implement a 'bond strength' rule in sub set_bond_strengths in
21571   # Perl::Tidy::Formatter for breaking lines around this token type.  You can
21572   # skip this step and take the default at first, then adjust later to get
21573   # desired results.  For adding type 'v', I looked at sub bond_strength and
21574   # saw that number type 'n' was using default strengths, so I didn't do
21575   # anything.  I may tune it up someday if I don't like the way line
21576   # breaks with v-strings look.
21577   #
21578   # *. Implement a 'whitespace' rule in sub set_white_space_flag in
21579   # Perl::Tidy::Formatter.  For adding type 'v', I looked at this routine
21580   # and saw that type 'n' used spaces on both sides, so I just added 'v'
21581   # to the array @spaces_both_sides.
21582   #
21583   # *. Update HtmlWriter package so that users can colorize the token as
21584   # desired.  This is quite easy; see comments identified by 'NEW_TOKENS' in
21585   # that package.  For v-strings, I initially chose to use a default color
21586   # equal to the default for numbers, but it might be nice to change that
21587   # eventually.
21588   #
21589   # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
21590   #
21591   # *. Run lots and lots of debug tests.  Start with special files designed
21592   # to test the new token type.  Run with the -D flag to create a .DEBUG
21593   # file which shows the tokenization.  When these work ok, test as many old
21594   # scripts as possible.  Start with all of the '.t' files in the 'test'
21595   # directory of the distribution file.  Compare .tdy output with previous
21596   # version and updated version to see the differences.  Then include as
21597   # many more files as possible. My own technique has been to collect a huge
21598   # number of perl scripts (thousands!) into one directory and run perltidy
21599   # *, then run diff between the output of the previous version and the
21600   # current version.
21601   #
21602   # *. For another example, search for the smartmatch operator '~~'
21603   # with your editor to see where updates were made for it.
21604   #
21605   # -----------------------------------------------------------------------
21606
21607         my $line_of_tokens = shift;
21608         my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
21609
21610         # patch while coding change is underway
21611         # make callers private data to allow access
21612         # $tokenizer_self = $caller_tokenizer_self;
21613
21614         # extract line number for use in error messages
21615         $input_line_number = $line_of_tokens->{_line_number};
21616
21617         # reinitialize for multi-line quote
21618         $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
21619
21620         # check for pod documentation
21621         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
21622
21623             # must not be in multi-line quote
21624             # and must not be in an eqn
21625             if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
21626             {
21627                 $tokenizer_self->{_in_pod} = 1;
21628                 return;
21629             }
21630         }
21631
21632         $input_line = $untrimmed_input_line;
21633
21634         chomp $input_line;
21635
21636         # trim start of this line unless we are continuing a quoted line
21637         # do not trim end because we might end in a quote (test: deken4.pl)
21638         # Perl::Tidy::Formatter will delete needless trailing blanks
21639         unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
21640             $input_line =~ s/^\s*//;    # trim left end
21641         }
21642
21643         # update the copy of the line for use in error messages
21644         # This must be exactly what we give the pre_tokenizer
21645         $tokenizer_self->{_line_text} = $input_line;
21646
21647         # re-initialize for the main loop
21648         $routput_token_list     = [];    # stack of output token indexes
21649         $routput_token_type     = [];    # token types
21650         $routput_block_type     = [];    # types of code block
21651         $routput_container_type = [];    # paren types, such as if, elsif, ..
21652         $routput_type_sequence  = [];    # nesting sequential number
21653
21654         $rhere_target_list = [];
21655
21656         $tok             = $last_nonblank_token;
21657         $type            = $last_nonblank_type;
21658         $prototype       = $last_nonblank_prototype;
21659         $last_nonblank_i = -1;
21660         $block_type      = $last_nonblank_block_type;
21661         $container_type  = $last_nonblank_container_type;
21662         $type_sequence   = $last_nonblank_type_sequence;
21663         $peeked_ahead    = 0;
21664
21665         # tokenization is done in two stages..
21666         # stage 1 is a very simple pre-tokenization
21667         my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
21668
21669         # a little optimization for a full-line comment
21670         if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
21671             $max_tokens_wanted = 1    # no use tokenizing a comment
21672         }
21673
21674         # start by breaking the line into pre-tokens
21675         ( $rtokens, $rtoken_map, $rtoken_type ) =
21676           pre_tokenize( $input_line, $max_tokens_wanted );
21677
21678         $max_token_index = scalar(@$rtokens) - 1;
21679         push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
21680         push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
21681         push( @$rtoken_type, 'b', 'b', 'b' );
21682
21683         # initialize for main loop
21684         for $i ( 0 .. $max_token_index + 3 ) {
21685             $routput_token_type->[$i]     = "";
21686             $routput_block_type->[$i]     = "";
21687             $routput_container_type->[$i] = "";
21688             $routput_type_sequence->[$i]  = "";
21689         }
21690         $i     = -1;
21691         $i_tok = -1;
21692
21693         # ------------------------------------------------------------
21694         # begin main tokenization loop
21695         # ------------------------------------------------------------
21696
21697         # we are looking at each pre-token of one line and combining them
21698         # into tokens
21699         while ( ++$i <= $max_token_index ) {
21700
21701             if ($in_quote) {    # continue looking for end of a quote
21702                 $type = $quote_type;
21703
21704                 unless ( @{$routput_token_list} )
21705                 {               # initialize if continuation line
21706                     push( @{$routput_token_list}, $i );
21707                     $routput_token_type->[$i] = $type;
21708
21709                 }
21710                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
21711
21712                 # scan for the end of the quote or pattern
21713                 (
21714                     $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
21715                     $quoted_string_1, $quoted_string_2
21716                   )
21717                   = do_quote(
21718                     $i,               $in_quote,    $quote_character,
21719                     $quote_pos,       $quote_depth, $quoted_string_1,
21720                     $quoted_string_2, $rtokens,     $rtoken_map,
21721                     $max_token_index
21722                   );
21723
21724                 # all done if we didn't find it
21725                 last if ($in_quote);
21726
21727                 # save pattern and replacement text for rescanning
21728                 my $qs1 = $quoted_string_1;
21729                 my $qs2 = $quoted_string_2;
21730
21731                 # re-initialize for next search
21732                 $quote_character = '';
21733                 $quote_pos       = 0;
21734                 $quote_type      = 'Q';
21735                 $quoted_string_1 = "";
21736                 $quoted_string_2 = "";
21737                 last if ( ++$i > $max_token_index );
21738
21739                 # look for any modifiers
21740                 if ($allowed_quote_modifiers) {
21741
21742                     # check for exact quote modifiers
21743                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
21744                         my $str = $$rtokens[$i];
21745                         my $saw_modifier_e;
21746                         while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
21747                             my $pos = pos($str);
21748                             my $char = substr( $str, $pos - 1, 1 );
21749                             $saw_modifier_e ||= ( $char eq 'e' );
21750                         }
21751
21752                         # For an 'e' quote modifier we must scan the replacement
21753                         # text for here-doc targets.
21754                         if ($saw_modifier_e) {
21755
21756                             my $rht = scan_replacement_text($qs1);
21757
21758                             # Change type from 'Q' to 'h' for quotes with
21759                             # here-doc targets so that the formatter (see sub
21760                             # print_line_of_tokens) will not make any line
21761                             # breaks after this point.
21762                             if ($rht) {
21763                                 push @{$rhere_target_list}, @{$rht};
21764                                 $type = 'h';
21765                                 if ( $i_tok < 0 ) {
21766                                     my $ilast = $routput_token_list->[-1];
21767                                     $routput_token_type->[$ilast] = $type;
21768                                 }
21769                             }
21770                         }
21771
21772                         if ( defined( pos($str) ) ) {
21773
21774                             # matched
21775                             if ( pos($str) == length($str) ) {
21776                                 last if ( ++$i > $max_token_index );
21777                             }
21778
21779                             # Looks like a joined quote modifier
21780                             # and keyword, maybe something like
21781                             # s/xxx/yyy/gefor @k=...
21782                             # Example is "galgen.pl".  Would have to split
21783                             # the word and insert a new token in the
21784                             # pre-token list.  This is so rare that I haven't
21785                             # done it.  Will just issue a warning citation.
21786
21787                             # This error might also be triggered if my quote
21788                             # modifier characters are incomplete
21789                             else {
21790                                 warning(<<EOM);
21791
21792 Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
21793 Please put a space between quote modifiers and trailing keywords.
21794 EOM
21795
21796                            # print "token $$rtokens[$i]\n";
21797                            # my $num = length($str) - pos($str);
21798                            # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
21799                            # print "continuing with new token $$rtokens[$i]\n";
21800
21801                                 # skipping past this token does least damage
21802                                 last if ( ++$i > $max_token_index );
21803                             }
21804                         }
21805                         else {
21806
21807                             # example file: rokicki4.pl
21808                             # This error might also be triggered if my quote
21809                             # modifier characters are incomplete
21810                             write_logfile_entry(
21811 "Note: found word $str at quote modifier location\n"
21812                             );
21813                         }
21814                     }
21815
21816                     # re-initialize
21817                     $allowed_quote_modifiers = "";
21818                 }
21819             }
21820
21821             unless ( $tok =~ /^\s*$/ ) {
21822
21823                 # try to catch some common errors
21824                 if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
21825
21826                     if ( $last_nonblank_token eq 'eq' ) {
21827                         complain("Should 'eq' be '==' here ?\n");
21828                     }
21829                     elsif ( $last_nonblank_token eq 'ne' ) {
21830                         complain("Should 'ne' be '!=' here ?\n");
21831                     }
21832                 }
21833
21834                 $last_last_nonblank_token      = $last_nonblank_token;
21835                 $last_last_nonblank_type       = $last_nonblank_type;
21836                 $last_last_nonblank_block_type = $last_nonblank_block_type;
21837                 $last_last_nonblank_container_type =
21838                   $last_nonblank_container_type;
21839                 $last_last_nonblank_type_sequence =
21840                   $last_nonblank_type_sequence;
21841                 $last_nonblank_token          = $tok;
21842                 $last_nonblank_type           = $type;
21843                 $last_nonblank_prototype      = $prototype;
21844                 $last_nonblank_block_type     = $block_type;
21845                 $last_nonblank_container_type = $container_type;
21846                 $last_nonblank_type_sequence  = $type_sequence;
21847                 $last_nonblank_i              = $i_tok;
21848             }
21849
21850             # store previous token type
21851             if ( $i_tok >= 0 ) {
21852                 $routput_token_type->[$i_tok]     = $type;
21853                 $routput_block_type->[$i_tok]     = $block_type;
21854                 $routput_container_type->[$i_tok] = $container_type;
21855                 $routput_type_sequence->[$i_tok]  = $type_sequence;
21856             }
21857             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
21858             my $pre_type = $$rtoken_type[$i];    # and type
21859             $tok  = $pre_tok;
21860             $type = $pre_type;                   # to be modified as necessary
21861             $block_type = "";    # blank for all tokens except code block braces
21862             $container_type = "";    # blank for all tokens except some parens
21863             $type_sequence  = "";    # blank for all tokens except ?/:
21864             $prototype = "";    # blank for all tokens except user defined subs
21865             $i_tok     = $i;
21866
21867             # this pre-token will start an output token
21868             push( @{$routput_token_list}, $i_tok );
21869
21870             # continue gathering identifier if necessary
21871             # but do not start on blanks and comments
21872             if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
21873
21874                 if ( $id_scan_state =~ /^(sub|package)/ ) {
21875                     scan_id();
21876                 }
21877                 else {
21878                     scan_identifier();
21879                 }
21880
21881                 last if ($id_scan_state);
21882                 next if ( ( $i > 0 ) || $type );
21883
21884                 # didn't find any token; start over
21885                 $type = $pre_type;
21886                 $tok  = $pre_tok;
21887             }
21888
21889             # handle whitespace tokens..
21890             next if ( $type eq 'b' );
21891             my $prev_tok  = $i > 0 ? $$rtokens[ $i - 1 ]     : ' ';
21892             my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
21893
21894             # Build larger tokens where possible, since we are not in a quote.
21895             #
21896             # First try to assemble digraphs.  The following tokens are
21897             # excluded and handled specially:
21898             # '/=' is excluded because the / might start a pattern.
21899             # 'x=' is excluded since it might be $x=, with $ on previous line
21900             # '**' and *= might be typeglobs of punctuation variables
21901             # I have allowed tokens starting with <, such as <=,
21902             # because I don't think these could be valid angle operators.
21903             # test file: storrs4.pl
21904             my $test_tok   = $tok . $$rtokens[ $i + 1 ];
21905             my $combine_ok = $is_digraph{$test_tok};
21906
21907             # check for special cases which cannot be combined
21908             if ($combine_ok) {
21909
21910                 # '//' must be defined_or operator if an operator is expected.
21911                 # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
21912                 # could be migrated here for clarity
21913                 if ( $test_tok eq '//' ) {
21914                     my $next_type = $$rtokens[ $i + 1 ];
21915                     my $expecting =
21916                       operator_expected( $prev_type, $tok, $next_type );
21917                     $combine_ok = 0 unless ( $expecting == OPERATOR );
21918                 }
21919             }
21920
21921             if (
21922                 $combine_ok
21923                 && ( $test_tok ne '/=' )    # might be pattern
21924                 && ( $test_tok ne 'x=' )    # might be $x
21925                 && ( $test_tok ne '**' )    # typeglob?
21926                 && ( $test_tok ne '*=' )    # typeglob?
21927               )
21928             {
21929                 $tok = $test_tok;
21930                 $i++;
21931
21932                 # Now try to assemble trigraphs.  Note that all possible
21933                 # perl trigraphs can be constructed by appending a character
21934                 # to a digraph.
21935                 $test_tok = $tok . $$rtokens[ $i + 1 ];
21936
21937                 if ( $is_trigraph{$test_tok} ) {
21938                     $tok = $test_tok;
21939                     $i++;
21940                 }
21941             }
21942
21943             $type      = $tok;
21944             $next_tok  = $$rtokens[ $i + 1 ];
21945             $next_type = $$rtoken_type[ $i + 1 ];
21946
21947             TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
21948                 local $" = ')(';
21949                 my @debug_list = (
21950                     $last_nonblank_token,      $tok,
21951                     $next_tok,                 $brace_depth,
21952                     $brace_type[$brace_depth], $paren_depth,
21953                     $paren_type[$paren_depth]
21954                 );
21955                 print "TOKENIZE:(@debug_list)\n";
21956             };
21957
21958             # turn off attribute list on first non-blank, non-bareword
21959             if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
21960
21961             ###############################################################
21962             # We have the next token, $tok.
21963             # Now we have to examine this token and decide what it is
21964             # and define its $type
21965             #
21966             # section 1: bare words
21967             ###############################################################
21968
21969             if ( $pre_type eq 'w' ) {
21970                 $expecting = operator_expected( $prev_type, $tok, $next_type );
21971                 my ( $next_nonblank_token, $i_next ) =
21972                   find_next_nonblank_token( $i, $rtokens, $max_token_index );
21973
21974                 # ATTRS: handle sub and variable attributes
21975                 if ($in_attribute_list) {
21976
21977                     # treat bare word followed by open paren like qw(
21978                     if ( $next_nonblank_token eq '(' ) {
21979                         $in_quote                = $quote_items{q};
21980                         $allowed_quote_modifiers = $quote_modifiers{q};
21981                         $type                    = 'q';
21982                         $quote_type              = 'q';
21983                         next;
21984                     }
21985
21986                     # handle bareword not followed by open paren
21987                     else {
21988                         $type = 'w';
21989                         next;
21990                     }
21991                 }
21992
21993                 # quote a word followed by => operator
21994                 if ( $next_nonblank_token eq '=' ) {
21995
21996                     if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
21997                         if ( $is_constant{$current_package}{$tok} ) {
21998                             $type = 'C';
21999                         }
22000                         elsif ( $is_user_function{$current_package}{$tok} ) {
22001                             $type = 'U';
22002                             $prototype =
22003                               $user_function_prototype{$current_package}{$tok};
22004                         }
22005                         elsif ( $tok =~ /^v\d+$/ ) {
22006                             $type = 'v';
22007                             report_v_string($tok);
22008                         }
22009                         else { $type = 'w' }
22010
22011                         next;
22012                     }
22013                 }
22014
22015                 # quote a bare word within braces..like xxx->{s}; note that we
22016                 # must be sure this is not a structural brace, to avoid
22017                 # mistaking {s} in the following for a quoted bare word:
22018                 #     for(@[){s}bla}BLA}
22019                 if (   ( $last_nonblank_type eq 'L' )
22020                     && ( $next_nonblank_token eq '}' ) )
22021                 {
22022                     $type = 'w';
22023                     next;
22024                 }
22025
22026                 # a bare word immediately followed by :: is not a keyword;
22027                 # use $tok_kw when testing for keywords to avoid a mistake
22028                 my $tok_kw = $tok;
22029                 if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
22030                 {
22031                     $tok_kw .= '::';
22032                 }
22033
22034                 # handle operator x (now we know it isn't $x=)
22035                 if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
22036                     if ( $tok eq 'x' ) {
22037
22038                         if ( $$rtokens[ $i + 1 ] eq '=' ) {    # x=
22039                             $tok  = 'x=';
22040                             $type = $tok;
22041                             $i++;
22042                         }
22043                         else {
22044                             $type = 'x';
22045                         }
22046                     }
22047
22048                     # FIXME: Patch: mark something like x4 as an integer for now
22049                     # It gets fixed downstream.  This is easier than
22050                     # splitting the pretoken.
22051                     else {
22052                         $type = 'n';
22053                     }
22054                 }
22055
22056                 elsif ( ( $tok eq 'strict' )
22057                     and ( $last_nonblank_token eq 'use' ) )
22058                 {
22059                     $tokenizer_self->{_saw_use_strict} = 1;
22060                     scan_bare_identifier();
22061                 }
22062
22063                 elsif ( ( $tok eq 'warnings' )
22064                     and ( $last_nonblank_token eq 'use' ) )
22065                 {
22066                     $tokenizer_self->{_saw_perl_dash_w} = 1;
22067
22068                     # scan as identifier, so that we pick up something like:
22069                     # use warnings::register
22070                     scan_bare_identifier();
22071                 }
22072
22073                 elsif (
22074                        $tok eq 'AutoLoader'
22075                     && $tokenizer_self->{_look_for_autoloader}
22076                     && (
22077                         $last_nonblank_token eq 'use'
22078
22079                         # these regexes are from AutoSplit.pm, which we want
22080                         # to mimic
22081                         || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
22082                         || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
22083                     )
22084                   )
22085                 {
22086                     write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
22087                     $tokenizer_self->{_saw_autoloader}      = 1;
22088                     $tokenizer_self->{_look_for_autoloader} = 0;
22089                     scan_bare_identifier();
22090                 }
22091
22092                 elsif (
22093                        $tok eq 'SelfLoader'
22094                     && $tokenizer_self->{_look_for_selfloader}
22095                     && (   $last_nonblank_token eq 'use'
22096                         || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
22097                         || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
22098                   )
22099                 {
22100                     write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
22101                     $tokenizer_self->{_saw_selfloader}      = 1;
22102                     $tokenizer_self->{_look_for_selfloader} = 0;
22103                     scan_bare_identifier();
22104                 }
22105
22106                 elsif ( ( $tok eq 'constant' )
22107                     and ( $last_nonblank_token eq 'use' ) )
22108                 {
22109                     scan_bare_identifier();
22110                     my ( $next_nonblank_token, $i_next ) =
22111                       find_next_nonblank_token( $i, $rtokens,
22112                         $max_token_index );
22113
22114                     if ($next_nonblank_token) {
22115
22116                         if ( $is_keyword{$next_nonblank_token} ) {
22117                             warning(
22118 "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
22119                             );
22120                         }
22121
22122                         # FIXME: could check for error in which next token is
22123                         # not a word (number, punctuation, ..)
22124                         else {
22125                             $is_constant{$current_package}
22126                               {$next_nonblank_token} = 1;
22127                         }
22128                     }
22129                 }
22130
22131                 # various quote operators
22132                 elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
22133                     if ( $expecting == OPERATOR ) {
22134
22135                         # patch for paren-less for/foreach glitch, part 1
22136                         # perl will accept this construct as valid:
22137                         #
22138                         #    foreach my $key qw\Uno Due Tres Quadro\ {
22139                         #        print "Set $key\n";
22140                         #    }
22141                         unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
22142                         {
22143                             error_if_expecting_OPERATOR();
22144                         }
22145                     }
22146                     $in_quote                = $quote_items{$tok};
22147                     $allowed_quote_modifiers = $quote_modifiers{$tok};
22148
22149                    # All quote types are 'Q' except possibly qw quotes.
22150                    # qw quotes are special in that they may generally be trimmed
22151                    # of leading and trailing whitespace.  So they are given a
22152                    # separate type, 'q', unless requested otherwise.
22153                     $type =
22154                       ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
22155                       ? 'q'
22156                       : 'Q';
22157                     $quote_type = $type;
22158                 }
22159
22160                 # check for a statement label
22161                 elsif (
22162                        ( $next_nonblank_token eq ':' )
22163                     && ( $$rtokens[ $i_next + 1 ] ne ':' )
22164                     && ( $i_next <= $max_token_index )    # colon on same line
22165                     && label_ok()
22166                   )
22167                 {
22168                     if ( $tok !~ /A-Z/ ) {
22169                         push @{ $tokenizer_self->{_rlower_case_labels_at} },
22170                           $input_line_number;
22171                     }
22172                     $type = 'J';
22173                     $tok .= ':';
22174                     $i = $i_next;
22175                     next;
22176                 }
22177
22178                 #      'sub' || 'package'
22179                 elsif ( $is_sub_package{$tok_kw} ) {
22180                     error_if_expecting_OPERATOR()
22181                       if ( $expecting == OPERATOR );
22182                     scan_id();
22183                 }
22184
22185                 # Note on token types for format, __DATA__, __END__:
22186                 # It simplifies things to give these type ';', so that when we
22187                 # start rescanning we will be expecting a token of type TERM.
22188                 # We will switch to type 'k' before outputting the tokens.
22189                 elsif ( $is_format_END_DATA{$tok_kw} ) {
22190                     $type = ';';    # make tokenizer look for TERM next
22191                     $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
22192                     last;
22193                 }
22194
22195                 elsif ( $is_keyword{$tok_kw} ) {
22196                     $type = 'k';
22197
22198                     # Since for and foreach may not be followed immediately
22199                     # by an opening paren, we have to remember which keyword
22200                     # is associated with the next '('
22201                     if ( $is_for_foreach{$tok} ) {
22202                         if ( new_statement_ok() ) {
22203                             $want_paren = $tok;
22204                         }
22205                     }
22206
22207                     # recognize 'use' statements, which are special
22208                     elsif ( $is_use_require{$tok} ) {
22209                         $statement_type = $tok;
22210                         error_if_expecting_OPERATOR()
22211                           if ( $expecting == OPERATOR );
22212                     }
22213
22214                     # remember my and our to check for trailing ": shared"
22215                     elsif ( $is_my_our{$tok} ) {
22216                         $statement_type = $tok;
22217                     }
22218
22219                     # Check for misplaced 'elsif' and 'else', but allow isolated
22220                     # else or elsif blocks to be formatted.  This is indicated
22221                     # by a last noblank token of ';'
22222                     elsif ( $tok eq 'elsif' ) {
22223                         if (   $last_nonblank_token ne ';'
22224                             && $last_nonblank_block_type !~
22225                             /^(if|elsif|unless)$/ )
22226                         {
22227                             warning(
22228 "expecting '$tok' to follow one of 'if|elsif|unless'\n"
22229                             );
22230                         }
22231                     }
22232                     elsif ( $tok eq 'else' ) {
22233
22234                         # patched for SWITCH/CASE
22235                         if (   $last_nonblank_token ne ';'
22236                             && $last_nonblank_block_type !~
22237                             /^(if|elsif|unless|case|when)$/ )
22238                         {
22239                             warning(
22240 "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
22241                             );
22242                         }
22243                     }
22244                     elsif ( $tok eq 'continue' ) {
22245                         if (   $last_nonblank_token ne ';'
22246                             && $last_nonblank_block_type !~
22247                             /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
22248                         {
22249
22250                             # note: ';' '{' and '}' in list above
22251                             # because continues can follow bare blocks;
22252                             # ':' is labeled block
22253                             warning("'$tok' should follow a block\n");
22254                         }
22255                     }
22256
22257                     # patch for SWITCH/CASE if 'case' and 'when are
22258                     # treated as keywords.
22259                     elsif ( $tok eq 'when' || $tok eq 'case' ) {
22260                         $statement_type = $tok;    # next '{' is block
22261                     }
22262                 }
22263
22264                 # check for inline label following
22265                 #         /^(redo|last|next|goto)$/
22266                 elsif (( $last_nonblank_type eq 'k' )
22267                     && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
22268                 {
22269                     $type = 'j';
22270                     next;
22271                 }
22272
22273                 # something else --
22274                 else {
22275
22276                     scan_bare_identifier();
22277                     if ( $type eq 'w' ) {
22278
22279                         if ( $expecting == OPERATOR ) {
22280
22281                             # don't complain about possible indirect object
22282                             # notation.
22283                             # For example:
22284                             #   package main;
22285                             #   sub new($) { ... }
22286                             #   $b = new A::;  # calls A::new
22287                             #   $c = new A;    # same thing but suspicious
22288                             # This will call A::new but we have a 'new' in
22289                             # main:: which looks like a constant.
22290                             #
22291                             if ( $last_nonblank_type eq 'C' ) {
22292                                 if ( $tok !~ /::$/ ) {
22293                                     complain(<<EOM);
22294 Expecting operator after '$last_nonblank_token' but found bare word '$tok'
22295        Maybe indirectet object notation?
22296 EOM
22297                                 }
22298                             }
22299                             else {
22300                                 error_if_expecting_OPERATOR("bareword");
22301                             }
22302                         }
22303
22304                         # mark bare words immediately followed by a paren as
22305                         # functions
22306                         $next_tok = $$rtokens[ $i + 1 ];
22307                         if ( $next_tok eq '(' ) {
22308                             $type = 'U';
22309                         }
22310
22311                         # underscore after file test operator is file handle
22312                         if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
22313                             $type = 'Z';
22314                         }
22315
22316                         # patch for SWITCH/CASE if 'case' and 'when are
22317                         # not treated as keywords:
22318                         if (
22319                             (
22320                                    $tok                      eq 'case'
22321                                 && $brace_type[$brace_depth] eq 'switch'
22322                             )
22323                             || (   $tok eq 'when'
22324                                 && $brace_type[$brace_depth] eq 'given' )
22325                           )
22326                         {
22327                             $statement_type = $tok;    # next '{' is block
22328                             $type = 'k';    # for keyword syntax coloring
22329                         }
22330
22331                         # patch for SWITCH/CASE if switch and given not keywords
22332                         # Switch is not a perl 5 keyword, but we will gamble
22333                         # and mark switch followed by paren as a keyword.  This
22334                         # is only necessary to get html syntax coloring nice,
22335                         # and does not commit this as being a switch/case.
22336                         if ( $next_nonblank_token eq '('
22337                             && ( $tok eq 'switch' || $tok eq 'given' ) )
22338                         {
22339                             $type = 'k';    # for keyword syntax coloring
22340                         }
22341                     }
22342                 }
22343             }
22344
22345             ###############################################################
22346             # section 2: strings of digits
22347             ###############################################################
22348             elsif ( $pre_type eq 'd' ) {
22349                 $expecting = operator_expected( $prev_type, $tok, $next_type );
22350                 error_if_expecting_OPERATOR("Number")
22351                   if ( $expecting == OPERATOR );
22352                 my $number = scan_number();
22353                 if ( !defined($number) ) {
22354
22355                     # shouldn't happen - we should always get a number
22356                     warning("non-number beginning with digit--program bug\n");
22357                     report_definite_bug();
22358                 }
22359             }
22360
22361             ###############################################################
22362             # section 3: all other tokens
22363             ###############################################################
22364
22365             else {
22366                 last if ( $tok eq '#' );
22367                 my $code = $tokenization_code->{$tok};
22368                 if ($code) {
22369                     $expecting =
22370                       operator_expected( $prev_type, $tok, $next_type );
22371                     $code->();
22372                     redo if $in_quote;
22373                 }
22374             }
22375         }
22376
22377         # -----------------------------
22378         # end of main tokenization loop
22379         # -----------------------------
22380
22381         if ( $i_tok >= 0 ) {
22382             $routput_token_type->[$i_tok]     = $type;
22383             $routput_block_type->[$i_tok]     = $block_type;
22384             $routput_container_type->[$i_tok] = $container_type;
22385             $routput_type_sequence->[$i_tok]  = $type_sequence;
22386         }
22387
22388         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
22389             $last_last_nonblank_token          = $last_nonblank_token;
22390             $last_last_nonblank_type           = $last_nonblank_type;
22391             $last_last_nonblank_block_type     = $last_nonblank_block_type;
22392             $last_last_nonblank_container_type = $last_nonblank_container_type;
22393             $last_last_nonblank_type_sequence  = $last_nonblank_type_sequence;
22394             $last_nonblank_token               = $tok;
22395             $last_nonblank_type                = $type;
22396             $last_nonblank_block_type          = $block_type;
22397             $last_nonblank_container_type      = $container_type;
22398             $last_nonblank_type_sequence       = $type_sequence;
22399             $last_nonblank_prototype           = $prototype;
22400         }
22401
22402         # reset indentation level if necessary at a sub or package
22403         # in an attempt to recover from a nesting error
22404         if ( $level_in_tokenizer < 0 ) {
22405             if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
22406                 reset_indentation_level(0);
22407                 brace_warning("resetting level to 0 at $1 $2\n");
22408             }
22409         }
22410
22411         # all done tokenizing this line ...
22412         # now prepare the final list of tokens and types
22413
22414         my @token_type     = ();   # stack of output token types
22415         my @block_type     = ();   # stack of output code block types
22416         my @container_type = ();   # stack of output code container types
22417         my @type_sequence  = ();   # stack of output type sequence numbers
22418         my @tokens         = ();   # output tokens
22419         my @levels         = ();   # structural brace levels of output tokens
22420         my @slevels        = ();   # secondary nesting levels of output tokens
22421         my @nesting_tokens = ();   # string of tokens leading to this depth
22422         my @nesting_types  = ();   # string of token types leading to this depth
22423         my @nesting_blocks = ();   # string of block types leading to this depth
22424         my @nesting_lists  = ();   # string of list types leading to this depth
22425         my @ci_string = ();  # string needed to compute continuation indentation
22426         my @container_environment = ();    # BLOCK or LIST
22427         my $container_environment = '';
22428         my $im                    = -1;    # previous $i value
22429         my $num;
22430         my $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
22431
22432 # =head1 Computing Token Indentation
22433 #
22434 #     The final section of the tokenizer forms tokens and also computes
22435 #     parameters needed to find indentation.  It is much easier to do it
22436 #     in the tokenizer than elsewhere.  Here is a brief description of how
22437 #     indentation is computed.  Perl::Tidy computes indentation as the sum
22438 #     of 2 terms:
22439 #
22440 #     (1) structural indentation, such as if/else/elsif blocks
22441 #     (2) continuation indentation, such as long parameter call lists.
22442 #
22443 #     These are occasionally called primary and secondary indentation.
22444 #
22445 #     Structural indentation is introduced by tokens of type '{', although
22446 #     the actual tokens might be '{', '(', or '['.  Structural indentation
22447 #     is of two types: BLOCK and non-BLOCK.  Default structural indentation
22448 #     is 4 characters if the standard indentation scheme is used.
22449 #
22450 #     Continuation indentation is introduced whenever a line at BLOCK level
22451 #     is broken before its termination.  Default continuation indentation
22452 #     is 2 characters in the standard indentation scheme.
22453 #
22454 #     Both types of indentation may be nested arbitrarily deep and
22455 #     interlaced.  The distinction between the two is somewhat arbitrary.
22456 #
22457 #     For each token, we will define two variables which would apply if
22458 #     the current statement were broken just before that token, so that
22459 #     that token started a new line:
22460 #
22461 #     $level = the structural indentation level,
22462 #     $ci_level = the continuation indentation level
22463 #
22464 #     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
22465 #     assuming defaults.  However, in some special cases it is customary
22466 #     to modify $ci_level from this strict value.
22467 #
22468 #     The total structural indentation is easy to compute by adding and
22469 #     subtracting 1 from a saved value as types '{' and '}' are seen.  The
22470 #     running value of this variable is $level_in_tokenizer.
22471 #
22472 #     The total continuation is much more difficult to compute, and requires
22473 #     several variables.  These veriables are:
22474 #
22475 #     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
22476 #       each indentation level, if there are intervening open secondary
22477 #       structures just prior to that level.
22478 #     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
22479 #       if the last token at that level is "continued", meaning that it
22480 #       is not the first token of an expression.
22481 #     $nesting_block_string = a string of 1's and 0's indicating, for each
22482 #       indentation level, if the level is of type BLOCK or not.
22483 #     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
22484 #     $nesting_list_string = a string of 1's and 0's indicating, for each
22485 #       indentation level, if it is is appropriate for list formatting.
22486 #       If so, continuation indentation is used to indent long list items.
22487 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
22488 #     @{$rslevel_stack} = a stack of total nesting depths at each
22489 #       structural indentation level, where "total nesting depth" means
22490 #       the nesting depth that would occur if every nesting token -- '{', '[',
22491 #       and '(' -- , regardless of context, is used to compute a nesting
22492 #       depth.
22493
22494         #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
22495         #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
22496
22497         my ( $ci_string_i, $level_i, $nesting_block_string_i,
22498             $nesting_list_string_i, $nesting_token_string_i,
22499             $nesting_type_string_i, );
22500
22501         foreach $i ( @{$routput_token_list} )
22502         {    # scan the list of pre-tokens indexes
22503
22504             # self-checking for valid token types
22505             my $type = $routput_token_type->[$i];
22506             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
22507             $level_i = $level_in_tokenizer;
22508
22509             # This can happen by running perltidy on non-scripts
22510             # although it could also be bug introduced by programming change.
22511             # Perl silently accepts a 032 (^Z) and takes it as the end
22512             if ( !$is_valid_token_type{$type} ) {
22513                 my $val = ord($type);
22514                 warning(
22515                     "unexpected character decimal $val ($type) in script\n");
22516                 $tokenizer_self->{_in_error} = 1;
22517             }
22518
22519             # ----------------------------------------------------------------
22520             # TOKEN TYPE PATCHES
22521             #  output __END__, __DATA__, and format as type 'k' instead of ';'
22522             # to make html colors correct, etc.
22523             my $fix_type = $type;
22524             if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
22525
22526             # output anonymous 'sub' as keyword
22527             if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
22528
22529             # -----------------------------------------------------------------
22530
22531             $nesting_token_string_i = $nesting_token_string;
22532             $nesting_type_string_i  = $nesting_type_string;
22533             $nesting_block_string_i = $nesting_block_string;
22534             $nesting_list_string_i  = $nesting_list_string;
22535
22536             # set primary indentation levels based on structural braces
22537             # Note: these are set so that the leading braces have a HIGHER
22538             # level than their CONTENTS, which is convenient for indentation
22539             # Also, define continuation indentation for each token.
22540             if ( $type eq '{' || $type eq 'L' ) {
22541
22542                 # use environment before updating
22543                 $container_environment =
22544                     $nesting_block_flag ? 'BLOCK'
22545                   : $nesting_list_flag  ? 'LIST'
22546                   :                       "";
22547
22548                 # if the difference between total nesting levels is not 1,
22549                 # there are intervening non-structural nesting types between
22550                 # this '{' and the previous unclosed '{'
22551                 my $intervening_secondary_structure = 0;
22552                 if ( @{$rslevel_stack} ) {
22553                     $intervening_secondary_structure =
22554                       $slevel_in_tokenizer - $rslevel_stack->[-1];
22555                 }
22556
22557      # =head1 Continuation Indentation
22558      #
22559      # Having tried setting continuation indentation both in the formatter and
22560      # in the tokenizer, I can say that setting it in the tokenizer is much,
22561      # much easier.  The formatter already has too much to do, and can't
22562      # make decisions on line breaks without knowing what 'ci' will be at
22563      # arbitrary locations.
22564      #
22565      # But a problem with setting the continuation indentation (ci) here
22566      # in the tokenizer is that we do not know where line breaks will actually
22567      # be.  As a result, we don't know if we should propagate continuation
22568      # indentation to higher levels of structure.
22569      #
22570      # For nesting of only structural indentation, we never need to do this.
22571      # For example, in a long if statement, like this
22572      #
22573      #   if ( !$output_block_type[$i]
22574      #     && ($in_statement_continuation) )
22575      #   {           <--outdented
22576      #       do_something();
22577      #   }
22578      #
22579      # the second line has ci but we do normally give the lines within the BLOCK
22580      # any ci.  This would be true if we had blocks nested arbitrarily deeply.
22581      #
22582      # But consider something like this, where we have created a break after
22583      # an opening paren on line 1, and the paren is not (currently) a
22584      # structural indentation token:
22585      #
22586      # my $file = $menubar->Menubutton(
22587      #   qw/-text File -underline 0 -menuitems/ => [
22588      #       [
22589      #           Cascade    => '~View',
22590      #           -menuitems => [
22591      #           ...
22592      #
22593      # The second line has ci, so it would seem reasonable to propagate it
22594      # down, giving the third line 1 ci + 1 indentation.  This suggests the
22595      # following rule, which is currently used to propagating ci down: if there
22596      # are any non-structural opening parens (or brackets, or braces), before
22597      # an opening structural brace, then ci is propagated down, and otherwise
22598      # not.  The variable $intervening_secondary_structure contains this
22599      # information for the current token, and the string
22600      # "$ci_string_in_tokenizer" is a stack of previous values of this
22601      # variable.
22602
22603                 # save the current states
22604                 push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
22605                 $level_in_tokenizer++;
22606
22607                 if ( $routput_block_type->[$i] ) {
22608                     $nesting_block_flag = 1;
22609                     $nesting_block_string .= '1';
22610                 }
22611                 else {
22612                     $nesting_block_flag = 0;
22613                     $nesting_block_string .= '0';
22614                 }
22615
22616                 # we will use continuation indentation within containers
22617                 # which are not blocks and not logical expressions
22618                 my $bit = 0;
22619                 if ( !$routput_block_type->[$i] ) {
22620
22621                     # propagate flag down at nested open parens
22622                     if ( $routput_container_type->[$i] eq '(' ) {
22623                         $bit = 1 if $nesting_list_flag;
22624                     }
22625
22626                   # use list continuation if not a logical grouping
22627                   # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
22628                     else {
22629                         $bit = 1
22630                           unless
22631                           $is_logical_container{ $routput_container_type->[$i]
22632                           };
22633                     }
22634                 }
22635                 $nesting_list_string .= $bit;
22636                 $nesting_list_flag = $bit;
22637
22638                 $ci_string_in_tokenizer .=
22639                   ( $intervening_secondary_structure != 0 ) ? '1' : '0';
22640                 $ci_string_sum = ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
22641                 $continuation_string_in_tokenizer .=
22642                   ( $in_statement_continuation > 0 ) ? '1' : '0';
22643
22644    #  Sometimes we want to give an opening brace continuation indentation,
22645    #  and sometimes not.  For code blocks, we don't do it, so that the leading
22646    #  '{' gets outdented, like this:
22647    #
22648    #   if ( !$output_block_type[$i]
22649    #     && ($in_statement_continuation) )
22650    #   {           <--outdented
22651    #
22652    #  For other types, we will give them continuation indentation.  For example,
22653    #  here is how a list looks with the opening paren indented:
22654    #
22655    #     @LoL =
22656    #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
22657    #         [ "homer", "marge", "bart" ], );
22658    #
22659    #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
22660
22661                 my $total_ci = $ci_string_sum;
22662                 if (
22663                     !$routput_block_type->[$i]    # patch: skip for BLOCK
22664                     && ($in_statement_continuation)
22665                   )
22666                 {
22667                     $total_ci += $in_statement_continuation
22668                       unless ( $ci_string_in_tokenizer =~ /1$/ );
22669                 }
22670
22671                 $ci_string_i               = $total_ci;
22672                 $in_statement_continuation = 0;
22673             }
22674
22675             elsif ( $type eq '}' || $type eq 'R' ) {
22676
22677                 # only a nesting error in the script would prevent popping here
22678                 if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
22679
22680                 $level_i = --$level_in_tokenizer;
22681
22682                 # restore previous level values
22683                 if ( length($nesting_block_string) > 1 )
22684                 {    # true for valid script
22685                     chop $nesting_block_string;
22686                     $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
22687                     chop $nesting_list_string;
22688                     $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
22689
22690                     chop $ci_string_in_tokenizer;
22691                     $ci_string_sum =
22692                       ( $_ = $ci_string_in_tokenizer ) =~ tr/1/0/;
22693
22694                     $in_statement_continuation =
22695                       chop $continuation_string_in_tokenizer;
22696
22697                     # zero continuation flag at terminal BLOCK '}' which
22698                     # ends a statement.
22699                     if ( $routput_block_type->[$i] ) {
22700
22701                         # ...These include non-anonymous subs
22702                         # note: could be sub ::abc { or sub 'abc
22703                         if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
22704
22705                          # note: older versions of perl require the /gc modifier
22706                          # here or else the \G does not work.
22707                             if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
22708                             {
22709                                 $in_statement_continuation = 0;
22710                             }
22711                         }
22712
22713 # ...and include all block types except user subs with
22714 # block prototypes and these: (sort|grep|map|do|eval)
22715 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
22716                         elsif (
22717                             $is_zero_continuation_block_type{
22718                                 $routput_block_type->[$i] } )
22719                         {
22720                             $in_statement_continuation = 0;
22721                         }
22722
22723                         # ..but these are not terminal types:
22724                         #     /^(sort|grep|map|do|eval)$/ )
22725                         elsif (
22726                             $is_not_zero_continuation_block_type{
22727                                 $routput_block_type->[$i] } )
22728                         {
22729                         }
22730
22731                         # ..and a block introduced by a label
22732                         # /^\w+\s*:$/gc ) {
22733                         elsif ( $routput_block_type->[$i] =~ /:$/ ) {
22734                             $in_statement_continuation = 0;
22735                         }
22736
22737                         # user function with block prototype
22738                         else {
22739                             $in_statement_continuation = 0;
22740                         }
22741                     }
22742
22743                     # If we are in a list, then
22744                     # we must set continuatoin indentation at the closing
22745                     # paren of something like this (paren after $check):
22746                     #     assert(
22747                     #         __LINE__,
22748                     #         ( not defined $check )
22749                     #           or ref $check
22750                     #           or $check eq "new"
22751                     #           or $check eq "old",
22752                     #     );
22753                     elsif ( $tok eq ')' ) {
22754                         $in_statement_continuation = 1
22755                           if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
22756                     }
22757                 }
22758
22759                 # use environment after updating
22760                 $container_environment =
22761                     $nesting_block_flag ? 'BLOCK'
22762                   : $nesting_list_flag  ? 'LIST'
22763                   :                       "";
22764                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
22765                 $nesting_block_string_i = $nesting_block_string;
22766                 $nesting_list_string_i  = $nesting_list_string;
22767             }
22768
22769             # not a structural indentation type..
22770             else {
22771
22772                 $container_environment =
22773                     $nesting_block_flag ? 'BLOCK'
22774                   : $nesting_list_flag  ? 'LIST'
22775                   :                       "";
22776
22777                 # zero the continuation indentation at certain tokens so
22778                 # that they will be at the same level as its container.  For
22779                 # commas, this simplifies the -lp indentation logic, which
22780                 # counts commas.  For ?: it makes them stand out.
22781                 if ($nesting_list_flag) {
22782                     if ( $type =~ /^[,\?\:]$/ ) {
22783                         $in_statement_continuation = 0;
22784                     }
22785                 }
22786
22787                 # be sure binary operators get continuation indentation
22788                 if (
22789                     $container_environment
22790                     && (   $type eq 'k' && $is_binary_keyword{$tok}
22791                         || $is_binary_type{$type} )
22792                   )
22793                 {
22794                     $in_statement_continuation = 1;
22795                 }
22796
22797                 # continuation indentation is sum of any open ci from previous
22798                 # levels plus the current level
22799                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
22800
22801                 # update continuation flag ...
22802                 # if this isn't a blank or comment..
22803                 if ( $type ne 'b' && $type ne '#' ) {
22804
22805                     # and we are in a BLOCK
22806                     if ($nesting_block_flag) {
22807
22808                         # the next token after a ';' and label starts a new stmt
22809                         if ( $type eq ';' || $type eq 'J' ) {
22810                             $in_statement_continuation = 0;
22811                         }
22812
22813                         # otherwise, we are continuing the current statement
22814                         else {
22815                             $in_statement_continuation = 1;
22816                         }
22817                     }
22818
22819                     # if we are not in a BLOCK..
22820                     else {
22821
22822                         # do not use continuation indentation if not list
22823                         # environment (could be within if/elsif clause)
22824                         if ( !$nesting_list_flag ) {
22825                             $in_statement_continuation = 0;
22826                         }
22827
22828                        # otherwise, the next token after a ',' starts a new term
22829                         elsif ( $type eq ',' ) {
22830                             $in_statement_continuation = 0;
22831                         }
22832
22833                         # otherwise, we are continuing the current term
22834                         else {
22835                             $in_statement_continuation = 1;
22836                         }
22837                     }
22838                 }
22839             }
22840
22841             if ( $level_in_tokenizer < 0 ) {
22842                 unless ( $tokenizer_self->{_saw_negative_indentation} ) {
22843                     $tokenizer_self->{_saw_negative_indentation} = 1;
22844                     warning("Starting negative indentation\n");
22845                 }
22846             }
22847
22848             # set secondary nesting levels based on all continment token types
22849             # Note: these are set so that the nesting depth is the depth
22850             # of the PREVIOUS TOKEN, which is convenient for setting
22851             # the stength of token bonds
22852             my $slevel_i = $slevel_in_tokenizer;
22853
22854             #    /^[L\{\(\[]$/
22855             if ( $is_opening_type{$type} ) {
22856                 $slevel_in_tokenizer++;
22857                 $nesting_token_string .= $tok;
22858                 $nesting_type_string  .= $type;
22859             }
22860
22861             #       /^[R\}\)\]]$/
22862             elsif ( $is_closing_type{$type} ) {
22863                 $slevel_in_tokenizer--;
22864                 my $char = chop $nesting_token_string;
22865
22866                 if ( $char ne $matching_start_token{$tok} ) {
22867                     $nesting_token_string .= $char . $tok;
22868                     $nesting_type_string  .= $type;
22869                 }
22870                 else {
22871                     chop $nesting_type_string;
22872                 }
22873             }
22874
22875             push( @block_type,            $routput_block_type->[$i] );
22876             push( @ci_string,             $ci_string_i );
22877             push( @container_environment, $container_environment );
22878             push( @container_type,        $routput_container_type->[$i] );
22879             push( @levels,                $level_i );
22880             push( @nesting_tokens,        $nesting_token_string_i );
22881             push( @nesting_types,         $nesting_type_string_i );
22882             push( @slevels,               $slevel_i );
22883             push( @token_type,            $fix_type );
22884             push( @type_sequence,         $routput_type_sequence->[$i] );
22885             push( @nesting_blocks,        $nesting_block_string );
22886             push( @nesting_lists,         $nesting_list_string );
22887
22888             # now form the previous token
22889             if ( $im >= 0 ) {
22890                 $num =
22891                   $$rtoken_map[$i] - $$rtoken_map[$im];    # how many characters
22892
22893                 if ( $num > 0 ) {
22894                     push( @tokens,
22895                         substr( $input_line, $$rtoken_map[$im], $num ) );
22896                 }
22897             }
22898             $im = $i;
22899         }
22900
22901         $num = length($input_line) - $$rtoken_map[$im];    # make the last token
22902         if ( $num > 0 ) {
22903             push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
22904         }
22905
22906         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
22907         $tokenizer_self->{_in_quote}          = $in_quote;
22908         $tokenizer_self->{_quote_target} =
22909           $in_quote ? matching_end_token($quote_character) : "";
22910         $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
22911
22912         $line_of_tokens->{_rtoken_type}            = \@token_type;
22913         $line_of_tokens->{_rtokens}                = \@tokens;
22914         $line_of_tokens->{_rblock_type}            = \@block_type;
22915         $line_of_tokens->{_rcontainer_type}        = \@container_type;
22916         $line_of_tokens->{_rcontainer_environment} = \@container_environment;
22917         $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
22918         $line_of_tokens->{_rlevels}                = \@levels;
22919         $line_of_tokens->{_rslevels}               = \@slevels;
22920         $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
22921         $line_of_tokens->{_rci_levels}             = \@ci_string;
22922         $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
22923
22924         return;
22925     }
22926 }    # end tokenize_this_line
22927
22928 #########i#############################################################
22929 # Tokenizer routines which assist in identifying token types
22930 #######################################################################
22931
22932 sub operator_expected {
22933
22934     # Many perl symbols have two or more meanings.  For example, '<<'
22935     # can be a shift operator or a here-doc operator.  The
22936     # interpretation of these symbols depends on the current state of
22937     # the tokenizer, which may either be expecting a term or an
22938     # operator.  For this example, a << would be a shift if an operator
22939     # is expected, and a here-doc if a term is expected.  This routine
22940     # is called to make this decision for any current token.  It returns
22941     # one of three possible values:
22942     #
22943     #     OPERATOR - operator expected (or at least, not a term)
22944     #     UNKNOWN  - can't tell
22945     #     TERM     - a term is expected (or at least, not an operator)
22946     #
22947     # The decision is based on what has been seen so far.  This
22948     # information is stored in the "$last_nonblank_type" and
22949     # "$last_nonblank_token" variables.  For example, if the
22950     # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
22951     # if $last_nonblank_type is 'n' (numeric), we are expecting an
22952     # OPERATOR.
22953     #
22954     # If a UNKNOWN is returned, the calling routine must guess. A major
22955     # goal of this tokenizer is to minimize the possiblity of returning
22956     # UNKNOWN, because a wrong guess can spoil the formatting of a
22957     # script.
22958     #
22959     # adding NEW_TOKENS: it is critically important that this routine be
22960     # updated to allow it to determine if an operator or term is to be
22961     # expected after the new token.  Doing this simply involves adding
22962     # the new token character to one of the regexes in this routine or
22963     # to one of the hash lists
22964     # that it uses, which are initialized in the BEGIN section.
22965     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
22966     # $statement_type
22967
22968     my ( $prev_type, $tok, $next_type ) = @_;
22969
22970     my $op_expected = UNKNOWN;
22971
22972 #print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
22973
22974 # Note: function prototype is available for token type 'U' for future
22975 # program development.  It contains the leading and trailing parens,
22976 # and no blanks.  It might be used to eliminate token type 'C', for
22977 # example (prototype = '()'). Thus:
22978 # if ($last_nonblank_type eq 'U') {
22979 #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
22980 # }
22981
22982     # A possible filehandle (or object) requires some care...
22983     if ( $last_nonblank_type eq 'Z' ) {
22984
22985         # angle.t
22986         if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
22987             $op_expected = UNKNOWN;
22988         }
22989
22990         # For possible file handle like "$a", Perl uses weird parsing rules.
22991         # For example:
22992         # print $a/2,"/hi";   - division
22993         # print $a / 2,"/hi"; - division
22994         # print $a/ 2,"/hi";  - division
22995         # print $a /2,"/hi";  - pattern (and error)!
22996         elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
22997             $op_expected = TERM;
22998         }
22999
23000         # Note when an operation is being done where a
23001         # filehandle might be expected, since a change in whitespace
23002         # could change the interpretation of the statement.
23003         else {
23004             if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
23005                 complain("operator in print statement not recommended\n");
23006                 $op_expected = OPERATOR;
23007             }
23008         }
23009     }
23010
23011     # handle something after 'do' and 'eval'
23012     elsif ( $is_block_operator{$last_nonblank_token} ) {
23013
23014         # something like $a = eval "expression";
23015         #                          ^
23016         if ( $last_nonblank_type eq 'k' ) {
23017             $op_expected = TERM;    # expression or list mode following keyword
23018         }
23019
23020         # something like $a = do { BLOCK } / 2;
23021         #                                  ^
23022         else {
23023             $op_expected = OPERATOR;    # block mode following }
23024         }
23025     }
23026
23027     # handle bare word..
23028     elsif ( $last_nonblank_type eq 'w' ) {
23029
23030         # unfortunately, we can't tell what type of token to expect next
23031         # after most bare words
23032         $op_expected = UNKNOWN;
23033     }
23034
23035     # operator, but not term possible after these types
23036     # Note: moved ')' from type to token because parens in list context
23037     # get marked as '{' '}' now.  This is a minor glitch in the following:
23038     #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
23039     #
23040     elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
23041         || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
23042     {
23043         $op_expected = OPERATOR;
23044
23045         # in a 'use' statement, numbers and v-strings are not true
23046         # numbers, so to avoid incorrect error messages, we will
23047         # mark them as unknown for now (use.t)
23048         # TODO: it would be much nicer to create a new token V for VERSION
23049         # number in a use statement.  Then this could be a check on type V
23050         # and related patches which change $statement_type for '=>'
23051         # and ',' could be removed.  Further, it would clean things up to
23052         # scan the 'use' statement with a separate subroutine.
23053         if (   ( $statement_type eq 'use' )
23054             && ( $last_nonblank_type =~ /^[nv]$/ ) )
23055         {
23056             $op_expected = UNKNOWN;
23057         }
23058     }
23059
23060     # no operator after many keywords, such as "die", "warn", etc
23061     elsif ( $expecting_term_token{$last_nonblank_token} ) {
23062
23063         # patch for dor.t (defined or).
23064         # perl functions which may be unary operators
23065         # TODO: This list is incomplete, and these should be put
23066         # into a hash.
23067         if (   $tok eq '/'
23068             && $next_type          eq '/'
23069             && $last_nonblank_type eq 'k'
23070             && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
23071         {
23072             $op_expected = OPERATOR;
23073         }
23074         else {
23075             $op_expected = TERM;
23076         }
23077     }
23078
23079     # no operator after things like + - **  (i.e., other operators)
23080     elsif ( $expecting_term_types{$last_nonblank_type} ) {
23081         $op_expected = TERM;
23082     }
23083
23084     # a few operators, like "time", have an empty prototype () and so
23085     # take no parameters but produce a value to operate on
23086     elsif ( $expecting_operator_token{$last_nonblank_token} ) {
23087         $op_expected = OPERATOR;
23088     }
23089
23090     # post-increment and decrement produce values to be operated on
23091     elsif ( $expecting_operator_types{$last_nonblank_type} ) {
23092         $op_expected = OPERATOR;
23093     }
23094
23095     # no value to operate on after sub block
23096     elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
23097
23098     # a right brace here indicates the end of a simple block.
23099     # all non-structural right braces have type 'R'
23100     # all braces associated with block operator keywords have been given those
23101     # keywords as "last_nonblank_token" and caught above.
23102     # (This statement is order dependent, and must come after checking
23103     # $last_nonblank_token).
23104     elsif ( $last_nonblank_type eq '}' ) {
23105
23106         # patch for dor.t (defined or).
23107         if (   $tok eq '/'
23108             && $next_type           eq '/'
23109             && $last_nonblank_token eq ']' )
23110         {
23111             $op_expected = OPERATOR;
23112         }
23113         else {
23114             $op_expected = TERM;
23115         }
23116     }
23117
23118     # something else..what did I forget?
23119     else {
23120
23121         # collecting diagnostics on unknown operator types..see what was missed
23122         $op_expected = UNKNOWN;
23123         write_diagnostics(
23124 "OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
23125         );
23126     }
23127
23128     TOKENIZER_DEBUG_FLAG_EXPECT && do {
23129         print
23130 "EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
23131     };
23132     return $op_expected;
23133 }
23134
23135 sub new_statement_ok {
23136
23137     # return true if the current token can start a new statement
23138     # USES GLOBAL VARIABLES: $last_nonblank_type
23139
23140     return label_ok()    # a label would be ok here
23141
23142       || $last_nonblank_type eq 'J';    # or we follow a label
23143
23144 }
23145
23146 sub label_ok {
23147
23148     # Decide if a bare word followed by a colon here is a label
23149     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
23150     # $brace_depth, @brace_type
23151
23152     # if it follows an opening or closing code block curly brace..
23153     if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
23154         && $last_nonblank_type eq $last_nonblank_token )
23155     {
23156
23157         # it is a label if and only if the curly encloses a code block
23158         return $brace_type[$brace_depth];
23159     }
23160
23161     # otherwise, it is a label if and only if it follows a ';'
23162     # (real or fake)
23163     else {
23164         return ( $last_nonblank_type eq ';' );
23165     }
23166 }
23167
23168 sub code_block_type {
23169
23170     # Decide if this is a block of code, and its type.
23171     # Must be called only when $type = $token = '{'
23172     # The problem is to distinguish between the start of a block of code
23173     # and the start of an anonymous hash reference
23174     # Returns "" if not code block, otherwise returns 'last_nonblank_token'
23175     # to indicate the type of code block.  (For example, 'last_nonblank_token'
23176     # might be 'if' for an if block, 'else' for an else block, etc).
23177     # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
23178     # $last_nonblank_block_type, $brace_depth, @brace_type
23179
23180     # handle case of multiple '{'s
23181
23182 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
23183
23184     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
23185     if (   $last_nonblank_token eq '{'
23186         && $last_nonblank_type eq $last_nonblank_token )
23187     {
23188
23189         # opening brace where a statement may appear is probably
23190         # a code block but might be and anonymous hash reference
23191         if ( $brace_type[$brace_depth] ) {
23192             return decide_if_code_block( $i, $rtokens, $rtoken_type,
23193                 $max_token_index );
23194         }
23195
23196         # cannot start a code block within an anonymous hash
23197         else {
23198             return "";
23199         }
23200     }
23201
23202     elsif ( $last_nonblank_token eq ';' ) {
23203
23204         # an opening brace where a statement may appear is probably
23205         # a code block but might be and anonymous hash reference
23206         return decide_if_code_block( $i, $rtokens, $rtoken_type,
23207             $max_token_index );
23208     }
23209
23210     # handle case of '}{'
23211     elsif ($last_nonblank_token eq '}'
23212         && $last_nonblank_type eq $last_nonblank_token )
23213     {
23214
23215         # a } { situation ...
23216         # could be hash reference after code block..(blktype1.t)
23217         if ($last_nonblank_block_type) {
23218             return decide_if_code_block( $i, $rtokens, $rtoken_type,
23219                 $max_token_index );
23220         }
23221
23222         # must be a block if it follows a closing hash reference
23223         else {
23224             return $last_nonblank_token;
23225         }
23226     }
23227
23228     # NOTE: braces after type characters start code blocks, but for
23229     # simplicity these are not identified as such.  See also
23230     # sub is_non_structural_brace.
23231     # elsif ( $last_nonblank_type eq 't' ) {
23232     #    return $last_nonblank_token;
23233     # }
23234
23235     # brace after label:
23236     elsif ( $last_nonblank_type eq 'J' ) {
23237         return $last_nonblank_token;
23238     }
23239
23240 # otherwise, look at previous token.  This must be a code block if
23241 # it follows any of these:
23242 # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
23243     elsif ( $is_code_block_token{$last_nonblank_token} ) {
23244         return $last_nonblank_token;
23245     }
23246
23247     # or a sub definition
23248     elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
23249         && $last_nonblank_token =~ /^sub\b/ )
23250     {
23251         return $last_nonblank_token;
23252     }
23253
23254     # user-defined subs with block parameters (like grep/map/eval)
23255     elsif ( $last_nonblank_type eq 'G' ) {
23256         return $last_nonblank_token;
23257     }
23258
23259     # check bareword
23260     elsif ( $last_nonblank_type eq 'w' ) {
23261         return decide_if_code_block( $i, $rtokens, $rtoken_type,
23262             $max_token_index );
23263     }
23264
23265     # anything else must be anonymous hash reference
23266     else {
23267         return "";
23268     }
23269 }
23270
23271 sub decide_if_code_block {
23272
23273     # USES GLOBAL VARIABLES: $last_nonblank_token
23274     my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
23275     my ( $next_nonblank_token, $i_next ) =
23276       find_next_nonblank_token( $i, $rtokens, $max_token_index );
23277
23278     # we are at a '{' where a statement may appear.
23279     # We must decide if this brace starts an anonymous hash or a code
23280     # block.
23281     # return "" if anonymous hash, and $last_nonblank_token otherwise
23282
23283     # initialize to be code BLOCK
23284     my $code_block_type = $last_nonblank_token;
23285
23286     # Check for the common case of an empty anonymous hash reference:
23287     # Maybe something like sub { { } }
23288     if ( $next_nonblank_token eq '}' ) {
23289         $code_block_type = "";
23290     }
23291
23292     else {
23293
23294         # To guess if this '{' is an anonymous hash reference, look ahead
23295         # and test as follows:
23296         #
23297         # it is a hash reference if next come:
23298         #   - a string or digit followed by a comma or =>
23299         #   - bareword followed by =>
23300         # otherwise it is a code block
23301         #
23302         # Examples of anonymous hash ref:
23303         # {'aa',};
23304         # {1,2}
23305         #
23306         # Examples of code blocks:
23307         # {1; print "hello\n", 1;}
23308         # {$a,1};
23309
23310         # We are only going to look ahead one more (nonblank/comment) line.
23311         # Strange formatting could cause a bad guess, but that's unlikely.
23312         my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
23313         my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
23314         my ( $rpre_tokens, $rpre_types ) =
23315           peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
23316                                                        # generous, and prevents
23317                                                        # wasting lots of
23318                                                        # time in mangled files
23319         if ( defined($rpre_types) && @$rpre_types ) {
23320             push @pre_types,  @$rpre_types;
23321             push @pre_tokens, @$rpre_tokens;
23322         }
23323
23324         # put a sentinal token to simplify stopping the search
23325         push @pre_types, '}';
23326
23327         my $jbeg = 0;
23328         $jbeg = 1 if $pre_types[0] eq 'b';
23329
23330         # first look for one of these
23331         #  - bareword
23332         #  - bareword with leading -
23333         #  - digit
23334         #  - quoted string
23335         my $j = $jbeg;
23336         if ( $pre_types[$j] =~ /^[\'\"]/ ) {
23337
23338             # find the closing quote; don't worry about escapes
23339             my $quote_mark = $pre_types[$j];
23340             for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
23341                 if ( $pre_types[$k] eq $quote_mark ) {
23342                     $j = $k + 1;
23343                     my $next = $pre_types[$j];
23344                     last;
23345                 }
23346             }
23347         }
23348         elsif ( $pre_types[$j] eq 'd' ) {
23349             $j++;
23350         }
23351         elsif ( $pre_types[$j] eq 'w' ) {
23352             unless ( $is_keyword{ $pre_tokens[$j] } ) {
23353                 $j++;
23354             }
23355         }
23356         elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
23357             $j++;
23358         }
23359         if ( $j > $jbeg ) {
23360
23361             $j++ if $pre_types[$j] eq 'b';
23362
23363             # it's a hash ref if a comma or => follow next
23364             if ( $pre_types[$j] eq ','
23365                 || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
23366             {
23367                 $code_block_type = "";
23368             }
23369         }
23370     }
23371
23372     return $code_block_type;
23373 }
23374
23375 sub unexpected {
23376
23377     # report unexpected token type and show where it is
23378     # USES GLOBAL VARIABLES: $tokenizer_self
23379     my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
23380         $rpretoken_type, $input_line )
23381       = @_;
23382
23383     if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
23384         my $msg = "found $found where $expecting expected";
23385         my $pos = $$rpretoken_map[$i_tok];
23386         interrupt_logfile();
23387         my $input_line_number = $tokenizer_self->{_last_line_number};
23388         my ( $offset, $numbered_line, $underline ) =
23389           make_numbered_line( $input_line_number, $input_line, $pos );
23390         $underline = write_on_underline( $underline, $pos - $offset, '^' );
23391
23392         my $trailer = "";
23393         if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
23394             my $pos_prev = $$rpretoken_map[$last_nonblank_i];
23395             my $num;
23396             if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
23397                 $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
23398             }
23399             else {
23400                 $num = $pos - $pos_prev;
23401             }
23402             if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
23403
23404             $underline =
23405               write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
23406             $trailer = " (previous token underlined)";
23407         }
23408         warning( $numbered_line . "\n" );
23409         warning( $underline . "\n" );
23410         warning( $msg . $trailer . "\n" );
23411         resume_logfile();
23412     }
23413 }
23414
23415 sub is_non_structural_brace {
23416
23417     # Decide if a brace or bracket is structural or non-structural
23418     # by looking at the previous token and type
23419     # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
23420
23421     # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
23422     # Tentatively deactivated because it caused the wrong operator expectation
23423     # for this code:
23424     #      $user = @vars[1] / 100;
23425     # Must update sub operator_expected before re-implementing.
23426     # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
23427     #    return 0;
23428     # }
23429
23430     # NOTE: braces after type characters start code blocks, but for
23431     # simplicity these are not identified as such.  See also
23432     # sub code_block_type
23433     # if ($last_nonblank_type eq 't') {return 0}
23434
23435     # otherwise, it is non-structural if it is decorated
23436     # by type information.
23437     # For example, the '{' here is non-structural:   ${xxx}
23438     (
23439         $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
23440
23441           # or if we follow a hash or array closing curly brace or bracket
23442           # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
23443           # because the first '}' would have been given type 'R'
23444           || $last_nonblank_type =~ /^([R\]])$/
23445     );
23446 }
23447
23448 #########i#############################################################
23449 # Tokenizer routines for tracking container nesting depths
23450 #######################################################################
23451
23452 # The following routines keep track of nesting depths of the nesting
23453 # types, ( [ { and ?.  This is necessary for determining the indentation
23454 # level, and also for debugging programs.  Not only do they keep track of
23455 # nesting depths of the individual brace types, but they check that each
23456 # of the other brace types is balanced within matching pairs.  For
23457 # example, if the program sees this sequence:
23458 #
23459 #         {  ( ( ) }
23460 #
23461 # then it can determine that there is an extra left paren somewhere
23462 # between the { and the }.  And so on with every other possible
23463 # combination of outer and inner brace types.  For another
23464 # example:
23465 #
23466 #         ( [ ..... ]  ] )
23467 #
23468 # which has an extra ] within the parens.
23469 #
23470 # The brace types have indexes 0 .. 3 which are indexes into
23471 # the matrices.
23472 #
23473 # The pair ? : are treated as just another nesting type, with ? acting
23474 # as the opening brace and : acting as the closing brace.
23475 #
23476 # The matrix
23477 #
23478 #         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
23479 #
23480 # saves the nesting depth of brace type $b (where $b is either of the other
23481 # nesting types) when brace type $a enters a new depth.  When this depth
23482 # decreases, a check is made that the current depth of brace types $b is
23483 # unchanged, or otherwise there must have been an error.  This can
23484 # be very useful for localizing errors, particularly when perl runs to
23485 # the end of a large file (such as this one) and announces that there
23486 # is a problem somewhere.
23487 #
23488 # A numerical sequence number is maintained for every nesting type,
23489 # so that each matching pair can be uniquely identified in a simple
23490 # way.
23491
23492 sub increase_nesting_depth {
23493     my ( $a, $pos ) = @_;
23494
23495     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
23496     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
23497     my $b;
23498     $current_depth[$a]++;
23499     my $input_line_number = $tokenizer_self->{_last_line_number};
23500     my $input_line        = $tokenizer_self->{_line_text};
23501
23502     # Sequence numbers increment by number of items.  This keeps
23503     # a unique set of numbers but still allows the relative location
23504     # of any type to be determined.
23505     $nesting_sequence_number[$a] += scalar(@closing_brace_names);
23506     my $seqno = $nesting_sequence_number[$a];
23507     $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
23508
23509     $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
23510       [ $input_line_number, $input_line, $pos ];
23511
23512     for $b ( 0 .. $#closing_brace_names ) {
23513         next if ( $b == $a );
23514         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
23515     }
23516     return $seqno;
23517 }
23518
23519 sub decrease_nesting_depth {
23520
23521     my ( $a, $pos ) = @_;
23522
23523     # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
23524     # @current_sequence_number, @depth_array, @starting_line_of_current_depth
23525     my $b;
23526     my $seqno             = 0;
23527     my $input_line_number = $tokenizer_self->{_last_line_number};
23528     my $input_line        = $tokenizer_self->{_line_text};
23529
23530     if ( $current_depth[$a] > 0 ) {
23531
23532         $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
23533
23534         # check that any brace types $b contained within are balanced
23535         for $b ( 0 .. $#closing_brace_names ) {
23536             next if ( $b == $a );
23537
23538             unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
23539                 $current_depth[$b] )
23540             {
23541                 my $diff = $current_depth[$b] -
23542                   $depth_array[$a][$b][ $current_depth[$a] ];
23543
23544                 # don't whine too many times
23545                 my $saw_brace_error = get_saw_brace_error();
23546                 if (
23547                     $saw_brace_error <= MAX_NAG_MESSAGES
23548
23549                     # if too many closing types have occured, we probably
23550                     # already caught this error
23551                     && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
23552                   )
23553                 {
23554                     interrupt_logfile();
23555                     my $rsl =
23556                       $starting_line_of_current_depth[$a][ $current_depth[$a] ];
23557                     my $sl  = $$rsl[0];
23558                     my $rel = [ $input_line_number, $input_line, $pos ];
23559                     my $el  = $$rel[0];
23560                     my ($ess);
23561
23562                     if ( $diff == 1 || $diff == -1 ) {
23563                         $ess = '';
23564                     }
23565                     else {
23566                         $ess = 's';
23567                     }
23568                     my $bname =
23569                       ( $diff > 0 )
23570                       ? $opening_brace_names[$b]
23571                       : $closing_brace_names[$b];
23572                     write_error_indicator_pair( @$rsl, '^' );
23573                     my $msg = <<"EOM";
23574 Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
23575 EOM
23576
23577                     if ( $diff > 0 ) {
23578                         my $rml =
23579                           $starting_line_of_current_depth[$b]
23580                           [ $current_depth[$b] ];
23581                         my $ml = $$rml[0];
23582                         $msg .=
23583 "    The most recent un-matched $bname is on line $ml\n";
23584                         write_error_indicator_pair( @$rml, '^' );
23585                     }
23586                     write_error_indicator_pair( @$rel, '^' );
23587                     warning($msg);
23588                     resume_logfile();
23589                 }
23590                 increment_brace_error();
23591             }
23592         }
23593         $current_depth[$a]--;
23594     }
23595     else {
23596
23597         my $saw_brace_error = get_saw_brace_error();
23598         if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
23599             my $msg = <<"EOM";
23600 There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
23601 EOM
23602             indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
23603         }
23604         increment_brace_error();
23605     }
23606     return $seqno;
23607 }
23608
23609 sub check_final_nesting_depths {
23610     my ($a);
23611
23612     # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
23613
23614     for $a ( 0 .. $#closing_brace_names ) {
23615
23616         if ( $current_depth[$a] ) {
23617             my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
23618             my $sl  = $$rsl[0];
23619             my $msg = <<"EOM";
23620 Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
23621 The most recent un-matched $opening_brace_names[$a] is on line $sl
23622 EOM
23623             indicate_error( $msg, @$rsl, '^' );
23624             increment_brace_error();
23625         }
23626     }
23627 }
23628
23629 #########i#############################################################
23630 # Tokenizer routines for looking ahead in input stream
23631 #######################################################################
23632
23633 sub peek_ahead_for_n_nonblank_pre_tokens {
23634
23635     # returns next n pretokens if they exist
23636     # returns undef's if hits eof without seeing any pretokens
23637     # USES GLOBAL VARIABLES: $tokenizer_self
23638     my $max_pretokens = shift;
23639     my $line;
23640     my $i = 0;
23641     my ( $rpre_tokens, $rmap, $rpre_types );
23642
23643     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23644     {
23645         $line =~ s/^\s*//;    # trim leading blanks
23646         next if ( length($line) <= 0 );    # skip blank
23647         next if ( $line =~ /^#/ );         # skip comment
23648         ( $rpre_tokens, $rmap, $rpre_types ) =
23649           pre_tokenize( $line, $max_pretokens );
23650         last;
23651     }
23652     return ( $rpre_tokens, $rpre_types );
23653 }
23654
23655 # look ahead for next non-blank, non-comment line of code
23656 sub peek_ahead_for_nonblank_token {
23657
23658     # USES GLOBAL VARIABLES: $tokenizer_self
23659     my ( $rtokens, $max_token_index ) = @_;
23660     my $line;
23661     my $i = 0;
23662
23663     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
23664     {
23665         $line =~ s/^\s*//;    # trim leading blanks
23666         next if ( length($line) <= 0 );    # skip blank
23667         next if ( $line =~ /^#/ );         # skip comment
23668         my ( $rtok, $rmap, $rtype ) =
23669           pre_tokenize( $line, 2 );        # only need 2 pre-tokens
23670         my $j = $max_token_index + 1;
23671         my $tok;
23672
23673         foreach $tok (@$rtok) {
23674             last if ( $tok =~ "\n" );
23675             $$rtokens[ ++$j ] = $tok;
23676         }
23677         last;
23678     }
23679     return $rtokens;
23680 }
23681
23682 #########i#############################################################
23683 # Tokenizer guessing routines for ambiguous situations
23684 #######################################################################
23685
23686 sub guess_if_pattern_or_conditional {
23687
23688     # this routine is called when we have encountered a ? following an
23689     # unknown bareword, and we must decide if it starts a pattern or not
23690     # input parameters:
23691     #   $i - token index of the ? starting possible pattern
23692     # output parameters:
23693     #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
23694     #   msg = a warning or diagnostic message
23695     # USES GLOBAL VARIABLES: $last_nonblank_token
23696     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
23697     my $is_pattern = 0;
23698     my $msg        = "guessing that ? after $last_nonblank_token starts a ";
23699
23700     if ( $i >= $max_token_index ) {
23701         $msg .= "conditional (no end to pattern found on the line)\n";
23702     }
23703     else {
23704         my $ibeg = $i;
23705         $i = $ibeg + 1;
23706         my $next_token = $$rtokens[$i];    # first token after ?
23707
23708         # look for a possible ending ? on this line..
23709         my $in_quote        = 1;
23710         my $quote_depth     = 0;
23711         my $quote_character = '';
23712         my $quote_pos       = 0;
23713         my $quoted_string;
23714         (
23715             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23716             $quoted_string
23717           )
23718           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
23719             $quote_pos, $quote_depth, $max_token_index );
23720
23721         if ($in_quote) {
23722
23723             # we didn't find an ending ? on this line,
23724             # so we bias towards conditional
23725             $is_pattern = 0;
23726             $msg .= "conditional (no ending ? on this line)\n";
23727
23728             # we found an ending ?, so we bias towards a pattern
23729         }
23730         else {
23731
23732             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
23733                 $is_pattern = 1;
23734                 $msg .= "pattern (found ending ? and pattern expected)\n";
23735             }
23736             else {
23737                 $msg .= "pattern (uncertain, but found ending ?)\n";
23738             }
23739         }
23740     }
23741     return ( $is_pattern, $msg );
23742 }
23743
23744 sub guess_if_pattern_or_division {
23745
23746     # this routine is called when we have encountered a / following an
23747     # unknown bareword, and we must decide if it starts a pattern or is a
23748     # division
23749     # input parameters:
23750     #   $i - token index of the / starting possible pattern
23751     # output parameters:
23752     #   $is_pattern = 0 if probably division,  =1 if probably a pattern
23753     #   msg = a warning or diagnostic message
23754     # USES GLOBAL VARIABLES: $last_nonblank_token
23755     my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
23756     my $is_pattern = 0;
23757     my $msg        = "guessing that / after $last_nonblank_token starts a ";
23758
23759     if ( $i >= $max_token_index ) {
23760         "division (no end to pattern found on the line)\n";
23761     }
23762     else {
23763         my $ibeg = $i;
23764         my $divide_expected =
23765           numerator_expected( $i, $rtokens, $max_token_index );
23766         $i = $ibeg + 1;
23767         my $next_token = $$rtokens[$i];    # first token after slash
23768
23769         # look for a possible ending / on this line..
23770         my $in_quote        = 1;
23771         my $quote_depth     = 0;
23772         my $quote_character = '';
23773         my $quote_pos       = 0;
23774         my $quoted_string;
23775         (
23776             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23777             $quoted_string
23778           )
23779           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
23780             $quote_pos, $quote_depth, $max_token_index );
23781
23782         if ($in_quote) {
23783
23784             # we didn't find an ending / on this line,
23785             # so we bias towards division
23786             if ( $divide_expected >= 0 ) {
23787                 $is_pattern = 0;
23788                 $msg .= "division (no ending / on this line)\n";
23789             }
23790             else {
23791                 $msg        = "multi-line pattern (division not possible)\n";
23792                 $is_pattern = 1;
23793             }
23794
23795         }
23796
23797         # we found an ending /, so we bias towards a pattern
23798         else {
23799
23800             if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
23801
23802                 if ( $divide_expected >= 0 ) {
23803
23804                     if ( $i - $ibeg > 60 ) {
23805                         $msg .= "division (matching / too distant)\n";
23806                         $is_pattern = 0;
23807                     }
23808                     else {
23809                         $msg .= "pattern (but division possible too)\n";
23810                         $is_pattern = 1;
23811                     }
23812                 }
23813                 else {
23814                     $is_pattern = 1;
23815                     $msg .= "pattern (division not possible)\n";
23816                 }
23817             }
23818             else {
23819
23820                 if ( $divide_expected >= 0 ) {
23821                     $is_pattern = 0;
23822                     $msg .= "division (pattern not possible)\n";
23823                 }
23824                 else {
23825                     $is_pattern = 1;
23826                     $msg .=
23827                       "pattern (uncertain, but division would not work here)\n";
23828                 }
23829             }
23830         }
23831     }
23832     return ( $is_pattern, $msg );
23833 }
23834
23835 # try to resolve here-doc vs. shift by looking ahead for
23836 # non-code or the end token (currently only looks for end token)
23837 # returns 1 if it is probably a here doc, 0 if not
23838 sub guess_if_here_doc {
23839
23840     # This is how many lines we will search for a target as part of the
23841     # guessing strategy.  It is a constant because there is probably
23842     # little reason to change it.
23843     # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
23844     # %is_constant,
23845     use constant HERE_DOC_WINDOW => 40;
23846
23847     my $next_token        = shift;
23848     my $here_doc_expected = 0;
23849     my $line;
23850     my $k   = 0;
23851     my $msg = "checking <<";
23852
23853     while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
23854     {
23855         chomp $line;
23856
23857         if ( $line =~ /^$next_token$/ ) {
23858             $msg .= " -- found target $next_token ahead $k lines\n";
23859             $here_doc_expected = 1;    # got it
23860             last;
23861         }
23862         last if ( $k >= HERE_DOC_WINDOW );
23863     }
23864
23865     unless ($here_doc_expected) {
23866
23867         if ( !defined($line) ) {
23868             $here_doc_expected = -1;    # hit eof without seeing target
23869             $msg .= " -- must be shift; target $next_token not in file\n";
23870
23871         }
23872         else {                          # still unsure..taking a wild guess
23873
23874             if ( !$is_constant{$current_package}{$next_token} ) {
23875                 $here_doc_expected = 1;
23876                 $msg .=
23877                   " -- guessing it's a here-doc ($next_token not a constant)\n";
23878             }
23879             else {
23880                 $msg .=
23881                   " -- guessing it's a shift ($next_token is a constant)\n";
23882             }
23883         }
23884     }
23885     write_logfile_entry($msg);
23886     return $here_doc_expected;
23887 }
23888
23889 #########i#############################################################
23890 # Tokenizer Routines for scanning identifiers and related items
23891 #######################################################################
23892
23893 sub scan_bare_identifier_do {
23894
23895     # this routine is called to scan a token starting with an alphanumeric
23896     # variable or package separator, :: or '.
23897     # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
23898     # $last_nonblank_type,@paren_type, $paren_depth
23899
23900     my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
23901         $max_token_index )
23902       = @_;
23903     my $i_begin = $i;
23904     my $package = undef;
23905
23906     my $i_beg = $i;
23907
23908     # we have to back up one pretoken at a :: since each : is one pretoken
23909     if ( $tok eq '::' ) { $i_beg-- }
23910     if ( $tok eq '->' ) { $i_beg-- }
23911     my $pos_beg = $$rtoken_map[$i_beg];
23912     pos($input_line) = $pos_beg;
23913
23914     #  Examples:
23915     #   A::B::C
23916     #   A::
23917     #   ::A
23918     #   A'B
23919     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
23920
23921         my $pos  = pos($input_line);
23922         my $numc = $pos - $pos_beg;
23923         $tok = substr( $input_line, $pos_beg, $numc );
23924
23925         # type 'w' includes anything without leading type info
23926         # ($,%,@,*) including something like abc::def::ghi
23927         $type = 'w';
23928
23929         my $sub_name = "";
23930         if ( defined($2) ) { $sub_name = $2; }
23931         if ( defined($1) ) {
23932             $package = $1;
23933
23934             # patch: don't allow isolated package name which just ends
23935             # in the old style package separator (single quote).  Example:
23936             #   use CGI':all';
23937             if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
23938                 $pos--;
23939             }
23940
23941             $package =~ s/\'/::/g;
23942             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
23943             $package =~ s/::$//;
23944         }
23945         else {
23946             $package = $current_package;
23947
23948             if ( $is_keyword{$tok} ) {
23949                 $type = 'k';
23950             }
23951         }
23952
23953         # if it is a bareword..
23954         if ( $type eq 'w' ) {
23955
23956             # check for v-string with leading 'v' type character
23957             # (This seems to have presidence over filehandle, type 'Y')
23958             if ( $tok =~ /^v\d[_\d]*$/ ) {
23959
23960                 # we only have the first part - something like 'v101' -
23961                 # look for more
23962                 if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
23963                     $pos  = pos($input_line);
23964                     $numc = $pos - $pos_beg;
23965                     $tok  = substr( $input_line, $pos_beg, $numc );
23966                 }
23967                 $type = 'v';
23968
23969                 # warn if this version can't handle v-strings
23970                 report_v_string($tok);
23971             }
23972
23973             elsif ( $is_constant{$package}{$sub_name} ) {
23974                 $type = 'C';
23975             }
23976
23977             # bareword after sort has implied empty prototype; for example:
23978             # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
23979             # This has priority over whatever the user has specified.
23980             elsif ($last_nonblank_token eq 'sort'
23981                 && $last_nonblank_type eq 'k' )
23982             {
23983                 $type = 'Z';
23984             }
23985
23986             # Note: strangely, perl does not seem to really let you create
23987             # functions which act like eval and do, in the sense that eval
23988             # and do may have operators following the final }, but any operators
23989             # that you create with prototype (&) apparently do not allow
23990             # trailing operators, only terms.  This seems strange.
23991             # If this ever changes, here is the update
23992             # to make perltidy behave accordingly:
23993
23994             # elsif ( $is_block_function{$package}{$tok} ) {
23995             #    $tok='eval'; # patch to do braces like eval  - doesn't work
23996             #    $type = 'k';
23997             #}
23998             # FIXME: This could become a separate type to allow for different
23999             # future behavior:
24000             elsif ( $is_block_function{$package}{$sub_name} ) {
24001                 $type = 'G';
24002             }
24003
24004             elsif ( $is_block_list_function{$package}{$sub_name} ) {
24005                 $type = 'G';
24006             }
24007             elsif ( $is_user_function{$package}{$sub_name} ) {
24008                 $type      = 'U';
24009                 $prototype = $user_function_prototype{$package}{$sub_name};
24010             }
24011
24012             # check for indirect object
24013             elsif (
24014
24015                 # added 2001-03-27: must not be followed immediately by '('
24016                 # see fhandle.t
24017                 ( $input_line !~ m/\G\(/gc )
24018
24019                 # and
24020                 && (
24021
24022                     # preceded by keyword like 'print', 'printf' and friends
24023                     $is_indirect_object_taker{$last_nonblank_token}
24024
24025                     # or preceded by something like 'print(' or 'printf('
24026                     || (
24027                         ( $last_nonblank_token eq '(' )
24028                         && $is_indirect_object_taker{ $paren_type[$paren_depth]
24029                         }
24030
24031                     )
24032                 )
24033               )
24034             {
24035
24036                 # may not be indirect object unless followed by a space
24037                 if ( $input_line =~ m/\G\s+/gc ) {
24038                     $type = 'Y';
24039
24040                     # Abandon Hope ...
24041                     # Perl's indirect object notation is a very bad
24042                     # thing and can cause subtle bugs, especially for
24043                     # beginning programmers.  And I haven't even been
24044                     # able to figure out a sane warning scheme which
24045                     # doesn't get in the way of good scripts.
24046
24047                     # Complain if a filehandle has any lower case
24048                     # letters.  This is suggested good practice, but the
24049                     # main reason for this warning is that prior to
24050                     # release 20010328, perltidy incorrectly parsed a
24051                     # function call after a print/printf, with the
24052                     # result that a space got added before the opening
24053                     # paren, thereby converting the function name to a
24054                     # filehandle according to perl's weird rules.  This
24055                     # will not usually generate a syntax error, so this
24056                     # is a potentially serious bug.  By warning
24057                     # of filehandles with any lower case letters,
24058                     # followed by opening parens, we will help the user
24059                     # find almost all of these older errors.
24060                     # use 'sub_name' because something like
24061                     # main::MYHANDLE is ok for filehandle
24062                     if ( $sub_name =~ /[a-z]/ ) {
24063
24064                         # could be bug caused by older perltidy if
24065                         # followed by '('
24066                         if ( $input_line =~ m/\G\s*\(/gc ) {
24067                             complain(
24068 "Caution: unknown word '$tok' in indirect object slot\n"
24069                             );
24070                         }
24071                     }
24072                 }
24073
24074                 # bareword not followed by a space -- may not be filehandle
24075                 # (may be function call defined in a 'use' statement)
24076                 else {
24077                     $type = 'Z';
24078                 }
24079             }
24080         }
24081
24082         # Now we must convert back from character position
24083         # to pre_token index.
24084         # I don't think an error flag can occur here ..but who knows
24085         my $error;
24086         ( $i, $error ) =
24087           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
24088         if ($error) {
24089             warning("scan_bare_identifier: Possibly invalid tokenization\n");
24090         }
24091     }
24092
24093     # no match but line not blank - could be syntax error
24094     # perl will take '::' alone without complaint
24095     else {
24096         $type = 'w';
24097
24098         # change this warning to log message if it becomes annoying
24099         warning("didn't find identifier after leading ::\n");
24100     }
24101     return ( $i, $tok, $type, $prototype );
24102 }
24103
24104 sub scan_id_do {
24105
24106 # This is the new scanner and will eventually replace scan_identifier.
24107 # Only type 'sub' and 'package' are implemented.
24108 # Token types $ * % @ & -> are not yet implemented.
24109 #
24110 # Scan identifier following a type token.
24111 # The type of call depends on $id_scan_state: $id_scan_state = ''
24112 # for starting call, in which case $tok must be the token defining
24113 # the type.
24114 #
24115 # If the type token is the last nonblank token on the line, a value
24116 # of $id_scan_state = $tok is returned, indicating that further
24117 # calls must be made to get the identifier.  If the type token is
24118 # not the last nonblank token on the line, the identifier is
24119 # scanned and handled and a value of '' is returned.
24120 # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
24121 # $statement_type, $tokenizer_self
24122
24123     my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
24124         $max_token_index )
24125       = @_;
24126     my $type = '';
24127     my ( $i_beg, $pos_beg );
24128
24129     #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
24130     #my ($a,$b,$c) = caller;
24131     #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
24132
24133     # on re-entry, start scanning at first token on the line
24134     if ($id_scan_state) {
24135         $i_beg = $i;
24136         $type  = '';
24137     }
24138
24139     # on initial entry, start scanning just after type token
24140     else {
24141         $i_beg         = $i + 1;
24142         $id_scan_state = $tok;
24143         $type          = 't';
24144     }
24145
24146     # find $i_beg = index of next nonblank token,
24147     # and handle empty lines
24148     my $blank_line          = 0;
24149     my $next_nonblank_token = $$rtokens[$i_beg];
24150     if ( $i_beg > $max_token_index ) {
24151         $blank_line = 1;
24152     }
24153     else {
24154
24155         # only a '#' immediately after a '$' is not a comment
24156         if ( $next_nonblank_token eq '#' ) {
24157             unless ( $tok eq '$' ) {
24158                 $blank_line = 1;
24159             }
24160         }
24161
24162         if ( $next_nonblank_token =~ /^\s/ ) {
24163             ( $next_nonblank_token, $i_beg ) =
24164               find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
24165                 $max_token_index );
24166             if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
24167                 $blank_line = 1;
24168             }
24169         }
24170     }
24171
24172     # handle non-blank line; identifier, if any, must follow
24173     unless ($blank_line) {
24174
24175         if ( $id_scan_state eq 'sub' ) {
24176             ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
24177                 $input_line, $i,             $i_beg,
24178                 $tok,        $type,          $rtokens,
24179                 $rtoken_map, $id_scan_state, $max_token_index
24180             );
24181         }
24182
24183         elsif ( $id_scan_state eq 'package' ) {
24184             ( $i, $tok, $type ) =
24185               do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
24186                 $rtoken_map, $max_token_index );
24187             $id_scan_state = '';
24188         }
24189
24190         else {
24191             warning("invalid token in scan_id: $tok\n");
24192             $id_scan_state = '';
24193         }
24194     }
24195
24196     if ( $id_scan_state && ( !defined($type) || !$type ) ) {
24197
24198         # shouldn't happen:
24199         warning(
24200 "Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
24201         );
24202         report_definite_bug();
24203     }
24204
24205     TOKENIZER_DEBUG_FLAG_NSCAN && do {
24206         print
24207           "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
24208     };
24209     return ( $i, $tok, $type, $id_scan_state );
24210 }
24211
24212 sub check_prototype {
24213     my ( $proto, $package, $subname ) = @_;
24214     return unless ( defined($package) && defined($subname) );
24215     if ( defined($proto) ) {
24216         $proto =~ s/^\s*\(\s*//;
24217         $proto =~ s/\s*\)$//;
24218         if ($proto) {
24219             $is_user_function{$package}{$subname}        = 1;
24220             $user_function_prototype{$package}{$subname} = "($proto)";
24221
24222             # prototypes containing '&' must be treated specially..
24223             if ( $proto =~ /\&/ ) {
24224
24225                 # right curly braces of prototypes ending in
24226                 # '&' may be followed by an operator
24227                 if ( $proto =~ /\&$/ ) {
24228                     $is_block_function{$package}{$subname} = 1;
24229                 }
24230
24231                 # right curly braces of prototypes NOT ending in
24232                 # '&' may NOT be followed by an operator
24233                 elsif ( $proto !~ /\&$/ ) {
24234                     $is_block_list_function{$package}{$subname} = 1;
24235                 }
24236             }
24237         }
24238         else {
24239             $is_constant{$package}{$subname} = 1;
24240         }
24241     }
24242     else {
24243         $is_user_function{$package}{$subname} = 1;
24244     }
24245 }
24246
24247 sub do_scan_package {
24248
24249     # do_scan_package parses a package name
24250     # it is called with $i_beg equal to the index of the first nonblank
24251     # token following a 'package' token.
24252     # USES GLOBAL VARIABLES: $current_package,
24253
24254     my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
24255         $max_token_index )
24256       = @_;
24257     my $package = undef;
24258     my $pos_beg = $$rtoken_map[$i_beg];
24259     pos($input_line) = $pos_beg;
24260
24261     # handle non-blank line; package name, if any, must follow
24262     if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
24263         $package = $1;
24264         $package = ( defined($1) && $1 ) ? $1 : 'main';
24265         $package =~ s/\'/::/g;
24266         if ( $package =~ /^\:/ ) { $package = 'main' . $package }
24267         $package =~ s/::$//;
24268         my $pos  = pos($input_line);
24269         my $numc = $pos - $pos_beg;
24270         $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
24271         $type = 'i';
24272
24273         # Now we must convert back from character position
24274         # to pre_token index.
24275         # I don't think an error flag can occur here ..but ?
24276         my $error;
24277         ( $i, $error ) =
24278           inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
24279         if ($error) { warning("Possibly invalid package\n") }
24280         $current_package = $package;
24281
24282         # check for error
24283         my ( $next_nonblank_token, $i_next ) =
24284           find_next_nonblank_token( $i, $rtokens, $max_token_index );
24285         if ( $next_nonblank_token !~ /^[;\}]$/ ) {
24286             warning(
24287                 "Unexpected '$next_nonblank_token' after package name '$tok'\n"
24288             );
24289         }
24290     }
24291
24292     # no match but line not blank --
24293     # could be a label with name package, like package:  , for example.
24294     else {
24295         $type = 'k';
24296     }
24297
24298     return ( $i, $tok, $type );
24299 }
24300
24301 sub scan_identifier_do {
24302
24303     # This routine assembles tokens into identifiers.  It maintains a
24304     # scan state, id_scan_state.  It updates id_scan_state based upon
24305     # current id_scan_state and token, and returns an updated
24306     # id_scan_state and the next index after the identifier.
24307     # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
24308     # $last_nonblank_type
24309
24310     my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
24311     my $i_begin   = $i;
24312     my $type      = '';
24313     my $tok_begin = $$rtokens[$i_begin];
24314     if ( $tok_begin eq ':' ) { $tok_begin = '::' }
24315     my $id_scan_state_begin = $id_scan_state;
24316     my $identifier_begin    = $identifier;
24317     my $tok                 = $tok_begin;
24318     my $message             = "";
24319
24320     # these flags will be used to help figure out the type:
24321     my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
24322     my $saw_type;
24323
24324     # allow old package separator (') except in 'use' statement
24325     my $allow_tick = ( $last_nonblank_token ne 'use' );
24326
24327     # get started by defining a type and a state if necessary
24328     unless ($id_scan_state) {
24329         $context = UNKNOWN_CONTEXT;
24330
24331         # fixup for digraph
24332         if ( $tok eq '>' ) {
24333             $tok       = '->';
24334             $tok_begin = $tok;
24335         }
24336         $identifier = $tok;
24337
24338         if ( $tok eq '$' || $tok eq '*' ) {
24339             $id_scan_state = '$';
24340             $context       = SCALAR_CONTEXT;
24341         }
24342         elsif ( $tok eq '%' || $tok eq '@' ) {
24343             $id_scan_state = '$';
24344             $context       = LIST_CONTEXT;
24345         }
24346         elsif ( $tok eq '&' ) {
24347             $id_scan_state = '&';
24348         }
24349         elsif ( $tok eq 'sub' or $tok eq 'package' ) {
24350             $saw_alpha     = 0;     # 'sub' is considered type info here
24351             $id_scan_state = '$';
24352             $identifier .= ' ';     # need a space to separate sub from sub name
24353         }
24354         elsif ( $tok eq '::' ) {
24355             $id_scan_state = 'A';
24356         }
24357         elsif ( $tok =~ /^[A-Za-z_]/ ) {
24358             $id_scan_state = ':';
24359         }
24360         elsif ( $tok eq '->' ) {
24361             $id_scan_state = '$';
24362         }
24363         else {
24364
24365             # shouldn't happen
24366             my ( $a, $b, $c ) = caller;
24367             warning("Program Bug: scan_identifier given bad token = $tok \n");
24368             warning("   called from sub $a  line: $c\n");
24369             report_definite_bug();
24370         }
24371         $saw_type = !$saw_alpha;
24372     }
24373     else {
24374         $i--;
24375         $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
24376     }
24377
24378     # now loop to gather the identifier
24379     my $i_save = $i;
24380
24381     while ( $i < $max_token_index ) {
24382         $i_save = $i unless ( $tok =~ /^\s*$/ );
24383         $tok = $$rtokens[ ++$i ];
24384
24385         if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
24386             $tok = '::';
24387             $i++;
24388         }
24389
24390         if ( $id_scan_state eq '$' ) {    # starting variable name
24391
24392             if ( $tok eq '$' ) {
24393
24394                 $identifier .= $tok;
24395
24396                 # we've got a punctuation variable if end of line (punct.t)
24397                 if ( $i == $max_token_index ) {
24398                     $type          = 'i';
24399                     $id_scan_state = '';
24400                     last;
24401                 }
24402             }
24403             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
24404                 $saw_alpha     = 1;
24405                 $id_scan_state = ':';           # now need ::
24406                 $identifier .= $tok;
24407             }
24408             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
24409                 $saw_alpha     = 1;
24410                 $id_scan_state = ':';                 # now need ::
24411                 $identifier .= $tok;
24412
24413                 # Perl will accept leading digits in identifiers,
24414                 # although they may not always produce useful results.
24415                 # Something like $main::0 is ok.  But this also works:
24416                 #
24417                 #  sub howdy::123::bubba{ print "bubba $54321!\n" }
24418                 #  howdy::123::bubba();
24419                 #
24420             }
24421             elsif ( $tok =~ /^[0-9]/ ) {              # numeric
24422                 $saw_alpha     = 1;
24423                 $id_scan_state = ':';                 # now need ::
24424                 $identifier .= $tok;
24425             }
24426             elsif ( $tok eq '::' ) {
24427                 $id_scan_state = 'A';
24428                 $identifier .= $tok;
24429             }
24430             elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
24431                 $identifier .= $tok;    # keep same state, a $ could follow
24432             }
24433             elsif ( $tok eq '{' ) {
24434
24435                 # check for something like ${#} or ${©}
24436                 if (   $identifier eq '$'
24437                     && $i + 2 <= $max_token_index
24438                     && $$rtokens[ $i + 2 ] eq '}'
24439                     && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
24440                 {
24441                     my $next2 = $$rtokens[ $i + 2 ];
24442                     my $next1 = $$rtokens[ $i + 1 ];
24443                     $identifier .= $tok . $next1 . $next2;
24444                     $i += 2;
24445                     $id_scan_state = '';
24446                     last;
24447                 }
24448
24449                 # skip something like ${xxx} or ->{
24450                 $id_scan_state = '';
24451
24452                 # if this is the first token of a line, any tokens for this
24453                 # identifier have already been accumulated
24454                 if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
24455                 $i = $i_save;
24456                 last;
24457             }
24458
24459             # space ok after leading $ % * & @
24460             elsif ( $tok =~ /^\s*$/ ) {
24461
24462                 if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
24463
24464                     if ( length($identifier) > 1 ) {
24465                         $id_scan_state = '';
24466                         $i             = $i_save;
24467                         $type          = 'i';    # probably punctuation variable
24468                         last;
24469                     }
24470                     else {
24471
24472                         # spaces after $'s are common, and space after @
24473                         # is harmless, so only complain about space
24474                         # after other type characters. Space after $ and
24475                         # @ will be removed in formatting.  Report space
24476                         # after % and * because they might indicate a
24477                         # parsing error.  In other words '% ' might be a
24478                         # modulo operator.  Delete this warning if it
24479                         # gets annoying.
24480                         if ( $identifier !~ /^[\@\$]$/ ) {
24481                             $message =
24482                               "Space in identifier, following $identifier\n";
24483                         }
24484                     }
24485                 }
24486
24487                 # else:
24488                 # space after '->' is ok
24489             }
24490             elsif ( $tok eq '^' ) {
24491
24492                 # check for some special variables like $^W
24493                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
24494                     $identifier .= $tok;
24495                     $id_scan_state = 'A';
24496
24497                     # Perl accepts '$^]' or '@^]', but
24498                     # there must not be a space before the ']'.
24499                     my $next1 = $$rtokens[ $i + 1 ];
24500                     if ( $next1 eq ']' ) {
24501                         $i++;
24502                         $identifier .= $next1;
24503                         $id_scan_state = "";
24504                         last;
24505                     }
24506                 }
24507                 else {
24508                     $id_scan_state = '';
24509                 }
24510             }
24511             else {    # something else
24512
24513                 # check for various punctuation variables
24514                 if ( $identifier =~ /^[\$\*\@\%]$/ ) {
24515                     $identifier .= $tok;
24516                 }
24517
24518                 elsif ( $identifier eq '$#' ) {
24519
24520                     if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
24521
24522                     # perl seems to allow just these: $#: $#- $#+
24523                     elsif ( $tok =~ /^[\:\-\+]$/ ) {
24524                         $type = 'i';
24525                         $identifier .= $tok;
24526                     }
24527                     else {
24528                         $i = $i_save;
24529                         write_logfile_entry( 'Use of $# is deprecated' . "\n" );
24530                     }
24531                 }
24532                 elsif ( $identifier eq '$$' ) {
24533
24534                     # perl does not allow references to punctuation
24535                     # variables without braces.  For example, this
24536                     # won't work:
24537                     #  $:=\4;
24538                     #  $a = $$:;
24539                     # You would have to use
24540                     #  $a = ${$:};
24541
24542                     $i = $i_save;
24543                     if   ( $tok eq '{' ) { $type = 't' }
24544                     else                 { $type = 'i' }
24545                 }
24546                 elsif ( $identifier eq '->' ) {
24547                     $i = $i_save;
24548                 }
24549                 else {
24550                     $i = $i_save;
24551                     if ( length($identifier) == 1 ) { $identifier = ''; }
24552                 }
24553                 $id_scan_state = '';
24554                 last;
24555             }
24556         }
24557         elsif ( $id_scan_state eq '&' ) {    # starting sub call?
24558
24559             if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
24560                 $id_scan_state = ':';          # now need ::
24561                 $saw_alpha     = 1;
24562                 $identifier .= $tok;
24563             }
24564             elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
24565                 $id_scan_state = ':';                 # now need ::
24566                 $saw_alpha     = 1;
24567                 $identifier .= $tok;
24568             }
24569             elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
24570                 $id_scan_state = ':';       # now need ::
24571                 $saw_alpha     = 1;
24572                 $identifier .= $tok;
24573             }
24574             elsif ( $tok =~ /^\s*$/ ) {     # allow space
24575             }
24576             elsif ( $tok eq '::' ) {        # leading ::
24577                 $id_scan_state = 'A';       # accept alpha next
24578                 $identifier .= $tok;
24579             }
24580             elsif ( $tok eq '{' ) {
24581                 if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
24582                 $i             = $i_save;
24583                 $id_scan_state = '';
24584                 last;
24585             }
24586             else {
24587
24588                 # punctuation variable?
24589                 # testfile: cunningham4.pl
24590                 if ( $identifier eq '&' ) {
24591                     $identifier .= $tok;
24592                 }
24593                 else {
24594                     $identifier = '';
24595                     $i          = $i_save;
24596                     $type       = '&';
24597                 }
24598                 $id_scan_state = '';
24599                 last;
24600             }
24601         }
24602         elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
24603
24604             if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
24605                 $identifier .= $tok;
24606                 $id_scan_state = ':';        # now need ::
24607                 $saw_alpha     = 1;
24608             }
24609             elsif ( $tok eq "'" && $allow_tick ) {
24610                 $identifier .= $tok;
24611                 $id_scan_state = ':';        # now need ::
24612                 $saw_alpha     = 1;
24613             }
24614             elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
24615                 $identifier .= $tok;
24616                 $id_scan_state = ':';        # now need ::
24617                 $saw_alpha     = 1;
24618             }
24619             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
24620                 $id_scan_state = '(';
24621                 $identifier .= $tok;
24622             }
24623             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
24624                 $id_scan_state = ')';
24625                 $identifier .= $tok;
24626             }
24627             else {
24628                 $id_scan_state = '';
24629                 $i             = $i_save;
24630                 last;
24631             }
24632         }
24633         elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
24634
24635             if ( $tok eq '::' ) {            # got it
24636                 $identifier .= $tok;
24637                 $id_scan_state = 'A';        # now require alpha
24638             }
24639             elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
24640                 $identifier .= $tok;
24641                 $id_scan_state = ':';           # now need ::
24642                 $saw_alpha     = 1;
24643             }
24644             elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
24645                 $identifier .= $tok;
24646                 $id_scan_state = ':';           # now need ::
24647                 $saw_alpha     = 1;
24648             }
24649             elsif ( $tok eq "'" && $allow_tick ) {    # tick
24650
24651                 if ( $is_keyword{$identifier} ) {
24652                     $id_scan_state = '';              # that's all
24653                     $i             = $i_save;
24654                 }
24655                 else {
24656                     $identifier .= $tok;
24657                 }
24658             }
24659             elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
24660                 $id_scan_state = '(';
24661                 $identifier .= $tok;
24662             }
24663             elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
24664                 $id_scan_state = ')';
24665                 $identifier .= $tok;
24666             }
24667             else {
24668                 $id_scan_state = '';        # that's all
24669                 $i             = $i_save;
24670                 last;
24671             }
24672         }
24673         elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
24674
24675             if ( $tok eq '(' ) {             # got it
24676                 $identifier .= $tok;
24677                 $id_scan_state = ')';        # now find the end of it
24678             }
24679             elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
24680                 $identifier .= $tok;
24681             }
24682             else {
24683                 $id_scan_state = '';         # that's all - no prototype
24684                 $i             = $i_save;
24685                 last;
24686             }
24687         }
24688         elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
24689
24690             if ( $tok eq ')' ) {             # got it
24691                 $identifier .= $tok;
24692                 $id_scan_state = '';         # all done
24693                 last;
24694             }
24695             elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
24696                 $identifier .= $tok;
24697             }
24698             else {    # probable error in script, but keep going
24699                 warning("Unexpected '$tok' while seeking end of prototype\n");
24700                 $identifier .= $tok;
24701             }
24702         }
24703         else {        # can get here due to error in initialization
24704             $id_scan_state = '';
24705             $i             = $i_save;
24706             last;
24707         }
24708     }
24709
24710     if ( $id_scan_state eq ')' ) {
24711         warning("Hit end of line while seeking ) to end prototype\n");
24712     }
24713
24714     # once we enter the actual identifier, it may not extend beyond
24715     # the end of the current line
24716     if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
24717         $id_scan_state = '';
24718     }
24719     if ( $i < 0 ) { $i = 0 }
24720
24721     unless ($type) {
24722
24723         if ($saw_type) {
24724
24725             if ($saw_alpha) {
24726                 if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
24727                     $type = 'w';
24728                 }
24729                 else { $type = 'i' }
24730             }
24731             elsif ( $identifier eq '->' ) {
24732                 $type = '->';
24733             }
24734             elsif (
24735                 ( length($identifier) > 1 )
24736
24737                 # In something like '@$=' we have an identifier '@$'
24738                 # In something like '$${' we have type '$$' (and only
24739                 # part of an identifier)
24740                 && !( $identifier =~ /\$$/ && $tok eq '{' )
24741                 && ( $identifier !~ /^(sub |package )$/ )
24742               )
24743             {
24744                 $type = 'i';
24745             }
24746             else { $type = 't' }
24747         }
24748         elsif ($saw_alpha) {
24749
24750             # type 'w' includes anything without leading type info
24751             # ($,%,@,*) including something like abc::def::ghi
24752             $type = 'w';
24753         }
24754         else {
24755             $type = '';
24756         }    # this can happen on a restart
24757     }
24758
24759     if ($identifier) {
24760         $tok = $identifier;
24761         if ($message) { write_logfile_entry($message) }
24762     }
24763     else {
24764         $tok = $tok_begin;
24765         $i   = $i_begin;
24766     }
24767
24768     TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
24769         my ( $a, $b, $c ) = caller;
24770         print
24771 "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
24772         print
24773 "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
24774     };
24775     return ( $i, $tok, $type, $id_scan_state, $identifier );
24776 }
24777
24778 {
24779
24780     # saved package and subnames in case prototype is on separate line
24781     my ( $package_saved, $subname_saved );
24782
24783     sub do_scan_sub {
24784
24785         # do_scan_sub parses a sub name and prototype
24786         # it is called with $i_beg equal to the index of the first nonblank
24787         # token following a 'sub' token.
24788
24789         # TODO: add future error checks to be sure we have a valid
24790         # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
24791         # a name is given if and only if a non-anonymous sub is
24792         # appropriate.
24793         # USES GLOBAL VARS: $current_package, $last_nonblank_token,
24794         # $in_attribute_list, %saw_function_definition,
24795         # $statement_type
24796
24797         my (
24798             $input_line, $i,             $i_beg,
24799             $tok,        $type,          $rtokens,
24800             $rtoken_map, $id_scan_state, $max_token_index
24801         ) = @_;
24802         $id_scan_state = "";    # normally we get everything in one call
24803         my $subname = undef;
24804         my $package = undef;
24805         my $proto   = undef;
24806         my $attrs   = undef;
24807         my $match;
24808
24809         my $pos_beg = $$rtoken_map[$i_beg];
24810         pos($input_line) = $pos_beg;
24811
24812         # sub NAME PROTO ATTRS
24813         if (
24814             $input_line =~ m/\G\s*
24815         ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
24816         (\w+)               # NAME    - required
24817         (\s*\([^){]*\))?    # PROTO   - something in parens
24818         (\s*:)?             # ATTRS   - leading : of attribute list
24819         /gcx
24820           )
24821         {
24822             $match   = 1;
24823             $subname = $2;
24824             $proto   = $3;
24825             $attrs   = $4;
24826
24827             $package = ( defined($1) && $1 ) ? $1 : $current_package;
24828             $package =~ s/\'/::/g;
24829             if ( $package =~ /^\:/ ) { $package = 'main' . $package }
24830             $package =~ s/::$//;
24831             my $pos  = pos($input_line);
24832             my $numc = $pos - $pos_beg;
24833             $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
24834             $type = 'i';
24835         }
24836
24837         # Look for prototype/attributes not preceded on this line by subname;
24838         # This might be an anonymous sub with attributes,
24839         # or a prototype on a separate line from its sub name
24840         elsif (
24841             $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
24842             (\s*:)?                              # ATTRS leading ':'
24843             /gcx
24844             && ( $1 || $2 )
24845           )
24846         {
24847             $match = 1;
24848             $proto = $1;
24849             $attrs = $2;
24850
24851             # Handle prototype on separate line from subname
24852             if ($subname_saved) {
24853                 $package = $package_saved;
24854                 $subname = $subname_saved;
24855                 $tok     = $last_nonblank_token;
24856             }
24857             $type = 'i';
24858         }
24859
24860         if ($match) {
24861
24862             # ATTRS: if there are attributes, back up and let the ':' be
24863             # found later by the scanner.
24864             my $pos = pos($input_line);
24865             if ($attrs) {
24866                 $pos -= length($attrs);
24867             }
24868
24869             my $next_nonblank_token = $tok;
24870
24871             # catch case of line with leading ATTR ':' after anonymous sub
24872             if ( $pos == $pos_beg && $tok eq ':' ) {
24873                 $type              = 'A';
24874                 $in_attribute_list = 1;
24875             }
24876
24877             # We must convert back from character position
24878             # to pre_token index.
24879             else {
24880
24881                 # I don't think an error flag can occur here ..but ?
24882                 my $error;
24883                 ( $i, $error ) =
24884                   inverse_pretoken_map( $i, $pos, $rtoken_map,
24885                     $max_token_index );
24886                 if ($error) { warning("Possibly invalid sub\n") }
24887
24888                 # check for multiple definitions of a sub
24889                 ( $next_nonblank_token, my $i_next ) =
24890                   find_next_nonblank_token_on_this_line( $i, $rtokens,
24891                     $max_token_index );
24892             }
24893
24894             if ( $next_nonblank_token =~ /^(\s*|#)$/ )
24895             {    # skip blank or side comment
24896                 my ( $rpre_tokens, $rpre_types ) =
24897                   peek_ahead_for_n_nonblank_pre_tokens(1);
24898                 if ( defined($rpre_tokens) && @$rpre_tokens ) {
24899                     $next_nonblank_token = $rpre_tokens->[0];
24900                 }
24901                 else {
24902                     $next_nonblank_token = '}';
24903                 }
24904             }
24905             $package_saved = "";
24906             $subname_saved = "";
24907             if ( $next_nonblank_token eq '{' ) {
24908                 if ($subname) {
24909                     if ( $saw_function_definition{$package}{$subname} ) {
24910                         my $lno = $saw_function_definition{$package}{$subname};
24911                         warning(
24912 "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
24913                         );
24914                     }
24915                     $saw_function_definition{$package}{$subname} =
24916                       $tokenizer_self->{_last_line_number};
24917                 }
24918             }
24919             elsif ( $next_nonblank_token eq ';' ) {
24920             }
24921             elsif ( $next_nonblank_token eq '}' ) {
24922             }
24923
24924             # ATTRS - if an attribute list follows, remember the name
24925             # of the sub so the next opening brace can be labeled.
24926             # Setting 'statement_type' causes any ':'s to introduce
24927             # attributes.
24928             elsif ( $next_nonblank_token eq ':' ) {
24929                 $statement_type = $tok;
24930             }
24931
24932             # see if PROTO follows on another line:
24933             elsif ( $next_nonblank_token eq '(' ) {
24934                 if ( $attrs || $proto ) {
24935                     warning(
24936 "unexpected '(' after definition or declaration of sub '$subname'\n"
24937                     );
24938                 }
24939                 else {
24940                     $id_scan_state  = 'sub';    # we must come back to get proto
24941                     $statement_type = $tok;
24942                     $package_saved  = $package;
24943                     $subname_saved  = $subname;
24944                 }
24945             }
24946             elsif ($next_nonblank_token) {      # EOF technically ok
24947                 warning(
24948 "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
24949                 );
24950             }
24951             check_prototype( $proto, $package, $subname );
24952         }
24953
24954         # no match but line not blank
24955         else {
24956         }
24957         return ( $i, $tok, $type, $id_scan_state );
24958     }
24959 }
24960
24961 #########i###############################################################
24962 # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
24963 #########################################################################
24964
24965 sub find_next_nonblank_token {
24966     my ( $i, $rtokens, $max_token_index ) = @_;
24967
24968     if ( $i >= $max_token_index ) {
24969         if ( !peeked_ahead() ) {
24970             peeked_ahead(1);
24971             $rtokens =
24972               peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
24973         }
24974     }
24975     my $next_nonblank_token = $$rtokens[ ++$i ];
24976
24977     if ( $next_nonblank_token =~ /^\s*$/ ) {
24978         $next_nonblank_token = $$rtokens[ ++$i ];
24979     }
24980     return ( $next_nonblank_token, $i );
24981 }
24982
24983 sub numerator_expected {
24984
24985     # this is a filter for a possible numerator, in support of guessing
24986     # for the / pattern delimiter token.
24987     # returns -
24988     #   1 - yes
24989     #   0 - can't tell
24990     #  -1 - no
24991     # Note: I am using the convention that variables ending in
24992     # _expected have these 3 possible values.
24993     my ( $i, $rtokens, $max_token_index ) = @_;
24994     my $next_token = $$rtokens[ $i + 1 ];
24995     if ( $next_token eq '=' ) { $i++; }    # handle /=
24996     my ( $next_nonblank_token, $i_next ) =
24997       find_next_nonblank_token( $i, $rtokens, $max_token_index );
24998
24999     if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
25000         1;
25001     }
25002     else {
25003
25004         if ( $next_nonblank_token =~ /^\s*$/ ) {
25005             0;
25006         }
25007         else {
25008             -1;
25009         }
25010     }
25011 }
25012
25013 sub pattern_expected {
25014
25015     # This is the start of a filter for a possible pattern.
25016     # It looks at the token after a possbible pattern and tries to
25017     # determine if that token could end a pattern.
25018     # returns -
25019     #   1 - yes
25020     #   0 - can't tell
25021     #  -1 - no
25022     my ( $i, $rtokens, $max_token_index ) = @_;
25023     my $next_token = $$rtokens[ $i + 1 ];
25024     if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
25025     my ( $next_nonblank_token, $i_next ) =
25026       find_next_nonblank_token( $i, $rtokens, $max_token_index );
25027
25028     # list of tokens which may follow a pattern
25029     # (can probably be expanded)
25030     if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
25031     {
25032         1;
25033     }
25034     else {
25035
25036         if ( $next_nonblank_token =~ /^\s*$/ ) {
25037             0;
25038         }
25039         else {
25040             -1;
25041         }
25042     }
25043 }
25044
25045 sub find_next_nonblank_token_on_this_line {
25046     my ( $i, $rtokens, $max_token_index ) = @_;
25047     my $next_nonblank_token;
25048
25049     if ( $i < $max_token_index ) {
25050         $next_nonblank_token = $$rtokens[ ++$i ];
25051
25052         if ( $next_nonblank_token =~ /^\s*$/ ) {
25053
25054             if ( $i < $max_token_index ) {
25055                 $next_nonblank_token = $$rtokens[ ++$i ];
25056             }
25057         }
25058     }
25059     else {
25060         $next_nonblank_token = "";
25061     }
25062     return ( $next_nonblank_token, $i );
25063 }
25064
25065 sub find_angle_operator_termination {
25066
25067     # We are looking at a '<' and want to know if it is an angle operator.
25068     # We are to return:
25069     #   $i = pretoken index of ending '>' if found, current $i otherwise
25070     #   $type = 'Q' if found, '>' otherwise
25071     my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
25072     my $i    = $i_beg;
25073     my $type = '<';
25074     pos($input_line) = 1 + $$rtoken_map[$i];
25075
25076     my $filter;
25077
25078     # we just have to find the next '>' if a term is expected
25079     if ( $expecting == TERM ) { $filter = '[\>]' }
25080
25081     # we have to guess if we don't know what is expected
25082     elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
25083
25084     # shouldn't happen - we shouldn't be here if operator is expected
25085     else { warning("Program Bug in find_angle_operator_termination\n") }
25086
25087     # To illustrate what we might be looking at, in case we are
25088     # guessing, here are some examples of valid angle operators
25089     # (or file globs):
25090     #  <tmp_imp/*>
25091     #  <FH>
25092     #  <$fh>
25093     #  <*.c *.h>
25094     #  <_>
25095     #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
25096     #  <${PREFIX}*img*.$IMAGE_TYPE>
25097     #  <img*.$IMAGE_TYPE>
25098     #  <Timg*.$IMAGE_TYPE>
25099     #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
25100     #
25101     # Here are some examples of lines which do not have angle operators:
25102     #  return undef unless $self->[2]++ < $#{$self->[1]};
25103     #  < 2  || @$t >
25104     #
25105     # the following line from dlister.pl caused trouble:
25106     #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
25107     #
25108     # If the '<' starts an angle operator, it must end on this line and
25109     # it must not have certain characters like ';' and '=' in it.  I use
25110     # this to limit the testing.  This filter should be improved if
25111     # possible.
25112
25113     if ( $input_line =~ /($filter)/g ) {
25114
25115         if ( $1 eq '>' ) {
25116
25117             # We MAY have found an angle operator termination if we get
25118             # here, but we need to do more to be sure we haven't been
25119             # fooled.
25120             my $pos = pos($input_line);
25121
25122             my $pos_beg = $$rtoken_map[$i];
25123             my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
25124
25125             # Reject if the closing '>' follows a '-' as in:
25126             # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
25127             if ( $expecting eq UNKNOWN ) {
25128                 my $check = substr( $input_line, $pos - 2, 1 );
25129                 if ( $check eq '-' ) {
25130                     return ( $i, $type );
25131                 }
25132             }
25133
25134             ######################################debug#####
25135             #write_diagnostics( "ANGLE? :$str\n");
25136             #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
25137             ######################################debug#####
25138             $type = 'Q';
25139             my $error;
25140             ( $i, $error ) =
25141               inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25142
25143             # It may be possible that a quote ends midway in a pretoken.
25144             # If this happens, it may be necessary to split the pretoken.
25145             if ($error) {
25146                 warning(
25147                     "Possible tokinization error..please check this line\n");
25148                 report_possible_bug();
25149             }
25150
25151             # Now let's see where we stand....
25152             # OK if math op not possible
25153             if ( $expecting == TERM ) {
25154             }
25155
25156             # OK if there are no more than 2 pre-tokens inside
25157             # (not possible to write 2 token math between < and >)
25158             # This catches most common cases
25159             elsif ( $i <= $i_beg + 3 ) {
25160                 write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
25161             }
25162
25163             # Not sure..
25164             else {
25165
25166                 # Let's try a Brace Test: any braces inside must balance
25167                 my $br = 0;
25168                 while ( $str =~ /\{/g ) { $br++ }
25169                 while ( $str =~ /\}/g ) { $br-- }
25170                 my $sb = 0;
25171                 while ( $str =~ /\[/g ) { $sb++ }
25172                 while ( $str =~ /\]/g ) { $sb-- }
25173                 my $pr = 0;
25174                 while ( $str =~ /\(/g ) { $pr++ }
25175                 while ( $str =~ /\)/g ) { $pr-- }
25176
25177                 # if braces do not balance - not angle operator
25178                 if ( $br || $sb || $pr ) {
25179                     $i    = $i_beg;
25180                     $type = '<';
25181                     write_diagnostics(
25182                         "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
25183                 }
25184
25185                 # we should keep doing more checks here...to be continued
25186                 # Tentatively accepting this as a valid angle operator.
25187                 # There are lots more things that can be checked.
25188                 else {
25189                     write_diagnostics(
25190                         "ANGLE-Guessing yes: $str expecting=$expecting\n");
25191                     write_logfile_entry("Guessing angle operator here: $str\n");
25192                 }
25193             }
25194         }
25195
25196         # didn't find ending >
25197         else {
25198             if ( $expecting == TERM ) {
25199                 warning("No ending > for angle operator\n");
25200             }
25201         }
25202     }
25203     return ( $i, $type );
25204 }
25205
25206 sub scan_number_do {
25207
25208     #  scan a number in any of the formats that Perl accepts
25209     #  Underbars (_) are allowed in decimal numbers.
25210     #  input parameters -
25211     #      $input_line  - the string to scan
25212     #      $i           - pre_token index to start scanning
25213     #    $rtoken_map    - reference to the pre_token map giving starting
25214     #                    character position in $input_line of token $i
25215     #  output parameters -
25216     #    $i            - last pre_token index of the number just scanned
25217     #    number        - the number (characters); or undef if not a number
25218
25219     my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
25220     my $pos_beg = $$rtoken_map[$i];
25221     my $pos;
25222     my $i_begin = $i;
25223     my $number  = undef;
25224     my $type    = $input_type;
25225
25226     my $first_char = substr( $input_line, $pos_beg, 1 );
25227
25228     # Look for bad starting characters; Shouldn't happen..
25229     if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
25230         warning("Program bug - scan_number given character $first_char\n");
25231         report_definite_bug();
25232         return ( $i, $type, $number );
25233     }
25234
25235     # handle v-string without leading 'v' character ('Two Dot' rule)
25236     # (vstring.t)
25237     # TODO: v-strings may contain underscores
25238     pos($input_line) = $pos_beg;
25239     if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
25240         $pos = pos($input_line);
25241         my $numc = $pos - $pos_beg;
25242         $number = substr( $input_line, $pos_beg, $numc );
25243         $type = 'v';
25244         report_v_string($number);
25245     }
25246
25247     # handle octal, hex, binary
25248     if ( !defined($number) ) {
25249         pos($input_line) = $pos_beg;
25250         if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
25251         {
25252             $pos = pos($input_line);
25253             my $numc = $pos - $pos_beg;
25254             $number = substr( $input_line, $pos_beg, $numc );
25255             $type = 'n';
25256         }
25257     }
25258
25259     # handle decimal
25260     if ( !defined($number) ) {
25261         pos($input_line) = $pos_beg;
25262
25263         if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
25264             $pos = pos($input_line);
25265
25266             # watch out for things like 0..40 which would give 0. by this;
25267             if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
25268                 && ( substr( $input_line, $pos, 1 ) eq '.' ) )
25269             {
25270                 $pos--;
25271             }
25272             my $numc = $pos - $pos_beg;
25273             $number = substr( $input_line, $pos_beg, $numc );
25274             $type = 'n';
25275         }
25276     }
25277
25278     # filter out non-numbers like e + - . e2  .e3 +e6
25279     # the rule: at least one digit, and any 'e' must be preceded by a digit
25280     if (
25281         $number !~ /\d/    # no digits
25282         || (   $number =~ /^(.*)[eE]/
25283             && $1 !~ /\d/ )    # or no digits before the 'e'
25284       )
25285     {
25286         $number = undef;
25287         $type   = $input_type;
25288         return ( $i, $type, $number );
25289     }
25290
25291     # Found a number; now we must convert back from character position
25292     # to pre_token index. An error here implies user syntax error.
25293     # An example would be an invalid octal number like '009'.
25294     my $error;
25295     ( $i, $error ) =
25296       inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25297     if ($error) { warning("Possibly invalid number\n") }
25298
25299     return ( $i, $type, $number );
25300 }
25301
25302 sub inverse_pretoken_map {
25303
25304     # Starting with the current pre_token index $i, scan forward until
25305     # finding the index of the next pre_token whose position is $pos.
25306     my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
25307     my $error = 0;
25308
25309     while ( ++$i <= $max_token_index ) {
25310
25311         if ( $pos <= $$rtoken_map[$i] ) {
25312
25313             # Let the calling routine handle errors in which we do not
25314             # land on a pre-token boundary.  It can happen by running
25315             # perltidy on some non-perl scripts, for example.
25316             if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
25317             $i--;
25318             last;
25319         }
25320     }
25321     return ( $i, $error );
25322 }
25323
25324 sub find_here_doc {
25325
25326     # find the target of a here document, if any
25327     # input parameters:
25328     #   $i - token index of the second < of <<
25329     #   ($i must be less than the last token index if this is called)
25330     # output parameters:
25331     #   $found_target = 0 didn't find target; =1 found target
25332     #   HERE_TARGET - the target string (may be empty string)
25333     #   $i - unchanged if not here doc,
25334     #    or index of the last token of the here target
25335     #   $saw_error - flag noting unbalanced quote on here target
25336     my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
25337     my $ibeg                 = $i;
25338     my $found_target         = 0;
25339     my $here_doc_target      = '';
25340     my $here_quote_character = '';
25341     my $saw_error            = 0;
25342     my ( $next_nonblank_token, $i_next_nonblank, $next_token );
25343     $next_token = $$rtokens[ $i + 1 ];
25344
25345     # perl allows a backslash before the target string (heredoc.t)
25346     my $backslash = 0;
25347     if ( $next_token eq '\\' ) {
25348         $backslash  = 1;
25349         $next_token = $$rtokens[ $i + 2 ];
25350     }
25351
25352     ( $next_nonblank_token, $i_next_nonblank ) =
25353       find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
25354
25355     if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
25356
25357         my $in_quote    = 1;
25358         my $quote_depth = 0;
25359         my $quote_pos   = 0;
25360         my $quoted_string;
25361
25362         (
25363             $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
25364             $quoted_string
25365           )
25366           = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
25367             $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
25368
25369         if ($in_quote) {    # didn't find end of quote, so no target found
25370             $i = $ibeg;
25371             if ( $expecting == TERM ) {
25372                 warning(
25373 "Did not find here-doc string terminator ($here_quote_character) before end of line \n"
25374                 );
25375                 $saw_error = 1;
25376             }
25377         }
25378         else {              # found ending quote
25379             my $j;
25380             $found_target = 1;
25381
25382             my $tokj;
25383             for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
25384                 $tokj = $$rtokens[$j];
25385
25386                 # we have to remove any backslash before the quote character
25387                 # so that the here-doc-target exactly matches this string
25388                 next
25389                   if ( $tokj eq "\\"
25390                     && $j < $i - 1
25391                     && $$rtokens[ $j + 1 ] eq $here_quote_character );
25392                 $here_doc_target .= $tokj;
25393             }
25394         }
25395     }
25396
25397     elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
25398         $found_target = 1;
25399         write_logfile_entry(
25400             "found blank here-target after <<; suggest using \"\"\n");
25401         $i = $ibeg;
25402     }
25403     elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
25404
25405         my $here_doc_expected;
25406         if ( $expecting == UNKNOWN ) {
25407             $here_doc_expected = guess_if_here_doc($next_token);
25408         }
25409         else {
25410             $here_doc_expected = 1;
25411         }
25412
25413         if ($here_doc_expected) {
25414             $found_target    = 1;
25415             $here_doc_target = $next_token;
25416             $i               = $ibeg + 1;
25417         }
25418
25419     }
25420     else {
25421
25422         if ( $expecting == TERM ) {
25423             $found_target = 1;
25424             write_logfile_entry("Note: bare here-doc operator <<\n");
25425         }
25426         else {
25427             $i = $ibeg;
25428         }
25429     }
25430
25431     # patch to neglect any prepended backslash
25432     if ( $found_target && $backslash ) { $i++ }
25433
25434     return ( $found_target, $here_doc_target, $here_quote_character, $i,
25435         $saw_error );
25436 }
25437
25438 sub do_quote {
25439
25440     # follow (or continue following) quoted string(s)
25441     # $in_quote return code:
25442     #   0 - ok, found end
25443     #   1 - still must find end of quote whose target is $quote_character
25444     #   2 - still looking for end of first of two quotes
25445     #
25446     # Returns updated strings:
25447     #  $quoted_string_1 = quoted string seen while in_quote=1
25448     #  $quoted_string_2 = quoted string seen while in_quote=2
25449     my (
25450         $i,               $in_quote,    $quote_character,
25451         $quote_pos,       $quote_depth, $quoted_string_1,
25452         $quoted_string_2, $rtokens,     $rtoken_map,
25453         $max_token_index
25454     ) = @_;
25455
25456     my $in_quote_starting = $in_quote;
25457
25458     my $quoted_string;
25459     if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
25460         my $ibeg = $i;
25461         (
25462             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25463             $quoted_string
25464           )
25465           = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
25466             $quote_pos, $quote_depth, $max_token_index );
25467         $quoted_string_2 .= $quoted_string;
25468         if ( $in_quote == 1 ) {
25469             if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
25470             $quote_character = '';
25471         }
25472         else {
25473             $quoted_string_2 .= "\n";
25474         }
25475     }
25476
25477     if ( $in_quote == 1 ) {    # one (more) quote to follow
25478         my $ibeg = $i;
25479         (
25480             $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25481             $quoted_string
25482           )
25483           = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
25484             $quote_pos, $quote_depth, $max_token_index );
25485         $quoted_string_1 .= $quoted_string;
25486         if ( $in_quote == 1 ) {
25487             $quoted_string_1 .= "\n";
25488         }
25489     }
25490     return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
25491         $quoted_string_1, $quoted_string_2 );
25492 }
25493
25494 sub follow_quoted_string {
25495
25496     # scan for a specific token, skipping escaped characters
25497     # if the quote character is blank, use the first non-blank character
25498     # input parameters:
25499     #   $rtokens = reference to the array of tokens
25500     #   $i = the token index of the first character to search
25501     #   $in_quote = number of quoted strings being followed
25502     #   $beginning_tok = the starting quote character
25503     #   $quote_pos = index to check next for alphanumeric delimiter
25504     # output parameters:
25505     #   $i = the token index of the ending quote character
25506     #   $in_quote = decremented if found end, unchanged if not
25507     #   $beginning_tok = the starting quote character
25508     #   $quote_pos = index to check next for alphanumeric delimiter
25509     #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
25510     #   $quoted_string = the text of the quote (without quotation tokens)
25511     my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
25512         $max_token_index )
25513       = @_;
25514     my ( $tok, $end_tok );
25515     my $i             = $i_beg - 1;
25516     my $quoted_string = "";
25517
25518     TOKENIZER_DEBUG_FLAG_QUOTE && do {
25519         print
25520 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
25521     };
25522
25523     # get the corresponding end token
25524     if ( $beginning_tok !~ /^\s*$/ ) {
25525         $end_tok = matching_end_token($beginning_tok);
25526     }
25527
25528     # a blank token means we must find and use the first non-blank one
25529     else {
25530         my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
25531
25532         while ( $i < $max_token_index ) {
25533             $tok = $$rtokens[ ++$i ];
25534
25535             if ( $tok !~ /^\s*$/ ) {
25536
25537                 if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
25538                     $i = $max_token_index;
25539                 }
25540                 else {
25541
25542                     if ( length($tok) > 1 ) {
25543                         if ( $quote_pos <= 0 ) { $quote_pos = 1 }
25544                         $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
25545                     }
25546                     else {
25547                         $beginning_tok = $tok;
25548                         $quote_pos     = 0;
25549                     }
25550                     $end_tok     = matching_end_token($beginning_tok);
25551                     $quote_depth = 1;
25552                     last;
25553                 }
25554             }
25555             else {
25556                 $allow_quote_comments = 1;
25557             }
25558         }
25559     }
25560
25561     # There are two different loops which search for the ending quote
25562     # character.  In the rare case of an alphanumeric quote delimiter, we
25563     # have to look through alphanumeric tokens character-by-character, since
25564     # the pre-tokenization process combines multiple alphanumeric
25565     # characters, whereas for a non-alphanumeric delimiter, only tokens of
25566     # length 1 can match.
25567
25568     ###################################################################
25569     # Case 1 (rare): loop for case of alphanumeric quote delimiter..
25570     # "quote_pos" is the position the current word to begin searching
25571     ###################################################################
25572     if ( $beginning_tok =~ /\w/ ) {
25573
25574         # Note this because it is not recommended practice except
25575         # for obfuscated perl contests
25576         if ( $in_quote == 1 ) {
25577             write_logfile_entry(
25578                 "Note: alphanumeric quote delimiter ($beginning_tok) \n");
25579         }
25580
25581         while ( $i < $max_token_index ) {
25582
25583             if ( $quote_pos == 0 || ( $i < 0 ) ) {
25584                 $tok = $$rtokens[ ++$i ];
25585
25586                 if ( $tok eq '\\' ) {
25587
25588                     # retain backslash unless it hides the end token
25589                     $quoted_string .= $tok
25590                       unless $$rtokens[ $i + 1 ] eq $end_tok;
25591                     $quote_pos++;
25592                     last if ( $i >= $max_token_index );
25593                     $tok = $$rtokens[ ++$i ];
25594                 }
25595             }
25596             my $old_pos = $quote_pos;
25597
25598             unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
25599             {
25600
25601             }
25602             $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
25603
25604             if ( $quote_pos > 0 ) {
25605
25606                 $quoted_string .=
25607                   substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
25608
25609                 $quote_depth--;
25610
25611                 if ( $quote_depth == 0 ) {
25612                     $in_quote--;
25613                     last;
25614                 }
25615             }
25616             else {
25617                 $quoted_string .= substr( $tok, $old_pos );
25618             }
25619         }
25620     }
25621
25622     ########################################################################
25623     # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
25624     ########################################################################
25625     else {
25626
25627         while ( $i < $max_token_index ) {
25628             $tok = $$rtokens[ ++$i ];
25629
25630             if ( $tok eq $end_tok ) {
25631                 $quote_depth--;
25632
25633                 if ( $quote_depth == 0 ) {
25634                     $in_quote--;
25635                     last;
25636                 }
25637             }
25638             elsif ( $tok eq $beginning_tok ) {
25639                 $quote_depth++;
25640             }
25641             elsif ( $tok eq '\\' ) {
25642
25643                 # retain backslash unless it hides the beginning or end token
25644                 $tok = $$rtokens[ ++$i ];
25645                 $quoted_string .= '\\'
25646                   unless ( $tok eq $end_tok || $tok eq $beginning_tok );
25647             }
25648             $quoted_string .= $tok;
25649         }
25650     }
25651     if ( $i > $max_token_index ) { $i = $max_token_index }
25652     return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
25653         $quoted_string );
25654 }
25655
25656 sub indicate_error {
25657     my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
25658     interrupt_logfile();
25659     warning($msg);
25660     write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
25661     resume_logfile();
25662 }
25663
25664 sub write_error_indicator_pair {
25665     my ( $line_number, $input_line, $pos, $carrat ) = @_;
25666     my ( $offset, $numbered_line, $underline ) =
25667       make_numbered_line( $line_number, $input_line, $pos );
25668     $underline = write_on_underline( $underline, $pos - $offset, $carrat );
25669     warning( $numbered_line . "\n" );
25670     $underline =~ s/\s*$//;
25671     warning( $underline . "\n" );
25672 }
25673
25674 sub make_numbered_line {
25675
25676     #  Given an input line, its line number, and a character position of
25677     #  interest, create a string not longer than 80 characters of the form
25678     #     $lineno: sub_string
25679     #  such that the sub_string of $str contains the position of interest
25680     #
25681     #  Here is an example of what we want, in this case we add trailing
25682     #  '...' because the line is long.
25683     #
25684     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
25685     #
25686     #  Here is another example, this time in which we used leading '...'
25687     #  because of excessive length:
25688     #
25689     # 2: ... er of the World Wide Web Consortium's
25690     #
25691     #  input parameters are:
25692     #   $lineno = line number
25693     #   $str = the text of the line
25694     #   $pos = position of interest (the error) : 0 = first character
25695     #
25696     #   We return :
25697     #     - $offset = an offset which corrects the position in case we only
25698     #       display part of a line, such that $pos-$offset is the effective
25699     #       position from the start of the displayed line.
25700     #     - $numbered_line = the numbered line as above,
25701     #     - $underline = a blank 'underline' which is all spaces with the same
25702     #       number of characters as the numbered line.
25703
25704     my ( $lineno, $str, $pos ) = @_;
25705     my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
25706     my $excess = length($str) - $offset - 68;
25707     my $numc   = ( $excess > 0 ) ? 68 : undef;
25708
25709     if ( defined($numc) ) {
25710         if ( $offset == 0 ) {
25711             $str = substr( $str, $offset, $numc - 4 ) . " ...";
25712         }
25713         else {
25714             $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
25715         }
25716     }
25717     else {
25718
25719         if ( $offset == 0 ) {
25720         }
25721         else {
25722             $str = "... " . substr( $str, $offset + 4 );
25723         }
25724     }
25725
25726     my $numbered_line = sprintf( "%d: ", $lineno );
25727     $offset -= length($numbered_line);
25728     $numbered_line .= $str;
25729     my $underline = " " x length($numbered_line);
25730     return ( $offset, $numbered_line, $underline );
25731 }
25732
25733 sub write_on_underline {
25734
25735     # The "underline" is a string that shows where an error is; it starts
25736     # out as a string of blanks with the same length as the numbered line of
25737     # code above it, and we have to add marking to show where an error is.
25738     # In the example below, we want to write the string '--^' just below
25739     # the line of bad code:
25740     #
25741     # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
25742     #                 ---^
25743     # We are given the current underline string, plus a position and a
25744     # string to write on it.
25745     #
25746     # In the above example, there will be 2 calls to do this:
25747     # First call:  $pos=19, pos_chr=^
25748     # Second call: $pos=16, pos_chr=---
25749     #
25750     # This is a trivial thing to do with substr, but there is some
25751     # checking to do.
25752
25753     my ( $underline, $pos, $pos_chr ) = @_;
25754
25755     # check for error..shouldn't happen
25756     unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
25757         return $underline;
25758     }
25759     my $excess = length($pos_chr) + $pos - length($underline);
25760     if ( $excess > 0 ) {
25761         $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
25762     }
25763     substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
25764     return ($underline);
25765 }
25766
25767 sub pre_tokenize {
25768
25769     # Break a string, $str, into a sequence of preliminary tokens.  We
25770     # are interested in these types of tokens:
25771     #   words       (type='w'),            example: 'max_tokens_wanted'
25772     #   digits      (type = 'd'),          example: '0755'
25773     #   whitespace  (type = 'b'),          example: '   '
25774     #   any other single character (i.e. punct; type = the character itself).
25775     # We cannot do better than this yet because we might be in a quoted
25776     # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
25777     # tokens.
25778     my ( $str, $max_tokens_wanted ) = @_;
25779
25780     # we return references to these 3 arrays:
25781     my @tokens    = ();     # array of the tokens themselves
25782     my @token_map = (0);    # string position of start of each token
25783     my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
25784
25785     do {
25786
25787         # whitespace
25788         if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
25789
25790         # numbers
25791         # note that this must come before words!
25792         elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
25793
25794         # words
25795         elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
25796
25797         # single-character punctuation
25798         elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
25799
25800         # that's all..
25801         else {
25802             return ( \@tokens, \@token_map, \@type );
25803         }
25804
25805         push @tokens,    $1;
25806         push @token_map, pos($str);
25807
25808     } while ( --$max_tokens_wanted != 0 );
25809
25810     return ( \@tokens, \@token_map, \@type );
25811 }
25812
25813 sub show_tokens {
25814
25815     # this is an old debug routine
25816     my ( $rtokens, $rtoken_map ) = @_;
25817     my $num = scalar(@$rtokens);
25818     my $i;
25819
25820     for ( $i = 0 ; $i < $num ; $i++ ) {
25821         my $len = length( $$rtokens[$i] );
25822         print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
25823     }
25824 }
25825
25826 sub matching_end_token {
25827
25828     # find closing character for a pattern
25829     my $beginning_token = shift;
25830
25831     if ( $beginning_token eq '{' ) {
25832         '}';
25833     }
25834     elsif ( $beginning_token eq '[' ) {
25835         ']';
25836     }
25837     elsif ( $beginning_token eq '<' ) {
25838         '>';
25839     }
25840     elsif ( $beginning_token eq '(' ) {
25841         ')';
25842     }
25843     else {
25844         $beginning_token;
25845     }
25846 }
25847
25848 sub dump_token_types {
25849     my $class = shift;
25850     my $fh    = shift;
25851
25852     # This should be the latest list of token types in use
25853     # adding NEW_TOKENS: add a comment here
25854     print $fh <<'END_OF_LIST';
25855
25856 Here is a list of the token types currently used for lines of type 'CODE'.
25857 For the following tokens, the "type" of a token is just the token itself.
25858
25859 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
25860 ( ) <= >= == =~ !~ != ++ -- /= x=
25861 ... **= <<= >>= &&= ||= //= <=>
25862 , + - / * | % ! x ~ = \ ? : . < > ^ &
25863
25864 The following additional token types are defined:
25865
25866  type    meaning
25867     b    blank (white space)
25868     {    indent: opening structural curly brace or square bracket or paren
25869          (code block, anonymous hash reference, or anonymous array reference)
25870     }    outdent: right structural curly brace or square bracket or paren
25871     [    left non-structural square bracket (enclosing an array index)
25872     ]    right non-structural square bracket
25873     (    left non-structural paren (all but a list right of an =)
25874     )    right non-structural parena
25875     L    left non-structural curly brace (enclosing a key)
25876     R    right non-structural curly brace
25877     ;    terminal semicolon
25878     f    indicates a semicolon in a "for" statement
25879     h    here_doc operator <<
25880     #    a comment
25881     Q    indicates a quote or pattern
25882     q    indicates a qw quote block
25883     k    a perl keyword
25884     C    user-defined constant or constant function (with void prototype = ())
25885     U    user-defined function taking parameters
25886     G    user-defined function taking block parameter (like grep/map/eval)
25887     M    (unused, but reserved for subroutine definition name)
25888     P    (unused, but -html uses it to label pod text)
25889     t    type indicater such as %,$,@,*,&,sub
25890     w    bare word (perhaps a subroutine call)
25891     i    identifier of some type (with leading %, $, @, *, &, sub, -> )
25892     n    a number
25893     v    a v-string
25894     F    a file test operator (like -e)
25895     Y    File handle
25896     Z    identifier in indirect object slot: may be file handle, object
25897     J    LABEL:  code block label
25898     j    LABEL after next, last, redo, goto
25899     p    unary +
25900     m    unary -
25901     pp   pre-increment operator ++
25902     mm   pre-decrement operator --
25903     A    : used as attribute separator
25904
25905     Here are the '_line_type' codes used internally:
25906     SYSTEM         - system-specific code before hash-bang line
25907     CODE           - line of perl code (including comments)
25908     POD_START      - line starting pod, such as '=head'
25909     POD            - pod documentation text
25910     POD_END        - last line of pod section, '=cut'
25911     HERE           - text of here-document
25912     HERE_END       - last line of here-doc (target word)
25913     FORMAT         - format section
25914     FORMAT_END     - last line of format section, '.'
25915     DATA_START     - __DATA__ line
25916     DATA           - unidentified text following __DATA__
25917     END_START      - __END__ line
25918     END            - unidentified text following __END__
25919     ERROR          - we are in big trouble, probably not a perl script
25920 END_OF_LIST
25921 }
25922
25923 BEGIN {
25924
25925     # These names are used in error messages
25926     @opening_brace_names = qw# '{' '[' '(' '?' #;
25927     @closing_brace_names = qw# '}' ']' ')' ':' #;
25928
25929     ## TESTING: added ~~
25930     my @digraphs = qw(
25931       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
25932       <= >= == =~ !~ != ++ -- /= x= ~~
25933     );
25934     @is_digraph{@digraphs} = (1) x scalar(@digraphs);
25935
25936     my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> );
25937     @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
25938
25939     # make a hash of all valid token types for self-checking the tokenizer
25940     # (adding NEW_TOKENS : select a new character and add to this list)
25941     my @valid_token_types = qw#
25942       A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
25943       { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
25944       #;
25945     push( @valid_token_types, @digraphs );
25946     push( @valid_token_types, @trigraphs );
25947     push( @valid_token_types, '#' );
25948     push( @valid_token_types, ',' );
25949     @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
25950
25951     # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
25952     my @file_test_operators =
25953       qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
25954     @is_file_test_operator{@file_test_operators} =
25955       (1) x scalar(@file_test_operators);
25956
25957     # these functions have prototypes of the form (&), so when they are
25958     # followed by a block, that block MAY BE followed by an operator.
25959     @_ = qw( do eval );
25960     @is_block_operator{@_} = (1) x scalar(@_);
25961
25962     # these functions allow an identifier in the indirect object slot
25963     @_ = qw( print printf sort exec system say);
25964     @is_indirect_object_taker{@_} = (1) x scalar(@_);
25965
25966     # These tokens may precede a code block
25967     # patched for SWITCH/CASE
25968     @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
25969       unless do while until eval for foreach map grep sort
25970       switch case given when);
25971     @is_code_block_token{@_} = (1) x scalar(@_);
25972
25973     # I'll build the list of keywords incrementally
25974     my @Keywords = ();
25975
25976     # keywords and tokens after which a value or pattern is expected,
25977     # but not an operator.  In other words, these should consume terms
25978     # to their right, or at least they are not expected to be followed
25979     # immediately by operators.
25980     my @value_requestor = qw(
25981       AUTOLOAD
25982       BEGIN
25983       CHECK
25984       DESTROY
25985       END
25986       EQ
25987       GE
25988       GT
25989       INIT
25990       LE
25991       LT
25992       NE
25993       abs
25994       accept
25995       alarm
25996       and
25997       atan2
25998       bind
25999       binmode
26000       bless
26001       caller
26002       chdir
26003       chmod
26004       chomp
26005       chop
26006       chown
26007       chr
26008       chroot
26009       close
26010       closedir
26011       cmp
26012       connect
26013       continue
26014       cos
26015       crypt
26016       dbmclose
26017       dbmopen
26018       defined
26019       delete
26020       die
26021       dump
26022       each
26023       else
26024       elsif
26025       eof
26026       eq
26027       exec
26028       exists
26029       exit
26030       exp
26031       fcntl
26032       fileno
26033       flock
26034       for
26035       foreach
26036       formline
26037       ge
26038       getc
26039       getgrgid
26040       getgrnam
26041       gethostbyaddr
26042       gethostbyname
26043       getnetbyaddr
26044       getnetbyname
26045       getpeername
26046       getpgrp
26047       getpriority
26048       getprotobyname
26049       getprotobynumber
26050       getpwnam
26051       getpwuid
26052       getservbyname
26053       getservbyport
26054       getsockname
26055       getsockopt
26056       glob
26057       gmtime
26058       goto
26059       grep
26060       gt
26061       hex
26062       if
26063       index
26064       int
26065       ioctl
26066       join
26067       keys
26068       kill
26069       last
26070       lc
26071       lcfirst
26072       le
26073       length
26074       link
26075       listen
26076       local
26077       localtime
26078       lock
26079       log
26080       lstat
26081       lt
26082       map
26083       mkdir
26084       msgctl
26085       msgget
26086       msgrcv
26087       msgsnd
26088       my
26089       ne
26090       next
26091       no
26092       not
26093       oct
26094       open
26095       opendir
26096       or
26097       ord
26098       our
26099       pack
26100       pipe
26101       pop
26102       pos
26103       print
26104       printf
26105       prototype
26106       push
26107       quotemeta
26108       rand
26109       read
26110       readdir
26111       readlink
26112       readline
26113       readpipe
26114       recv
26115       redo
26116       ref
26117       rename
26118       require
26119       reset
26120       return
26121       reverse
26122       rewinddir
26123       rindex
26124       rmdir
26125       scalar
26126       seek
26127       seekdir
26128       select
26129       semctl
26130       semget
26131       semop
26132       send
26133       sethostent
26134       setnetent
26135       setpgrp
26136       setpriority
26137       setprotoent
26138       setservent
26139       setsockopt
26140       shift
26141       shmctl
26142       shmget
26143       shmread
26144       shmwrite
26145       shutdown
26146       sin
26147       sleep
26148       socket
26149       socketpair
26150       sort
26151       splice
26152       split
26153       sprintf
26154       sqrt
26155       srand
26156       stat
26157       study
26158       substr
26159       symlink
26160       syscall
26161       sysopen
26162       sysread
26163       sysseek
26164       system
26165       syswrite
26166       tell
26167       telldir
26168       tie
26169       tied
26170       truncate
26171       uc
26172       ucfirst
26173       umask
26174       undef
26175       unless
26176       unlink
26177       unpack
26178       unshift
26179       untie
26180       until
26181       use
26182       utime
26183       values
26184       vec
26185       waitpid
26186       warn
26187       while
26188       write
26189       xor
26190
26191       switch
26192       case
26193       given
26194       when
26195       err
26196       say
26197     );
26198
26199     # patched above for SWITCH/CASE given/when err say
26200     # 'err' is a fairly safe addition.
26201     # TODO: 'default' still needed if appropriate
26202     # 'use feature' seen, but perltidy works ok without it.
26203     # Concerned that 'default' could break code.
26204     push( @Keywords, @value_requestor );
26205
26206     # These are treated the same but are not keywords:
26207     my @extra_vr = qw(
26208       constant
26209       vars
26210     );
26211     push( @value_requestor, @extra_vr );
26212
26213     @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
26214
26215     # this list contains keywords which do not look for arguments,
26216     # so that they might be followed by an operator, or at least
26217     # not a term.
26218     my @operator_requestor = qw(
26219       endgrent
26220       endhostent
26221       endnetent
26222       endprotoent
26223       endpwent
26224       endservent
26225       fork
26226       getgrent
26227       gethostent
26228       getlogin
26229       getnetent
26230       getppid
26231       getprotoent
26232       getpwent
26233       getservent
26234       setgrent
26235       setpwent
26236       time
26237       times
26238       wait
26239       wantarray
26240     );
26241
26242     push( @Keywords, @operator_requestor );
26243
26244     # These are treated the same but are not considered keywords:
26245     my @extra_or = qw(
26246       STDERR
26247       STDIN
26248       STDOUT
26249     );
26250
26251     push( @operator_requestor, @extra_or );
26252
26253     @expecting_operator_token{@operator_requestor} =
26254       (1) x scalar(@operator_requestor);
26255
26256     # these token TYPES expect trailing operator but not a term
26257     # note: ++ and -- are post-increment and decrement, 'C' = constant
26258     my @operator_requestor_types = qw( ++ -- C <> q );
26259     @expecting_operator_types{@operator_requestor_types} =
26260       (1) x scalar(@operator_requestor_types);
26261
26262     # these token TYPES consume values (terms)
26263     # note: pp and mm are pre-increment and decrement
26264     # f=semicolon in for,  F=file test operator
26265     my @value_requestor_type = qw#
26266       L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
26267       **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
26268       <= >= == != => \ > < % * / ? & | ** <=> ~~
26269       f F pp mm Y p m U J G j >> << ^ t
26270       #;
26271     push( @value_requestor_type, ',' )
26272       ;    # (perl doesn't like a ',' in a qw block)
26273     @expecting_term_types{@value_requestor_type} =
26274       (1) x scalar(@value_requestor_type);
26275
26276     # Note: the following valid token types are not assigned here to
26277     # hashes requesting to be followed by values or terms, but are
26278     # instead currently hard-coded into sub operator_expected:
26279     # ) -> :: Q R Z ] b h i k n v w } #
26280
26281     # For simple syntax checking, it is nice to have a list of operators which
26282     # will really be unhappy if not followed by a term.  This includes most
26283     # of the above...
26284     %really_want_term = %expecting_term_types;
26285
26286     # with these exceptions...
26287     delete $really_want_term{'U'}; # user sub, depends on prototype
26288     delete $really_want_term{'F'}; # file test works on $_ if no following term
26289     delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
26290                                    # let perl do it
26291
26292     @_ = qw(q qq qw qx qr s y tr m);
26293     @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
26294
26295     # These keywords are handled specially in the tokenizer code:
26296     my @special_keywords = qw(
26297       do
26298       eval
26299       format
26300       m
26301       package
26302       q
26303       qq
26304       qr
26305       qw
26306       qx
26307       s
26308       sub
26309       tr
26310       y
26311     );
26312     push( @Keywords, @special_keywords );
26313
26314     # Keywords after which list formatting may be used
26315     # WARNING: do not include |map|grep|eval or perl may die on
26316     # syntax errors (map1.t).
26317     my @keyword_taking_list = qw(
26318       and
26319       chmod
26320       chomp
26321       chop
26322       chown
26323       dbmopen
26324       die
26325       elsif
26326       exec
26327       fcntl
26328       for
26329       foreach
26330       formline
26331       getsockopt
26332       if
26333       index
26334       ioctl
26335       join
26336       kill
26337       local
26338       msgctl
26339       msgrcv
26340       msgsnd
26341       my
26342       open
26343       or
26344       our
26345       pack
26346       print
26347       printf
26348       push
26349       read
26350       readpipe
26351       recv
26352       return
26353       reverse
26354       rindex
26355       seek
26356       select
26357       semctl
26358       semget
26359       send
26360       setpriority
26361       setsockopt
26362       shmctl
26363       shmget
26364       shmread
26365       shmwrite
26366       socket
26367       socketpair
26368       sort
26369       splice
26370       split
26371       sprintf
26372       substr
26373       syscall
26374       sysopen
26375       sysread
26376       sysseek
26377       system
26378       syswrite
26379       tie
26380       unless
26381       unlink
26382       unpack
26383       unshift
26384       until
26385       vec
26386       warn
26387       while
26388     );
26389     @is_keyword_taking_list{@keyword_taking_list} =
26390       (1) x scalar(@keyword_taking_list);
26391
26392     # These are not used in any way yet
26393     #    my @unused_keywords = qw(
26394     #      CORE
26395     #     __FILE__
26396     #     __LINE__
26397     #     __PACKAGE__
26398     #     );
26399
26400     #  The list of keywords was extracted from function 'keyword' in
26401     #  perl file toke.c version 5.005.03, using this utility, plus a
26402     #  little editing: (file getkwd.pl):
26403     #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
26404     #  Add 'get' prefix where necessary, then split into the above lists.
26405     #  This list should be updated as necessary.
26406     #  The list should not contain these special variables:
26407     #  ARGV DATA ENV SIG STDERR STDIN STDOUT
26408     #  __DATA__ __END__
26409
26410     @is_keyword{@Keywords} = (1) x scalar(@Keywords);
26411 }
26412 1;
26413 __END__
26414
26415 =head1 NAME
26416
26417 Perl::Tidy - Parses and beautifies perl source
26418
26419 =head1 SYNOPSIS
26420
26421     use Perl::Tidy;
26422
26423     Perl::Tidy::perltidy(
26424         source            => $source,
26425         destination       => $destination,
26426         stderr            => $stderr,
26427         argv              => $argv,
26428         perltidyrc        => $perltidyrc,
26429         logfile           => $logfile,
26430         errorfile         => $errorfile,
26431         formatter         => $formatter,           # callback object (see below)
26432         dump_options      => $dump_options,
26433         dump_options_type => $dump_options_type,
26434     );
26435
26436 =head1 DESCRIPTION
26437
26438 This module makes the functionality of the perltidy utility available to perl
26439 scripts.  Any or all of the input parameters may be omitted, in which case the
26440 @ARGV array will be used to provide input parameters as described
26441 in the perltidy(1) man page.
26442
26443 For example, the perltidy script is basically just this:
26444
26445     use Perl::Tidy;
26446     Perl::Tidy::perltidy();
26447
26448 The module accepts input and output streams by a variety of methods.
26449 The following list of parameters may be any of a the following: a
26450 filename, an ARRAY reference, a SCALAR reference, or an object with
26451 either a B<getline> or B<print> method, as appropriate.
26452
26453         source            - the source of the script to be formatted
26454         destination       - the destination of the formatted output
26455         stderr            - standard error output
26456         perltidyrc        - the .perltidyrc file
26457         logfile           - the .LOG file stream, if any
26458         errorfile         - the .ERR file stream, if any
26459         dump_options      - ref to a hash to receive parameters (see below),
26460         dump_options_type - controls contents of dump_options
26461         dump_getopt_flags - ref to a hash to receive Getopt flags
26462         dump_options_category - ref to a hash giving category of options
26463         dump_abbreviations    - ref to a hash giving all abbreviations
26464
26465 The following chart illustrates the logic used to decide how to
26466 treat a parameter.
26467
26468    ref($param)  $param is assumed to be:
26469    -----------  ---------------------
26470    undef        a filename
26471    SCALAR       ref to string
26472    ARRAY        ref to array
26473    (other)      object with getline (if source) or print method
26474
26475 If the parameter is an object, and the object has a B<close> method, that
26476 close method will be called at the end of the stream.
26477
26478 =over 4
26479
26480 =item source
26481
26482 If the B<source> parameter is given, it defines the source of the
26483 input stream.
26484
26485 =item destination
26486
26487 If the B<destination> parameter is given, it will be used to define the
26488 file or memory location to receive output of perltidy.
26489
26490 =item stderr
26491
26492 The B<stderr> parameter allows the calling program to capture the output
26493 to what would otherwise go to the standard error output device.
26494
26495 =item perltidyrc
26496
26497 If the B<perltidyrc> file is given, it will be used instead of any
26498 F<.perltidyrc> configuration file that would otherwise be used.
26499
26500 =item argv
26501
26502 If the B<argv> parameter is given, it will be used instead of the
26503 B<@ARGV> array.  The B<argv> parameter may be a string, a reference to a
26504 string, or a reference to an array.  If it is a string or reference to a
26505 string, it will be parsed into an array of items just as if it were a
26506 command line string.
26507
26508 =item dump_options
26509
26510 If the B<dump_options> parameter is given, it must be the reference to a hash.
26511 In this case, the parameters contained in any perltidyrc configuration file
26512 will be placed in this hash and perltidy will return immediately.  This is
26513 equivalent to running perltidy with --dump-options, except that the perameters
26514 are returned in a hash rather than dumped to standard output.  Also, by default
26515 only the parameters in the perltidyrc file are returned, but this can be
26516 changed (see the next parameter).  This parameter provides a convenient method
26517 for external programs to read a perltidyrc file.  An example program using
26518 this feature, F<perltidyrc_dump.pl>, is included in the distribution.
26519
26520 Any combination of the B<dump_> parameters may be used together.
26521
26522 =item dump_options_type
26523
26524 This parameter is a string which can be used to control the parameters placed
26525 in the hash reference supplied by B<dump_options>.  The possible values are
26526 'perltidyrc' (default) and 'full'.  The 'full' parameter causes both the
26527 default options plus any options found in a perltidyrc file to be returned.
26528
26529 =item dump_getopt_flags
26530
26531 If the B<dump_getopt_flags> parameter is given, it must be the reference to a
26532 hash.  This hash will receive all of the parameters that perltidy understands
26533 and flags that are passed to Getopt::Long.  This parameter may be
26534 used alone or with the B<dump_options> flag.  Perltidy will
26535 exit immediately after filling this hash.  See the demo program
26536 F<perltidyrc_dump.pl> for example usage.
26537
26538 =item dump_options_category
26539
26540 If the B<dump_options_category> parameter is given, it must be the reference to a
26541 hash.  This hash will receive a hash with keys equal to all long parameter names
26542 and values equal to the title of the corresponding section of the perltidy manual.
26543 See the demo program F<perltidyrc_dump.pl> for example usage.
26544
26545 =item dump_abbreviations
26546
26547 If the B<dump_abbreviations> parameter is given, it must be the reference to a
26548 hash.  This hash will receive all abbreviations used by Perl::Tidy.  See the
26549 demo program F<perltidyrc_dump.pl> for example usage.
26550
26551 =back
26552
26553 =head1 EXAMPLE
26554
26555 The following example passes perltidy a snippet as a reference
26556 to a string and receives the result back in a reference to
26557 an array.
26558
26559  use Perl::Tidy;
26560
26561  # some messy source code to format
26562  my $source = <<'EOM';
26563  use strict;
26564  my @editors=('Emacs', 'Vi   '); my $rand = rand();
26565  print "A poll of 10 random programmers gave these results:\n";
26566  foreach(0..10) {
26567  my $i=int ($rand+rand());
26568  print " $editors[$i] users are from Venus" . ", " .
26569  "$editors[1-$i] users are from Mars" .
26570  "\n";
26571  }
26572  EOM
26573
26574  # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
26575  my @dest;
26576  perltidy( source => \$source, destination => \@dest );
26577  foreach (@dest) {print}
26578
26579 =head1 Using the B<formatter> Callback Object
26580
26581 The B<formatter> parameter is an optional callback object which allows
26582 the calling program to receive tokenized lines directly from perltidy for
26583 further specialized processing.  When this parameter is used, the two
26584 formatting options which are built into perltidy (beautification or
26585 html) are ignored.  The following diagram illustrates the logical flow:
26586
26587                     |-- (normal route)   -> code beautification
26588   caller->perltidy->|-- (-html flag )    -> create html
26589                     |-- (formatter given)-> callback to write_line
26590
26591 This can be useful for processing perl scripts in some way.  The
26592 parameter C<$formatter> in the perltidy call,
26593
26594         formatter   => $formatter,
26595
26596 is an object created by the caller with a C<write_line> method which
26597 will accept and process tokenized lines, one line per call.  Here is
26598 a simple example of a C<write_line> which merely prints the line number,
26599 the line type (as determined by perltidy), and the text of the line:
26600
26601  sub write_line {
26602
26603      # This is called from perltidy line-by-line
26604      my $self              = shift;
26605      my $line_of_tokens    = shift;
26606      my $line_type         = $line_of_tokens->{_line_type};
26607      my $input_line_number = $line_of_tokens->{_line_number};
26608      my $input_line        = $line_of_tokens->{_line_text};
26609      print "$input_line_number:$line_type:$input_line";
26610  }
26611
26612 The complete program, B<perllinetype>, is contained in the examples section of
26613 the source distribution.  As this example shows, the callback method
26614 receives a parameter B<$line_of_tokens>, which is a reference to a hash
26615 of other useful information.  This example uses these hash entries:
26616
26617  $line_of_tokens->{_line_number} - the line number (1,2,...)
26618  $line_of_tokens->{_line_text}   - the text of the line
26619  $line_of_tokens->{_line_type}   - the type of the line, one of:
26620
26621     SYSTEM         - system-specific code before hash-bang line
26622     CODE           - line of perl code (including comments)
26623     POD_START      - line starting pod, such as '=head'
26624     POD            - pod documentation text
26625     POD_END        - last line of pod section, '=cut'
26626     HERE           - text of here-document
26627     HERE_END       - last line of here-doc (target word)
26628     FORMAT         - format section
26629     FORMAT_END     - last line of format section, '.'
26630     DATA_START     - __DATA__ line
26631     DATA           - unidentified text following __DATA__
26632     END_START      - __END__ line
26633     END            - unidentified text following __END__
26634     ERROR          - we are in big trouble, probably not a perl script
26635
26636 Most applications will be only interested in lines of type B<CODE>.  For
26637 another example, let's write a program which checks for one of the
26638 so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
26639 can slow down processing.  Here is a B<write_line>, from the example
26640 program B<find_naughty.pl>, which does that:
26641
26642  sub write_line {
26643
26644      # This is called back from perltidy line-by-line
26645      # We're looking for $`, $&, and $'
26646      my ( $self, $line_of_tokens ) = @_;
26647
26648      # pull out some stuff we might need
26649      my $line_type         = $line_of_tokens->{_line_type};
26650      my $input_line_number = $line_of_tokens->{_line_number};
26651      my $input_line        = $line_of_tokens->{_line_text};
26652      my $rtoken_type       = $line_of_tokens->{_rtoken_type};
26653      my $rtokens           = $line_of_tokens->{_rtokens};
26654      chomp $input_line;
26655
26656      # skip comments, pod, etc
26657      return if ( $line_type ne 'CODE' );
26658
26659      # loop over tokens looking for $`, $&, and $'
26660      for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
26661
26662          # we only want to examine token types 'i' (identifier)
26663          next unless $$rtoken_type[$j] eq 'i';
26664
26665          # pull out the actual token text
26666          my $token = $$rtokens[$j];
26667
26668          # and check it
26669          if ( $token =~ /^\$[\`\&\']$/ ) {
26670              print STDERR
26671                "$input_line_number: $token\n";
26672          }
26673      }
26674  }
26675
26676 This example pulls out these tokenization variables from the $line_of_tokens
26677 hash reference:
26678
26679      $rtoken_type = $line_of_tokens->{_rtoken_type};
26680      $rtokens     = $line_of_tokens->{_rtokens};
26681
26682 The variable C<$rtoken_type> is a reference to an array of token type codes,
26683 and C<$rtokens> is a reference to a corresponding array of token text.
26684 These are obviously only defined for lines of type B<CODE>.
26685 Perltidy classifies tokens into types, and has a brief code for each type.
26686 You can get a complete list at any time by running perltidy from the
26687 command line with
26688
26689      perltidy --dump-token-types
26690
26691 In the present example, we are only looking for tokens of type B<i>
26692 (identifiers), so the for loop skips past all other types.  When an
26693 identifier is found, its actual text is checked to see if it is one
26694 being sought.  If so, the above write_line prints the token and its
26695 line number.
26696
26697 The B<formatter> feature is relatively new in perltidy, and further
26698 documentation needs to be written to complete its description.  However,
26699 several example programs have been written and can be found in the
26700 B<examples> section of the source distribution.  Probably the best way
26701 to get started is to find one of the examples which most closely matches
26702 your application and start modifying it.
26703
26704 For help with perltidy's pecular way of breaking lines into tokens, you
26705 might run, from the command line,
26706
26707  perltidy -D filename
26708
26709 where F<filename> is a short script of interest.  This will produce
26710 F<filename.DEBUG> with interleaved lines of text and their token types.
26711 The -D flag has been in perltidy from the beginning for this purpose.
26712 If you want to see the code which creates this file, it is
26713 C<write_debug_entry> in Tidy.pm.
26714
26715 =head1 EXPORT
26716
26717   &perltidy
26718
26719 =head1 CREDITS
26720
26721 Thanks to Hugh Myers who developed the initial modular interface
26722 to perltidy.
26723
26724 =head1 VERSION
26725
26726 This man page documents Perl::Tidy version 20060719.
26727
26728 =head1 AUTHOR
26729
26730  Steve Hancock
26731  perltidy at users.sourceforge.net
26732
26733 =head1 SEE ALSO
26734
26735 The perltidy(1) man page describes all of the features of perltidy.  It
26736 can be found at http://perltidy.sourceforge.net.
26737
26738 =cut