From 657098da8da16dccd551721ffc180956d8aab7fc Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Fri, 16 Aug 2019 19:43:20 -0700 Subject: [PATCH] New upstream version 20190601 --- BUGS | 44 - BUGS.md | 45 + CHANGES | 3183 -- CHANGES.md | 3405 +++ INSTALL | 387 - INSTALL.md | 398 + MANIFEST | 80 +- META.json | 14 +- META.yml | 7 +- Makefile.PL | 45 +- Makefile.npm | 37 - README | 57 - README.md | 11 + TODO | 6 - bin/perltidy | 471 +- docs/ChangeLog.html | 3460 +++ docs/INSTALL.html | 430 + docs/README | 13 - docs/Tidy.html | 377 + docs/index.html | 72 + docs/index.md | 75 + docs/perltidy.1 | 3621 --- docs/perltidy.html | 2907 ++ docs/stylekey.html | 682 + docs/stylekey.pod | 781 - docs/testfile.pl | 10 - docs/tutorial.html | 381 + docs/tutorial.pod | 534 - examples/delete_ending_blank_lines.pl | 99 + examples/perltidyrc_dump.pl | 11 +- lib/Perl/Tidy.pm | 30160 +------------------ lib/Perl/Tidy.pod | 12 +- lib/Perl/Tidy/Debugger.pm | 125 + lib/Perl/Tidy/DevNull.pm | 16 + lib/Perl/Tidy/Diagnostics.pm | 64 + lib/Perl/Tidy/FileWriter.pm | 228 + lib/Perl/Tidy/Formatter.pm | 17039 +++++++++++ lib/Perl/Tidy/HtmlWriter.pm | 1466 + lib/Perl/Tidy/IOScalar.pm | 87 + lib/Perl/Tidy/IOScalarArray.pm | 76 + lib/Perl/Tidy/IndentationItem.pm | 254 + lib/Perl/Tidy/LineBuffer.pm | 57 + lib/Perl/Tidy/LineSink.pm | 145 + lib/Perl/Tidy/LineSource.pm | 89 + lib/Perl/Tidy/Logger.pm | 467 + lib/Perl/Tidy/Tokenizer.pm | 7892 +++++ lib/Perl/Tidy/VerticalAligner.pm | 3267 ++ lib/Perl/Tidy/VerticalAligner/Alignment.pm | 123 + lib/Perl/Tidy/VerticalAligner/Line.pm | 261 + pm2pl | 113 +- t/filter_example.t.SKIP | 65 + t/snippets1.t | 565 + t/snippets10.t | 1039 + t/snippets11.t | 604 + t/snippets12.t | 618 + t/snippets13.t | 460 + t/snippets14.t | 1085 + t/snippets15.t | 305 + t/snippets2.t | 550 + t/snippets3.t | 871 + t/snippets4.t | 573 + t/snippets5.t | 971 + t/snippets6.t | 461 + t/snippets7.t | 566 + t/snippets8.t | 506 + t/snippets9.t | 530 + t/test-eol.t | 131 + 67 files changed, 54720 insertions(+), 38764 deletions(-) delete mode 100644 BUGS create mode 100644 BUGS.md delete mode 100644 CHANGES create mode 100644 CHANGES.md delete mode 100644 INSTALL create mode 100644 INSTALL.md delete mode 100644 Makefile.npm delete mode 100644 README create mode 100644 README.md delete mode 100644 TODO create mode 100644 docs/ChangeLog.html create mode 100644 docs/INSTALL.html delete mode 100644 docs/README create mode 100644 docs/Tidy.html create mode 100644 docs/index.html create mode 100644 docs/index.md delete mode 100644 docs/perltidy.1 create mode 100644 docs/perltidy.html create mode 100644 docs/stylekey.html delete mode 100644 docs/stylekey.pod delete mode 100644 docs/testfile.pl create mode 100644 docs/tutorial.html delete mode 100644 docs/tutorial.pod create mode 100755 examples/delete_ending_blank_lines.pl create mode 100644 lib/Perl/Tidy/Debugger.pm create mode 100644 lib/Perl/Tidy/DevNull.pm create mode 100644 lib/Perl/Tidy/Diagnostics.pm create mode 100644 lib/Perl/Tidy/FileWriter.pm create mode 100644 lib/Perl/Tidy/Formatter.pm create mode 100644 lib/Perl/Tidy/HtmlWriter.pm create mode 100644 lib/Perl/Tidy/IOScalar.pm create mode 100644 lib/Perl/Tidy/IOScalarArray.pm create mode 100644 lib/Perl/Tidy/IndentationItem.pm create mode 100644 lib/Perl/Tidy/LineBuffer.pm create mode 100644 lib/Perl/Tidy/LineSink.pm create mode 100644 lib/Perl/Tidy/LineSource.pm create mode 100644 lib/Perl/Tidy/Logger.pm create mode 100644 lib/Perl/Tidy/Tokenizer.pm create mode 100644 lib/Perl/Tidy/VerticalAligner.pm create mode 100644 lib/Perl/Tidy/VerticalAligner/Alignment.pm create mode 100644 lib/Perl/Tidy/VerticalAligner/Line.pm create mode 100755 t/filter_example.t.SKIP create mode 100644 t/snippets1.t create mode 100644 t/snippets10.t create mode 100644 t/snippets11.t create mode 100644 t/snippets12.t create mode 100644 t/snippets13.t create mode 100644 t/snippets14.t create mode 100644 t/snippets15.t create mode 100644 t/snippets2.t create mode 100644 t/snippets3.t create mode 100644 t/snippets4.t create mode 100644 t/snippets5.t create mode 100644 t/snippets6.t create mode 100644 t/snippets7.t create mode 100644 t/snippets8.t create mode 100644 t/snippets9.t create mode 100644 t/test-eol.t diff --git a/BUGS b/BUGS deleted file mode 100644 index 6febb0e..0000000 --- a/BUGS +++ /dev/null @@ -1,44 +0,0 @@ -Perltidy open BUGS and LIMITATIONS - You can help perltidy evolve into a better program. If you think you - have hit a bug or weird behavior, or have a suggested improvement, - please send a note to perltidy at users.sourceforge.net. - - This file only lists open bugs. For bugs which have been fixed, see the - ChangeLog. - - The --extrude and --mangle options can produce code with syntax errors - The --extrude tries to put as many newlines in the formatted code as - possible. The --mangle tries to remove as many newlines as possible. - These options are very useful for stress testing perltidy (and Perl) but - not so much for normal formatting. Occasionally they will produce code - which Perl considers to have a syntax error. These problems often - involve code where Perl is having to guess the tokenization based on - whitespace. The given/when and switch/case statements are also - particularly vulnerable to unusual line breaks and whitespace. This type - of error should not normally occur in practice, but if it does it should - be easy to fix the problem by rerunning perltidy with more normal - parameters or by manually changing whitespace or newlines. - - The Pod:Html module has some bugs - For the most part Pod::Html works very well and is very convenient - because it part of the standard Perl distribution. But for example the - following line - - =item B<< = Session->new_cflt_deck; >> - - which uses double brackets to contain single brackets does not render - correctly. - - Two iterations are sometimes needed - Usually the code produced by perltidy on the first pass does not change - if it is run again, but sometimes a second pass will produce some small - additional change. This mainly happens if a major style change is made, - particularly when perltidy is untangling complex ternary statements. Use - the iteration parameter -it=2 if it is important that the results be - unchanged on subsequent passes, but note that this doubles the run time. - - Latest Bug and Wishlist at CPAN: - For the latest list of bugs and feature requests at CPAN see: - - https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy - diff --git a/BUGS.md b/BUGS.md new file mode 100644 index 0000000..1ec2b48 --- /dev/null +++ b/BUGS.md @@ -0,0 +1,45 @@ +# Perltidy open BUGS and LIMITATIONS + +You can help perltidy evolve into a better program. If you think you +have hit a bug or weird behavior, or have a suggested improvement, +please send a note to perltidy at users.sourceforge.net. + +This file only lists open bugs. For bugs which have been fixed, +see the ChangeLog. + +## The --extrude and --mangle options can produce code with syntax errors + +The --extrude tries to put as many newlines in the formatted code as possible. +The --mangle tries to remove as many newlines as possible. These options are +very useful for stress testing perltidy (and Perl) but not so much for normal +formatting. Occasionally they will produce code which Perl considers to have a +syntax error. These problems often involve code where Perl is having to guess +the tokenization based on whitespace. The given/when and switch/case +statements are also particularly vulnerable to unusual line breaks and +whitespace. This type of error should not normally occur in practice, but if +it does it should be easy to fix the problem by rerunning perltidy with more +normal parameters or by manually changing whitespace or newlines. + +## The Pod:Html module has some bugs + +For the most part Pod::Html works very well and is very convenient because +it part of the standard Perl distribution. But for example the following line + + =item B<< = Session->new_cflt_deck; >> + +which uses double brackets to contain single brackets does not render correctly. + +## Two iterations are sometimes needed + +Usually the code produced by perltidy on the first pass does not change if it +is run again, but sometimes a second pass will produce some small additional +change. This mainly happens if a major style change is made, particularly when +perltidy is untangling complex ternary statements. Use the iteration parameter +**-it=2** if it is important that the results be unchanged on subsequent passes, +but note that this doubles the run time. + +## Latest Bug and Wishlist at CPAN: + +For the latest list of bugs and feature requests at CPAN see: + +https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy diff --git a/CHANGES b/CHANGES deleted file mode 100644 index 8415f57..0000000 --- a/CHANGES +++ /dev/null @@ -1,3183 +0,0 @@ -Perltidy Change Log - 2018 02 20 - - RT #124469, #124494, perltidy often making empty files. The previous had - an index error causing it to fail, particularly in version 5.18 of Perl. - - Please avoid version 20180219. - - 2018 02 19 - - RT #79947, cuddled-else generalization. A new flag -cb provides - 'cuddled-else' type formatting for an arbitrary type of block chain. The - default is try-catch-finally, but this can be modified with the - parameter -cbl. - - - Fixed RT #124298: add space after ! operator without breaking !! secret - operator - - - RT #123749: numerous minor improvements to the -wn flag were made. - - - Fixed a problem with convergence tests in which iterations were stopping - prematurely. - - - Here doc targets for <<~ type here-docs may now have leading whitespace. - - - Fixed RT #124354. The '-indent-only' flag was not working correctly in the - previous release. A bug in version 20180101 caused extra blank lines - to be output. - - - Issue RT #124114. Some improvements were made in vertical alignment - involving 'fat commas'. - - 2018 01 01 - - Added new flag -wn (--weld-nested-containers) which addresses these issues: - RT #123749: Problem with promises; - RT #119970: opening token stacking strange behavior; - RT #81853: Can't stack block braces - - This option causes closely nested pairs of opening and closing containers - to be "welded" together and essentially be formatted as a single unit, - with just one level of indentation. - - Since this is a new flag it is set to be "off" by default but it has given - excellent results in testing. - - EXAMPLE 1, multiple blocks, default formatting: - do { - { - next if $x == $y; # do something here - } - } until $x++ > $z; - - perltidy -wn - do { { - next if $x == $y; - } } until $x++ > $z; - - EXAMPLE 2, three levels of wrapped function calls, default formatting: - p( - em( - conjug( - translate( param('verb') ), param('tense'), - param('person') - ) - ) - ); - - # perltidy -wn - p( em( conjug( - translate( param('verb') ), - param('tense'), param('person') - ) ) ); - - # EXAMPLE 3, chained method calls, default formatting: - get('http://mojolicious.org')->then( - sub { - my $mojo = shift; - say $mojo->res->code; - return get('http://metacpan.org'); - } - )->then( - sub { - my $cpan = shift; - say $cpan->res->code; - } - )->catch( - sub { - my $err = shift; - warn "Something went wrong: $err"; - } - )->wait; - - # perltidy -wn - get('http://mojolicious.org')->then( sub { - my $mojo = shift; - say $mojo->res->code; - return get('http://metacpan.org'); - } )->then( sub { - my $cpan = shift; - say $cpan->res->code; - } )->catch( sub { - my $err = shift; - warn "Something went wrong: $err"; - } )->wait; - - - - Fixed RT #114359: Missparsing of "print $x ** 0.5; - - - Deactivated the --check-syntax flag for better security. It will be - ignored if set. - - - Corrected minimum perl version from 5.004 to 5.008 based on perlver - report. The change is required for coding involving wide characters. - - - For certain severe errors, the source file will be copied directly to the - output without formatting. These include ending in a quote, ending in a - here doc, and encountering an unidentified character. - - 2017 12 14 - - RT #123749, partial fix. "Continuation indentation" is removed from lines - with leading closing parens which are part of a call chain. - For example, the call to pack() is is now outdented to the starting - indentation in the following experession: - - # OLD - $mw->Button( - -text => "New Document", - -command => \&new_document - )->pack( - -side => 'bottom', - -anchor => 'e' - ); - - # NEW - $mw->Button( - -text => "New Document", - -command => \&new_document - )->pack( - -side => 'bottom', - -anchor => 'e' - ); - - This modification improves readability of complex expressions, especially - when the user uses the same value for continuation indentation (-ci=n) and - normal indentation (-i=n). Perltidy was already programmed to - do this but a minor bug was preventing it. - - - RT #123774, added flag to control space between a backslash and a single or - double quote, requested by Robert Rothenberg. The issue is that lines like - - $str1=\"string1"; - $str2=\'string2'; - - confuse syntax highlighters unless a space is left between the backslash and - the quote. - - The new flag to control this is -sbq=n (--space-backslash-quote=n), - where n=0 means no space, n=1 means follow existing code, n=2 means always - space. The default is n=1, meaning that a space will be retained if there - is one in the source code. - - - Fixed RT #123492, support added for indented here doc operator <<~ added - in v5.26. Thanks to Chris Weyl for the report. - - - Fixed docs; --closing-side-comment-list-string should have been just - --closing-side-comment-list. Thanks to F.Li. - - - Added patch RT #122030] Perl::Tidy sometimes does not call binmode. - Thanks to Irilis Aelae. - - - Fixed RT #121959, PERLTIDY doesn't honor the 'three dot' notation for - locating a config file using environment variables. Thanks to John - Wittkowski. - - - Minor improvements to formatting, in which some additional vertical - aligmnemt is done. Thanks to Keith Neargarder. - - - RT #119588. Vertical alignment is no longer done for // operator. - - 2017 05 21 - - Fixed debian #862667: failure to check for perltidy.ERR deletion can lead - to overwriting abritrary files by symlink attack. Perltidy was continuing - to write files after an unlink failure. Thanks to Don Armstrong - for a patch. - - - Fixed RT #116344, perltidy fails on certain anonymous hash references: - in the following code snippet the '?' was misparsed as a pattern - delimiter rather than a ternary operator. - return ref {} ? 1 : 0; - - - Fixed RT #113792: misparsing of a fat comma (=>) right after - the __END__ or __DATA__ tokens. These keywords were getting - incorrectly quoted by the following => operator. - - - Fixed RT #118558. Custom Getopt::Long configuration breaks parsing - of perltidyrc. Perltidy was resetting the users configuration too soon. - - - Fixed RT #119140, failure to parse double diamond operator. Code to - handle this new operator has been added. - - - Fixed RT #120968. Fixed problem where -enc=utf8 didn't work - with --backup-and-modify-in-place. Thanks to Heinz Knutzen for this patch. - - - Fixed minor formatting issue where one-line blocks for subs with signatures - were unnecesarily broken - - - RT #32905, patch to fix utf-8 error when output was STDOUT. - - - RT #79947, improved spacing of try/catch/finally blocks. Thanks to qsimpleq - for a patch. - - - Fixed #114909, Anonymous subs with signatures and prototypes misparsed as - broken ternaries, in which a statement such as this was not being parsed - correctly: - return sub ( $fh, $out ) : prototype(*$) { ... } - - - Implemented RT #113689, option to introduces spaces after an opening block - brace and before a closing block brace. Four new optional controls are - added. The first two define the minimum number of blank lines to be - inserted - - -blao=i or --blank-lines-after-opening-block=i - -blbc=i or --blank-lines-before-closing-block=i - - where i is an integer, the number of lines (the default is 0). - - The second two define the types of blocks to which the first two apply - - -blaol=s or --blank-lines-after-opening-block-list=s - -blbcl=s or --blank-lines-before-closing-block-list=s - - where s is a string of possible block keywords (default is just 'sub', - meaning a named subroutine). - - For more information please see the documentation. - - - The method for specifying block types for certain input parameters has - been generalized to distinguish between normal named subroutines and - anonymous subs. The keyword for normal subroutines remains 'sub', and - the new keyword for anonymous subs is 'asub'. - - - Minor documentation changes. The BUGS sections now have a link - to CPAN where most open bugs and issues can be reviewed and bug reports - can be submitted. The information in the AUTHOR and CREDITS sections of - the man pages have been removed from the man pages to streamline the - documentation. This information is still in the source code. - - 2016 03 02 - - RT #112534. Corrected a minor problem in which an unwanted newline - was placed before the closing brace of an anonymous sub with - a signature, if it was in a list. Thanks to Dmytro Zagashev. - - - Corrected a minor problem in which occasional extra indentation was - given to the closing brace of an anonymous sub in a list when the -lp - parameter was set. - - 2016 03 01 - - RT #104427. Added support for signatures. - - - RT #111512. Changed global warning flag $^W = 1 to use warnings; - Thanks to Dmytro Zagashev. - - - RT #110297, added support for new regexp modifier /n - Thanks to Dmytro Zagashev. - - - RT #111519. The -io (--indent-only) and -dac (--delete-all-comments) - can now both be used in one pass. Thanks to Dmitry Veltishev. - - - Patch to avoid error message with 'catch' used by TryCatch, as in - catch($err){ - # do something - } - Thanks to Nick Tonkin. - - - RT #32905, UTF-8 coding is now more robust. Thanks to qsimpleq - and Dmytro for patches. - - - RT #106885. Added string bitwise operators ^. &. |. ~. ^.= &.= |.= - - - Fixed RT #107832 and #106492, lack of vertical alignment of two lines - when -boc flag (break at old commas) is set. This bug was - inadvertantly introduced in previous bug fix RT #98902. - - - Some common extensions to Perl syntax are handled better. - In particular, the following snippet is now foratted cleanly: - - method deposit( Num $amount) { - $self->balance( $self->balance + $amount ); - } - - A new flag -xs (--extended-syntax) was added to enable this, and the default - is to use -xs. - - In previous versions, and now only when -nxs is set, this snippet of code - generates the following error message: - - "syntax error at ') {', didn't see one of: case elsif for foreach given if switch unless until when while" - - 2015 08 15 - - Fixed RT# 105484, Invalid warning about 'else' in 'switch' statement. The - warning happened if a 'case' statement did not use parens. - - - Fixed RT# 101547, misparse of // caused error message. Also.. - - - Fixed RT# 102371, misparse of // caused unwated space in //= - - - Fixed RT# 100871, "silent failure of HTML Output on Windows". - Changed calls to tempfile() from: - my ( $fh_tmp, $tmpfile ) = tempfile(); - to have the full path name: - my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile() - because of problems in the Windows version reported by Dean Pearce. - - - Fixed RT# 99514, calling the perltidy module multiple times with - a .perltidyrc file containing the parameter --output-line-ending - caused a crash. This was a glitch in the memoization logic. - - - Fixed RT#99961, multiple lines inside a cast block caused unwanted - continuation indentation. - - - RT# 32905, broken handling of UTF-8 strings. - A new flag -utf8 causes perltidy assume UTF-8 encoding for input and - output of an io stream. Thanks to Sebastian Podjasek for a patch. - This feature may not work correctly in older versions of Perl. - It worked in a linux version 5.10.1 but not in a Windows version 5.8.3 (but - otherwise perltidy ran correctly). - - - Warning files now report perltidy VERSION. Suggested by John Karr. - - - Fixed long flag --nostack-closing-tokens (-nsct has always worked though). - This was due to a typo. This also fixed --nostack-opening-tokens to - behave correctly. Thanks to Rob Dixon. - - 2014 07 11 - - Fixed RT #94902: abbreviation parsing in .perltidyrc files was not - working for multi-line abbreviations. Thanks to Eric Fung for - supplying a patch. - - - Fixed RT #95708, misparsing of a hash when the first key was a perl - keyword, causing a semicolon to be incorrectly added. - - - Fixed RT #94338 for-loop in a parenthesized block-map. A code block within - parentheses of a map, sort, or grep function was being mistokenized. In - rare cases this could produce in an incorrect error message. The fix will - produce some minor formatting changes. Thanks to Daniel Trizen - discovering and documenting this. - - - Fixed RT #94354, excess indentation for stacked tokens. Thanks to - Colin Williams for supplying a patch. - - - Added support for experimental postfix dereferencing notation introduced in - perl 5.20. RT #96021. - - - Updated documentation to clarify the behavior of the -io flag - in response to RT #95709. You can add -noll or -l=0 to prevent - long comments from being outdented when -io is used. - - - Added a check to prevent a problem reported in RT #81866, where large - scripts which had been compressed to a single line could not be formatted - because of a check for VERSION for MakeMaker. The workaround was to - use -nvpl, but this shouldn't be necessary now. - - - Fixed RT #96101; Closing brace of anonymous sub in a list was being - indented. For example, the closing brace of the anonymous sub below - will now be lined up with the word 'callback'. This problem - occured if there was no comma after the closing brace of the anonymous sub. - This update may cause minor changes to formatting of code with lists - of anonymous subs, especially TK code. - - # OLD - my @menu_items = ( - - #... - { - path => '/_Operate/Transcode and split', - callback => sub { - return 1 if not $self->project_opened; - $self->comp('project')->transcode( split => 1 ); - } - } - ); - - # NEW - my @menu_items = ( - - #... - { - path => '/_Operate/Transcode and split', - callback => sub { - return 1 if not $self->project_opened; - $self->comp('project')->transcode( split => 1 ); - } - } - ); - - 2014 03 28 - - Fixed RT #94190 and debian Bug #742004: perltidy.LOG file left behind. - Thanks to George Hartzell for debugging this. The problem was - caused by the memoization speedup patch in version 20121207. An - unwanted flag was being set which caused a LOG to be written if - perltidy was called multiple times. - - - New default behavior for LOG files: If the source is from an array or - string (through a call to the perltidy module) then a LOG output is only - possible if a logfile stream is specified. This is to prevent - unexpected perltidy.LOG files. - - - Fixed debian Bug #740670, insecure temporary file usage. File::Temp is now - used to get a temporary file. Thanks to Don Anderson for a patch. - - - Any -b (--backup-and-modify-in-place) flag is silently ignored when a - source stream, destination stream, or standard output is used. - This is because the -b flag may have been in a .perltidyrc file and - warnings break Test::NoWarnings. Thanks to Marijn Brand. - - 2013 09 22 - - Fixed RT #88020. --converge was not working with wide characters. - - - Fixed RT #78156. package NAMESPACE VERSION syntax not accepted. - - - First attempt to fix RT #88588. INDEX END tag change in pod2html breaks - perltidy -html. I put in a patch which should work but I don't yet have - a way of testing it. - - 2013 08 06 - - Fixed RT #87107, spelling - - 2013 08 05 - - Fixed RT #87502, incorrect of parsing of smartmatch before hash brace - - - Added feature request RT #87330, trim whitespace after POD. - The flag -trp (--trim-pod) will trim trailing whitespace from lines of POD - - 2013 07 17 - - Fixed RT #86929, #86930, missing lhs of assignment. - - - Fixed RT #84922, moved pod from Tidy.pm into Tidy.pod - - 2012 12 07 - - The flag -cab=n or --comma-arrow-breakpoints=n has been generalized - to give better control over breaking open short containers. The - possible values are now: - - n=0 break at all commas after => - n=1 stable: break at all commas after => if container is open, - EXCEPT FOR one-line containers - n=2 break at all commas after =>, BUT try to form the maximum - maximum one-line container lengths - n=3 do not treat commas after => specially at all - n=4 break everything: like n=0 but also break a short container with - a => not followed by a comma - n=5 stable: like n=1 but ALSO break at open one-line containers (default) - - New values n=4 and n=5 have been added to allow short blocks to be - broken open. The new default is n=5, stable. It should more closely - follow the breaks in the input file, and previously formatted code - should remain unchanged. If this causes problems use -cab=1 to recover - the former behavior. Thanks to Tony Maszeroski for the suggestion. - - To illustrate the need for the new options, if perltidy is given - the following code, then the old default (-cab=1) was to close up - the 'index' container even if it was open in the source. The new - default (-cab=5) will keep it open if it was open in the source. - - our $fancypkg = { - 'ALL' => { - 'index' => { - 'key' => 'value', - }, - 'alpine' => { - 'one' => '+', - 'two' => '+', - 'three' => '+', - }, - } - }; - - - New debug flag --memoize (-mem). This version contains a - patch supplied by Jonathan Swartz which can significantly speed up - repeated calls to Perl::Tidy::perltidy in a single process by caching - the result of parsing the formatting parameters. A factor of up to 10 - speedup was achieved for masontidy (https://metacpan.org/module/masontidy). - The memoization patch is on by default but can be deactivated for - testing with -nmem (or --no-memoize). - - - New flag -tso (--tight-secret-operators) causes certain perl operator - sequences (secret operators) to be formatted "tightly" (without spaces). - The most common of these are 0 + and + 0 which become 0+ and +0. The - operators currently modified by this flag are: - =( )= 0+ +0 ()x!! ~~<> ,=> - Suggested by by Philippe Bruhat. See https://metacpan.org/module/perlsecret - This flag is off by default. - - - New flag -vmll (--variable-maximum-line-length) makes the maximum - line length increase with the nesting depth of a line of code. - Basically, it causes the length of leading whitespace to be ignored when - setting line breaks, so the formatting of a block of code is independent - of its nesting depth. Try this option if you have deeply nested - code or data structures, perhaps in conjunction with the -wc flag - described next. The default is not todo this. - - - New flag -wc=n (--whitespace-cycle=n) also addresses problems with - very deeply nested code and data structures. When this parameter is - used and the nesting depth exceeds the value n, the leading whitespace - will be reduced and start at 1 again. The result is that deeply - nested blocks of code will shift back to the left. This occurs cyclically - to any nesting depth. This flag may be used either with or without -vmll. - The default is not to use this (-wc=0). - - - Fixed RT #78764, error parsing smartmatch operator followed by anonymous - hash or array and then a ternary operator; two examples: - - qr/3/ ~~ ['1234'] ? 1 : 0; - map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; - - - Fixed problem with specifying spaces around arrows using -wls='->' - and -wrs='->'. Thanks to Alain Valleton for documenting this problem. - - - Implemented RT #53183, wishlist, lines of code with the same indentation - level which are contained with multiple stacked opening and closing tokens - (requested with flags -sot -sct) now have reduced indentation. - - # Default - $sender->MailMsg( - { - to => $addr, - subject => $subject, - msg => $body - } - ); - - # OLD: perltidy -sot -sct - $sender->MailMsg( { - to => $addr, - subject => $subject, - msg => $body - } ); - - # NEW: perltidy -sot -sct - $sender->MailMsg( { - to => $addr, - subject => $subject, - msg => $body - } ); - - - New flag -act=n (--all-containers-tightness=n) is an abbreviation for - -pt=n -sbt=n -bt=n -bbt=n, where n=0,1, or 2. It simplifies input when all - containers have the same tightness. Using the same example: - - # NEW: perltidy -sot -sct -act=2 - $sender->MailMsg({ - to => $addr, - subject => $subject, - msg => $body - }); - - - New flag -sac (--stack-all-containers) is an abbreviation for -sot -sct - This is part of wishlist item RT #53183. Using the same example again: - - # NEW: perltidy -sac -act=2 - $sender->MailMsg({ - to => $addr, - subject => $subject, - msg => $body - }); - - - new flag -scbb (--stack-closing-block-brace) causes isolated closing - block braces to stack as in the following example. (Wishlist item RT#73788) - - DEFAULT: - for $w1 (@w1) { - for $w2 (@w2) { - for $w3 (@w3) { - for $w4 (@w4) { - push( @lines, "$w1 $w2 $w3 $w4\n" ); - } - } - } - } - - perltidy -scbb: - for $w1 (@w1) { - for $w2 (@w2) { - for $w3 (@w3) { - for $w4 (@w4) { - push( @lines, "$w1 $w2 $w3 $w4\n" ); - } } } } - - There is, at present, no flag to place these closing braces at the end - of the previous line. It seems difficult to develop good rules for - doing this for a wide variety of code and data structures. - - - Parameters defining block types may use a wildcard '*' to indicate - all block types. Previously it was not possible to include bare blocks. - - - A flag -sobb (--stack-opening-block-brace) has been introduced as an - alias for -bbvt=2 -bbvtl='*'. So for example the following test code: - - {{{{{{{ $testing }}}}}}} - - cannot be formatted as above but can at least be kept vertically compact - using perltidy -sobb -scbb - - { { { { { { { $testing - } } } } } } } - - Or even, perltidy -sobb -scbb -i=1 -bbt=2 - {{{{{{{$testing - }}}}}}} - - - - Error message improved for conflicts due to -pbp; thanks to Djun Kim. - - - Fixed RT #80645, error parsing special array name '@$' when used as - @{$} or $#{$} - - - Eliminated the -chk debug flag which was included in version 20010406 to - do a one-time check for a bug with multi-line quotes. It has not been - needed since then. - - - Numerous other minor formatting improvements. - - 2012 07 14 - - Added flag -iscl (--ignore-side-comment-lengths) which causes perltidy - to ignore the length of side comments when setting line breaks, - RT #71848. The default is to include the length of side comments when - breaking lines to stay within the length prescribed by the -l=n - maximum line length parameter. For example, - - Default behavior on a single line with long side comment: - $vmsfile =~ s/;[\d\-]*$// - ; # Clip off version number; we can use a newer version as well - - perltidy -iscl leaves the line intact: - - $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well - - - Fixed RT #78182, side effects with STDERR. Error handling has been - revised and the documentation has been updated. STDERR can now be - redirected to a string reference, and perltidy now returns an - error flag instead of calling die when input errors are detected. - If the error flag is set then no tidied output was produced. - See man Perl::Tidy for an example. - - - Fixed RT #78156, erroneous warning message for package VERSION syntax. - - - Added abbreviations -conv (--converge) to simplify iteration control. - -conv is equivalent to -it=4 and will insure that the tidied code is - converged to its final state with the minimum number of iterations. - - - Minor formatting modifications have been made to insure convergence. - - - Simplified and hopefully improved the method for guessing the starting - indentation level of entabbed code. Added flag -dt=n (--default_tabsize=n) - which might be helpful if the guessing method does not work well for - some editors. - - - Added support for stacked labels, upper case X/B in hex and binary, and - CORE:: namespace. - - - Eliminated warning messages for using keyword names as constants. - - 2012 07 01 - - Corrected problem introduced by using a chomp on scalar references, RT #77978 - - - Added support for Perl 5.14 package block syntax, RT #78114. - - - A convergence test is made if three or more iterations are requested with - the -it=n parameter to avoid wasting computer time. Several hundred Mb of - code gleaned from the internet were searched with the results that: - - It is unusual for two iterations to be required unless a major - style change is being made. - - Only one case has been found where three iterations were required. - - No cases requiring four iterations have been found with this version. - For the previous version several cases where found the results could - oscillate between two semi-stable states. This version corrects this. - - So if it is important that the code be converged it is okay to set -it=4 - with this version and it will probably stop after the second iteration. - - - Improved ability to identify and retain good line break points in the - input stream, such as at commas and equals. You can always tell - perltidy to ignore old breakpoints with -iob. - - - Fixed glitch in which a terminal closing hash brace followed by semicolon - was not outdented back to the leading line depth like other closing - tokens. Thanks to Keith Neargarder for noting this. - - OLD: - my ( $pre, $post ) = @{ - { - "pp_anonlist" => [ "[", "]" ], - "pp_anonhash" => [ "{", "}" ] - }->{ $kid->ppaddr } - }; # terminal brace - - NEW: - my ( $pre, $post ) = @{ - { - "pp_anonlist" => [ "[", "]" ], - "pp_anonhash" => [ "{", "}" ] - }->{ $kid->ppaddr } - }; # terminal brace - - - Removed extra indentation given to trailing 'if' and 'unless' clauses - without parentheses because this occasionally produced undesirable - results. This only applies where parens are not used after the if or - unless. - - OLD: - return undef - unless my ( $who, $actions ) = - $clause =~ /^($who_re)((?:$action_re)+)$/o; - - NEW: - return undef - unless my ( $who, $actions ) = - $clause =~ /^($who_re)((?:$action_re)+)$/o; - - 2012 06 19 - - Updated perltidy to handle all quote modifiers defined for perl 5 version 16. - - - Side comment text in perltidyrc configuration files must now begin with - at least one space before the #. Thus: - - OK: - -l=78 # Max line width is 78 cols - BAD: - -l=78# Max line width is 78 cols - - This is probably true of almost all existing perltidyrc files, - but if you get an error message about bad parameters - involving a '#' the first time you run this version, please check the side - comments in your perltidyrc file, and add a space before the # if necessary. - You can quickly see the contents your perltidyrc file, if any, with the - command: - - perltidy -dpro - - The reason for this change is that some parameters naturally involve - the # symbol, and this can get interpreted as a side comment unless the - parameter is quoted. For example, to define -sphb=# it used to be necessary - to write - -sbcp='#' - to keep the # from becoming part of a comment. This was causing - trouble for new users. Now it can also be written without quotes: - -sbcp=# - - - Fixed bug in processing some .perltidyrc files containing parameters with - an opening brace character, '{'. For example the following was - incorrectly processed: - --static-block-comment-prefix="^#{2,}[^\s#]" - Thanks to pdagosto. - - - Added flag -boa (--break-at-old-attribute-breakpoints) which retains - any existing line breaks at attribute separation ':'. This is now the - default, use -nboa to deactivate. Thanks to Daphne Phister for the patch. - For example, given the following code, the line breaks at the ':'s will be - retained: - - my @field - : field - : Default(1) - : Get('Name' => 'foo') : Set('Name'); - - whereas the previous version would have output a single line. If - the attributes are on a single line then they will remain on a single line. - - - Added new flags --blank-lines-before-subs=n (-blbs=n) and - --blank-lines-before-packages=n (-blbp=n) to put n blank lines before - subs and packages. The old flag -bbs is now equivalent to -blbs=1 -blbp=1. - and -nbbs is equivalent to -blbs=0 -blbp=0. Requested by M. Schwern and - several others. - - - Added feature -nsak='*' meaning no space between any keyword and opening - paren. This avoids listing entering a long list of keywords. Requested - by M. Schwern. - - - Added option to delete a backup of original file with in-place-modify (-b) - if there were no errors. This can be requested with the flag -bext='/'. - See documentation for details. Requested by M. Schwern and others. - - - Fixed bug where the module postfilter parameter was not applied when -b - flag was used. This was discovered during testing. - - - Fixed in-place-modify (-b) to work with symbolic links to source files. - Thanks to Ted Johnson. - - - Fixed bug where the Perl::Tidy module did not allow -b to be used - in some cases. - - - No extra blank line is added before a comment which follows - a short line ending in an opening token, for example like this: - OLD: - if ( - - # unless we follow a blank or comment line - $last_line_leading_type !~ /^[#b]$/ - ... - - NEW: - if ( - # unless we follow a blank or comment line - $last_line_leading_type !~ /^[#b]$/ - ... - - The blank is not needed for readability in these cases because there - already is already space above the comment. If a blank already - exists there it will not be removed, so this change should not - change code which has previously been formatted with perltidy. - Thanks to R.W.Stauner. - - - Likewise, no extra blank line is added above a comment consisting of a - single #, since nothing is gained in readability. - - - Fixed error in which a blank line was removed after a #>>> directive. - Thanks to Ricky Morse. - - - Unnecessary semicolons after given/when/default blocks are now removed. - - - Fixed bug where an unwanted blank line could be added before - pod text in __DATA__ or __END__ section. Thanks to jidani. - - - Changed exit flags from 1 to 0 to indicate success for -help, -version, - and all -dump commands. Also added -? as another way to dump the help. - Requested by Keith Neargarder. - - - Fixed bug where .ERR and .LOG files were not written except for -it=2 or more - - - Fixed bug where trailing blank lines at the end of a file were dropped when - -it>1. - - - Fixed bug where a line occasionally ended with an extra space. This reduces - rhe number of instances where a second iteration gives a result different - from the first. - - - Updated documentation to note that the Tidy.pm module parameter may - not be a reference to SCALAR or ARRAY; it must be a file. - - - Syntax check with perl now work when the Tidy.pm module is processing - references to arrays and strings. Thanks to Charles Alderman. - - - Zero-length files are no longer processed due to concerns for data loss - due to side effects in some scenarios. - - - block labels, if any, are now included in closing side comment text - when the -csc flag is used. Suggested by Aaron. For example, - the label L102 in the following block is now included in the -csc text: - - L102: for my $i ( 1 .. 10 ) { - ... - } ## end L102: for my $i ( 1 .. 10 ) - - 2010 12 17 - - added new flag -it=n or --iterations=n - This flag causes perltidy to do n complete iterations. - For most purposes the default of n=1 should be satisfactory. However n=2 - can be useful when a major style change is being made, or when code is being - beautified on check-in to a source code control system. The run time will be - approximately proportional to n, and it should seldom be necessary to use a - value greater than n=2. Thanks to Jonathan Swartz - - - A configuration file pathname begins with three dots, e.g. - ".../.perltidyrc", indicates that the file should be searched for starting - in the current directory and working upwards. This makes it easier to have - multiple projects each with their own .perltidyrc in their root directories. - Thanks to Jonathan Swartz for this patch. - - - Added flag --notidy which disables all formatting and causes the input to be - copied unchanged. This can be useful in conjunction with hierarchical - F<.perltidyrc> files to prevent unwanted tidying. - Thanks to Jonathan Swartz for this patch. - - - Added prefilters and postfilters in the call to the Tidy.pm module. - Prefilters and postfilters. The prefilter is a code reference that - will be applied to the source before tidying, and the postfilter - is a code reference to the result before outputting. - - Thanks to Jonathan Swartz for this patch. He writes: - This is useful for all manner of customizations. For example, I use - it to convert the 'method' keyword to 'sub' so that perltidy will work for - Method::Signature::Simple code: - - Perl::Tidy::perltidy( - prefilter => sub { $_ = $_[0]; s/^method (.*)/sub $1 \#__METHOD/gm; return $_ }, - postfilter => sub { $_ = $_[0]; s/^sub (.*?)\s* \#__METHOD/method $1/gm; return $_ } - ); - - - The starting indentation level of sections of code entabbed with -et=n - is correctly guessed if it was also produced with the same -et=n flag. This - keeps the indentation stable on repeated formatting passes within an editor. - Thanks to Sam Kington and Glenn. - - - Functions with prototype '&' had a space between the function and opening - peren. This space now only occurs if the flag --space-function-paren (-sfp) - is set. Thanks to Zrajm Akfohg. - - - Patch to never put spaces around a bare word in braces beginning with ^ as in: - my $before = ${^PREMATCH}; - even if requested with the -bt=0 flag because any spaces cause a syntax error in perl. - Thanks to Fabrice Dulanoy. - - 2009 06 16 - - Allow configuration file to be 'perltidy.ini' for Windows systems. - i.e. C:\Documents and Settings\User\perltidy.ini - and added documentation for setting configuation file under Windows in man - page. Thanks to Stuart Clark. - - - Corrected problem of unwanted semicolons in hash ref within given/when code. - Thanks to Nelo Onyiah. - - - added new flag -cscb or --closing-side-comments-balanced - When using closing-side-comments, and the closing-side-comment-maximum-text - limit is exceeded, then the comment text must be truncated. Previous - versions of perltidy terminate with three dots, and this can still be - achieved with -ncscb: - - perltidy -csc -ncscb - - } ## end foreach my $foo (sort { $b cmp $a ... - - However this causes a problem with older editors which cannot recognize - comments or are not configured to doso because they cannot "bounce" around in - the text correctly. The B<-cscb> flag tries to help them by - appending appropriate terminal balancing structure: - - perltidy -csc -cscb - - } ## end foreach my $foo (sort { $b cmp $a ... }) - - Since there is much to be gained and little to be lost by doing this, - the default is B<-cscb>. Use B<-ncscb> if you do not want this. - - Thanks to Daniel Becker for suggesting this option. - - - After an isolated closing eval block the continuation indentation will be - removed so that the braces line up more like other blocks. Thanks to Yves Orton. - - OLD: - eval { - #STUFF; - 1; # return true - } - or do { - #handle error - }; - - NEW: - eval { - #STUFF; - 1; # return true - } or do { - #handle error - }; - - -A new flag -asbl (or --opening-anonymous-sub-brace-on-new-line) has - been added to put the opening brace of anonymous sub's on a new line, - as in the following snippet: - - my $code = sub - { - my $arg = shift; - return $arg->(@_); - }; - - This was not possible before because the -sbl flag only applies to named - subs. Thanks to Benjamin Krupp. - - -Fix tokenization bug with the following snippet - print 'hi' if { x => 1, }->{x}; - which resulted in a semicolon being added after the comma. The workaround - was to use -nasc, but this is no longer necessary. Thanks to Brian Duggan. - - -Fixed problem in which an incorrect error message could be triggered - by the (unusual) combination of parameters -lp -i=0 -l=2 -ci=0 for - example. Thanks to Richard Jelinek. - - -A new flag --keep-old-blank-lines=n has been added to - give more control over the treatment of old blank lines in - a script. The manual has been revised to discuss the new - flag and clarify the treatment of old blank lines. Thanks - to Oliver Schaefer. - - 2007 12 05 - -Improved support for perl 5.10: New quote modifier 'p', new block type UNITCHECK, - new keyword break, improved formatting of given/when. - - -Corrected tokenization bug of something like $var{-q}. - - -Numerous minor formatting improvements. - - -Corrected list of operators controlled by -baao -bbao to include - . : ? && || and or err xor - - -Corrected very minor error in log file involving incorrect comment - regarding need for upper case of labels. - - -Fixed problem where perltidy could run for a very long time - when given certain non-perl text files. - - -Line breaks in un-parenthesized lists now try to follow - line breaks in the input file rather than trying to fill - lines. This usually works better, but if this causes - trouble you can use -iob to ignore any old line breaks. - Example for the following input snippet: - - print - "conformability (Not the same dimension)\n", - "\t", $have, " is ", text_unit($hu), "\n", - "\t", $want, " is ", text_unit($wu), "\n", - ; - - OLD: - print "conformability (Not the same dimension)\n", "\t", $have, " is ", - text_unit($hu), "\n", "\t", $want, " is ", text_unit($wu), "\n",; - - NEW: - print "conformability (Not the same dimension)\n", - "\t", $have, " is ", text_unit($hu), "\n", - "\t", $want, " is ", text_unit($wu), "\n", - ; - - 2007 08 01 - -Added -fpsc option (--fixed-position-side-comment). Thanks to Ueli Hugenschmidt. - For example -fpsc=40 tells perltidy to put side comments in column 40 - if possible. - - -Added -bbao and -baao options (--break-before-all-operators and - --break-after-all-operators) to simplify command lines and configuration - files. These define an initial preference for breaking at operators which can - be modified with -wba and -wbb flags. For example to break before all operators - except an = one could use --bbao -wba='=' rather than listing every - single perl operator (except =) on a -wbb flag. - - -Added -kis option (--keep-interior-semicolons). Use the B<-kis> flag - to prevent breaking at a semicolon if there was no break there in the - input file. To illustrate, consider the following input lines: - - dbmclose(%verb_delim); undef %verb_delim; - dbmclose(%expanded); undef %expanded; - dbmclose(%global); undef %global; - - Normally these would be broken into six lines, but - perltidy -kis gives: - - dbmclose(%verb_delim); undef %verb_delim; - dbmclose(%expanded); undef %expanded; - dbmclose(%global); undef %global; - - -Improved formatting of complex ternary statements, with indentation - of nested statements. - OLD: - return defined( $cw->{Selected} ) - ? (wantarray) - ? @{ $cw->{Selected} } - : $cw->{Selected}[0] - : undef; - - NEW: - return defined( $cw->{Selected} ) - ? (wantarray) - ? @{ $cw->{Selected} } - : $cw->{Selected}[0] - : undef; - - -Text following un-parenthesized if/unless/while/until statements get a - full level of indentation. Suggested by Jeff Armstorng and others. - OLD: - return $ship->chargeWeapons("phaser-canon") - if $encounter->description eq 'klingon' - and $ship->firepower >= $encounter->firepower - and $location->status ne 'neutral'; - NEW: - return $ship->chargeWeapons("phaser-canon") - if $encounter->description eq 'klingon' - and $ship->firepower >= $encounter->firepower - and $location->status ne 'neutral'; - - 2007 05 08 - -Fixed bug where #line directives were being indented. Thanks to - Philippe Bruhat. - - 2007 05 04 - -Fixed problem where an extra blank line was added after an =cut when either - (a) the =cut started (not stopped) a POD section, or (b) -mbl > 1. - Thanks to J. Robert Ray and Bill Moseley. - - 2007 04 24 - -ole (--output-line-ending) and -ple (--preserve-line-endings) should - now work on all systems rather than just unix systems. Thanks to Dan - Tyrell. - - -Fixed problem of a warning issued for multiple subs for BEGIN subs - and other control subs. Thanks to Heiko Eissfeldt. - - -Fixed problem where no space was introduced between a keyword or - bareword and a colon, such as: - - ( ref($result) eq 'HASH' && !%$result ) ? undef: $result; - - Thanks to Niek. - - -Added a utility program 'break_long_quotes.pl' to the examples directory of - the distribution. It breaks long quoted strings into a chain of concatenated - sub strings no longer than a selected length. Suggested by Michael Renner as - a perltidy feature but was judged to be best done in a separate program. - - -Updated docs to remove extra < and >= from list of tokens - after which breaks are made by default. Thanks to Bob Kleemann. - - -Removed improper uses of $_ to avoid conflicts with external calls, giving - error message similar to: - Modification of a read-only value attempted at - /usr/share/perl5/Perl/Tidy.pm line 6907. - Thanks to Michael Renner. - - -Fixed problem when errorfile was not a plain filename or filehandle - in a call to Tidy.pm. The call - perltidy(source => \$input, destination => \$output, errorfile => \$err); - gave the following error message: - Not a GLOB reference at /usr/share/perl5/Perl/Tidy.pm line 3827. - Thanks to Michael Renner and Phillipe Bruhat. - - -Fixed problem where -sot would not stack an opening token followed by - a side comment. Thanks to Jens Schicke. - - -improved breakpoints in complex math and other long statements. Example: - OLD: - return - log($n) + 0.577215664901532 + ( 1 / ( 2 * $n ) ) - - ( 1 / ( 12 * ( $n**2 ) ) ) + ( 1 / ( 120 * ( $n**4 ) ) ); - NEW: - return - log($n) + 0.577215664901532 + - ( 1 / ( 2 * $n ) ) - - ( 1 / ( 12 * ( $n**2 ) ) ) + - ( 1 / ( 120 * ( $n**4 ) ) ); - - -more robust vertical alignment of complex terminal else blocks and ternary - statements. - - 2006 07 19 - -Eliminated bug where a here-doc invoked through an 'e' modifier on a pattern - replacement text was not recognized. The tokenizer now recursively scans - replacement text (but does not reformat it). - - -improved vertical alignment of terminal else blocks and ternary statements. - Thanks to Chris for the suggestion. - - OLD: - if ( IsBitmap() ) { return GetBitmap(); } - elsif ( IsFiles() ) { return GetFiles(); } - else { return GetText(); } - - NEW: - if ( IsBitmap() ) { return GetBitmap(); } - elsif ( IsFiles() ) { return GetFiles(); } - else { return GetText(); } - - OLD: - $which_search = - $opts{"t"} ? 'title' - : $opts{"s"} ? 'subject' - : $opts{"a"} ? 'author' - : 'title'; - - NEW: - $which_search = - $opts{"t"} ? 'title' - : $opts{"s"} ? 'subject' - : $opts{"a"} ? 'author' - : 'title'; - - -improved indentation of try/catch blocks and other externally defined - functions accepting a block argument. Thanks to jae. - - -Added support for Perl 5.10 features say and smartmatch. - - -Added flag -pbp (--perl-best-practices) as an abbreviation for parameters - suggested in Damian Conway's "Perl Best Practices". -pbp is the same as: - - -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq - -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = - **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=" - - Please note that the -st here restricts input to standard input; use - -nst if necessary to override. - - -Eliminated some needless breaks at equals signs in -lp indentation. - - OLD: - $c = - Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE, - TOP + $y * (BOTTOM - TOP) / SIZE); - NEW: - $c = Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE, - TOP + $y * (BOTTOM - TOP) / SIZE); - - A break at an equals is sometimes useful for preventing complex statements - from hitting the line length limit. The decision to do this was - over-eager in some cases and has been improved. Thanks to Royce Reece. - - -qw quotes contained in braces, square brackets, and parens are being - treated more like those containers as far as stacking of tokens. Also - stack of closing tokens ending ');' will outdent to where the ');' would - have outdented if the closing stack is matched with a similar opening stack. - - OLD: perltidy -soc -sct - __PACKAGE__->load_components( - qw( - PK::Auto - Core - ) - ); - NEW: perltidy -soc -sct - __PACKAGE__->load_components( qw( - PK::Auto - Core - ) ); - Thanks to Aran Deltac - - -Eliminated some undesirable or marginally desirable vertical alignments. - These include terminal colons, opening braces, and equals, and particularly - when just two lines would be aligned. - - OLD: - my $accurate_timestamps = $Stamps{lnk}; - my $has_link = - ... - NEW: - my $accurate_timestamps = $Stamps{lnk}; - my $has_link = - - -Corrected a problem with -mangle in which a space would be removed - between a keyword and variable beginning with ::. - - 2006 06 14 - -Attribute argument lists are now correctly treated as quoted strings - and not formatted. This is the most important update in this version. - Thanks to Borris Zentner, Greg Ferguson, Steve Kirkup. - - -Updated to recognize the defined or operator, //, to be released in Perl 10. - Thanks to Sebastien Aperghis-Tramoni. - - -A useful utility perltidyrc_dump.pl is included in the examples section. It - will read any perltidyrc file and write it back out in a standard format - (though comments are lost). - - -Added option to have perltidy read and return a hash with the contents of a - perltidyrc file. This may be used by Leif Eriksen's tidyview code. This - feature is used by the demonstration program 'perltidyrc_dump.pl' in the - examples directory. - - -Improved error checking in perltidyrc files. Unknown bare words were not - being caught. - - -The --dump-options parameter now dumps parameters in the format required by a - perltidyrc file. - - -V-Strings with underscores are now recognized. - For example: $v = v1.2_3; - - -cti=3 option added which gives one extra indentation level to closing - tokens always. This provides more predictable closing token placement - than cti=2. If you are using cti=2 you might want to try cti=3. - - -To identify all left-adjusted comments as static block comments, use C<-sbcp='^#'>. - - -New parameters -fs, -fsb, -fse added to allow sections of code between #<<< - and #>>> to be passed through verbatim. This is enabled by default and turned - off by -nfs. Flags -fsb and -fse allow other beginning and ending markers. - Thanks to Wolfgang Werner and Marion Berryman for suggesting this. - - -added flag -skp to put a space between all Perl keywords and following paren. - The default is to only do this for certain keywords. Suggested by - H.Merijn Brand. - - -added flag -sfp to put a space between a function name and following paren. - The default is not to do this. Suggested by H.Merijn Brand. - - -Added patch to avoid breaking GetOpt::Long::Configure set by calling program. - Thanks to Philippe Bruhat. - - -An error was fixed in which certain parameters in a .perltidyrc file given - without the equals sign were not recognized. That is, - '--brace-tightness 0' gave an error but '--brace-tightness=0' worked - ok. Thanks to Zac Hansen. - - -An error preventing the -nwrs flag from working was corrected. Thanks to - Greg Ferguson. - - -Corrected some alignment problems with entab option. - - -A bug with the combination of -lp and -extrude was fixed (though this - combination doesn't really make sense). The bug was that a line with - a single zero would be dropped. Thanks to Cameron Hayne. - - -Updated Windows detection code to avoid an undefined variable. - Thanks to Joe Yates and Russ Jones. - - -Improved formatting for short trailing statements following a closing paren. - Thanks to Joe Matarazzo. - - -The handling of the -icb (indent closing block braces) flag has been changed - slightly to provide more consistent and predictable formatting of complex - structures. Instead of giving a closing block brace the indentation of the - previous line, it is now given one extra indentation level. The two methods - give the same result if the previous line was a complete statement, as in this - example: - - if ($task) { - yyy(); - } # -icb - else { - zzz(); - } - The change also fixes a problem with empty blocks such as: - - OLD, -icb: - elsif ($debug) { - } - - NEW, -icb: - elsif ($debug) { - } - - -A problem with -icb was fixed in which a closing brace was misplaced when - it followed a quote which spanned multiple lines. - - -Some improved breakpoints for -wba='&& || and or' - - -Fixed problem with misaligned cuddled else in complex statements - when the -bar flag was also used. Thanks to Alex and Royce Reese. - - -Corrected documentation to show that --outdent-long-comments is the default. - Thanks to Mario Lia. - - -New flag -otr (opening-token-right) is similar to -bar (braces-always-right) - but applies to non-structural opening tokens. - - -new flags -sot (stack-opening-token), -sct (stack-closing-token). - Suggested by Tony. - - 2003 10 21 - -The default has been changed to not do syntax checking with perl. - Use -syn if you want it. Perltidy is very robust now, and the -syn - flag now causes more problems than it's worth because of BEGIN blocks - (which get executed with perl -c). For example, perltidy will never - return when trying to beautify this code if -syn is used: - - BEGIN { 1 while { }; } - - Although this is an obvious error, perltidy is often run on untested - code which is more likely to have this sort of problem. A more subtle - example is: - - BEGIN { use FindBin; } - - which may hang on some systems using -syn if a shared file system is - unavailable. - - -Changed style -gnu to use -cti=1 instead of -cti=2 (see next item). - In most cases it looks better. To recover the previous format, use - '-gnu -cti=2' - - -Added flags -cti=n for finer control of closing token indentation. - -cti = 0 no extra indentation (default; same as -nicp) - -cti = 1 enough indentation so that the closing token - aligns with its opening token. - -cti = 2 one extra indentation level if the line has the form - ); ]; or }; (same as -icp). - - The new option -cti=1 works well with -lp: - - EXAMPLES: - - # perltidy -lp -cti=1 - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - - # perltidy -lp -cti=2 - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - This is backwards compatible with -icp. See revised manual for - details. Suggested by Mike Pennington. - - -Added flag '--preserve-line-endings' or '-ple' to cause the output - line ending to be the same as in the input file, for unix, dos, - or mac line endings. Only works under unix. Suggested by - Rainer Hochschild. - - -Added flag '--output-line-ending=s' or '-ole=s' where s=dos or win, - unix, or mac. Only works under unix. - - -Files with Mac line endings should now be handled properly under unix - and dos without being passed through a converter. - - -You may now include 'and', 'or', and 'xor' in the list following - '--want-break-after' to get line breaks after those keywords rather than - before them. Suggested by Rainer Hochschild. - - -Corrected problem with command line option for -vtc=n and -vt=n. The - equals sign was being eaten up by the Windows shell so perltidy didn't - see it. - - 2003 07 26 - -Corrected cause of warning message with recent versions of Perl: - "Possible precedence problem on bitwise & operator at ..." - Thanks to Jim Files. - - -fixed bug with -html with '=for pod2html' sections, in which code/pod - output order was incorrect. Thanks to Tassilo von Parseval. - - -fixed bug when the -html flag is used, in which the following error - message, plus others, appear: - did not see in pod2html output - This was caused by a change in the format of html output by pod2html - VERSION 1.04 (included with perl 5.8). Thanks to Tassilo von Parseval. - - -Fixed bug where an __END__ statement would be mistaken for a label - if it is immediately followed by a line with a leading colon. Thanks - to John Bayes. - - -Implemented guessing logic for brace types when it is ambiguous. This - has been on the TODO list a long time. Thanks to Boris Zentner for - an example. - - -Long options may now be negated either as '--nolong-option' - or '--no-long-option'. Thanks to Philip Newton for the suggestion. - - -added flag --html-entities or -hent which controls the use of - Html::Entities for html formatting. Use --nohtml-entities or -nhent to - prevent the use of Html::Entities to encode special symbols. The - default is -hent. Html::Entities when formatting perl text to escape - special symbols. This may or may not be the right thing to do, - depending on browser/language combinations. Thanks to Burak Gursoy for - this suggestion. - - -Bareword strings with leading '-', like, '-foo' now count as 1 token - for horizontal tightness. This way $a{'-foo'}, $a{foo}, and $a{-foo} - are now all treated similarly. Thus, by default, OLD: $a{ -foo } will - now be NEW: $a{-foo}. Suggested by Mark Olesen. - - -added 2 new flags to control spaces between keywords and opening parens: - -sak=s or --space-after-keyword=s, and - -nsak=s or --nospace-after-keyword=s, where 's' is a list of keywords. - - The new default list of keywords which get a space is: - - "my local our and or eq ne if else elsif until unless while for foreach - return switch case given when" - - Use -sak=s and -nsak=s to add and remove keywords from this list, - respectively. - - Explanation: Stephen Hildrey noted that perltidy was being inconsistent - in placing spaces between keywords and opening parens, and sent a patch - to give user control over this. The above list was selected as being - a reasonable default keyword list. Previously, perltidy - had a hardwired list which also included these keywords: - - push pop shift unshift join split die - - but did not have 'our'. Example: if you prefer to make perltidy behave - exactly as before, you can include the following two lines in your - .perltidyrc file: - - -sak="push pop local shift unshift join split die" - -nsak="our" - - -Corrected html error in .toc file when -frm -html is used (extra "); - browsers were tolerant of it. - - -Improved alignment of chains of binary and ?/: operators. Example: - OLD: - $leapyear = - $year % 4 ? 0 - : $year % 100 ? 1 - : $year % 400 ? 0 - : 1; - NEW: - $leapyear = - $year % 4 ? 0 - : $year % 100 ? 1 - : $year % 400 ? 0 - : 1; - - -improved breakpoint choices involving '->' - - -Corrected tokenization of things like ${#}. For example, - ${#} is valid, but ${# } is a syntax error. - - -Corrected minor tokenization errors with indirect object notation. - For example, 'new A::()' works now. - - -Minor tokenization improvements; all perl code distributed with perl 5.8 - seems to be parsed correctly except for one instance (lextest.t) - of the known bug. - - 2002 11 30 - -Implemented scalar attributes. Thanks to Sean Tobin for noting this. - - -Fixed glitch introduced in previous release where -pre option - was not outputting a leading html
 tag.
-
-     -Numerous minor improvements in vertical alignment, including the following:
-
-     -Improved alignment of opening braces in many cases.  Needed for improved
-     switch/case formatting, and also suggested by Mark Olesen for sort/map/grep
-     formatting.  For example:
-
-      OLD:
-        @modified =
-          map { $_->[0] }
-          sort { $a->[1] <=> $b->[1] }
-          map { [ $_, -M ] } @filenames;
-
-      NEW:
-        @modified =
-          map  { $_->[0] }
-          sort { $a->[1] <=> $b->[1] }
-          map  { [ $_, -M ] } @filenames;
-
-     -Eliminated alignments across unrelated statements. Example:
-      OLD:
-        $borrowerinfo->configure( -state => 'disabled' );
-        $borrowerinfo->grid( -col        => 1, -row => 0, -sticky => 'w' );
-
-      NEW:  
-        $borrowerinfo->configure( -state => 'disabled' );
-        $borrowerinfo->grid( -col => 1, -row => 0, -sticky => 'w' );
-
-      Thanks to Mark Olesen for suggesting this.
-
-     -Improved alignement of '='s in certain cases.
-      Thanks to Norbert Gruener for sending an example.
-
-     -Outdent-long-comments (-olc) has been re-instated as a default, since
-      it works much better now.  Use -nolc if you want to prevent it.
-
-     -Added check for 'perltidy file.pl -o file.pl', which causes file.pl
-     to be lost. (The -b option should be used instead). Thanks to mreister
-     for reporting this problem.
-
-  2002 11 06
-     -Switch/case or given/when syntax is now recognized.  Its vertical alignment
-     is not great yet, but it parses ok.  The words 'switch', 'case', 'given',
-     and 'when' are now treated as keywords.  If this causes trouble with older
-     code, we could introduce a switch to deactivate it.  Thanks to Stan Brown
-     and Jochen Schneider for recommending this.
-
-     -Corrected error parsing sub attributes with call parameters.
-     Thanks to Marc Kerr for catching this.
-
-     -Sub prototypes no longer need to be on the same line as sub names.  
-
-     -a new flag -frm or --frames will cause html output to be in a
-     frame, with table of contents in the left panel and formatted source
-     in the right panel.  Try 'perltidy -html -frm somemodule.pm' for example.
-
-     -The new default for -html formatting is to pass the pod through Pod::Html.
-     The result is syntax colored code within your pod documents. This can be
-     deactivated with -npod.  Thanks to those who have written to discuss this,
-     particularly Mark Olesen and Hugh Myers.
-
-     -the -olc (--outdent-long-comments) option works much better.  It now outdents
-     groups of consecutive comments together, and by just the amount needed to
-     avoid having any one line exceeding the maximum line length.
-
-     -block comments are now trimmed of trailing whitespace.
-
-     -if a directory specified with -opath does not exist, it will be created.
-
-     -a table of contents to packages and subs is output when -html is used.
-     Use -ntoc to prevent this. 
-
-     -fixed an unusual bug in which a 'for' statement following a 'format'
-     statement was not correctly tokenized.  Thanks to Boris Zentner for
-     catching this.
-
-     -Tidy.pm is no longer dependent on modules IO::Scalar and IO::ScalarArray.  
-     There were some speed issues.  Suggested by Joerg Walter.
-
-     -The treatment of quoted wildcards (file globs) is now system-independent. 
-     For example
-
-        perltidy 'b*x.p[lm]'
-
-     would match box.pl, box.pm, brinx.pm under any operating system.  Of
-     course, anything unquoted will be subject to expansion by any shell.
-
-     -default color for keywords under -html changed from 
-     SaddleBrown (#8B4513) to magenta4 (#8B008B).
-
-     -fixed an arg parsing glitch in which something like:
-       perltidy quick-help
-     would trigger the help message and exit, rather than operate on the
-     file 'quick-help'.
-
-  2002 09 22
-     -New option '-b' or '--backup-and-modify-in-place' will cause perltidy to
-     overwrite the original file with the tidied output file.  The original
-     file will be saved with a '.bak' extension (which can be changed with
-     -bext=s).  Thanks to Rudi Farkas for the suggestion.
-
-     -An index to all subs is included at the top of -html output, unless
-     only the 
 section is written.
-
-     -Anchor lines of the form  are now inserted at key points
-     in html output, such as before sub definitions, for the convenience of
-     postprocessing scripts.  Suggested by Howard Owen.
-
-     -The cuddled-else (-ce) flag now also makes cuddled continues, like
-     this:
-
-        while ( ( $pack, $file, $line ) = caller( $i++ ) ) {
-           # bla bla
-        } continue {
-            $prevpack = $pack;
-        }
-
-     Suggested by Simon Perreault.  
-
-     -Fixed bug in which an extra blank line was added before an =head or 
-     similar pod line after an __END__ or __DATA__ line each time 
-     perltidy was run.  Also, an extra blank was being added after
-     a terminal =cut.  Thanks to Mike Birdsall for reporting this.
-
-  2002 08 26
-     -Fixed bug in which space was inserted in a hyphenated hash key:
-        my $val = $myhash{USER-NAME};
-      was converted to:
-        my $val = $myhash{USER -NAME}; 
-      Thanks to an anonymous bug reporter at sourceforge.
-
-     -Fixed problem with the '-io' ('--indent-only') where all lines 
-      were double spaced.  Thanks to Nick Andrew for reporting this bug.
-
-     -Fixed tokenization error in which something like '-e1' was 
-      parsed as a number. 
-
-     -Corrected a rare problem involving older perl versions, in which 
-      a line break before a bareword caused problems with 'use strict'.
-      Thanks to Wolfgang Weisselberg for noting this.
-
-     -More syntax error checking added.
-
-     -Outdenting labels (-ola) has been made the default, in order to follow the
-      perlstyle guidelines better.  It's probably a good idea in general, but
-      if you do not want this, use -nola in your .perltidyrc file.
-  
-     -Updated rules for padding logical expressions to include more cases.
-      Thanks to Wolfgang Weisselberg for helpful discussions.
-
-     -Added new flag -osbc (--outdent-static-block-comments) which will
-      outdent static block comments by 2 spaces (or whatever -ci equals).
-      Requested by Jon Robison.
-
-  2002 04 25
-     -Corrected a bug, introduced in the previous release, in which some
-      closing side comments (-csc) could have incorrect text.  This is
-      annoying but will be correct the next time perltidy is run with -csc.
-
-     -Implemented XHTML patch submitted by Ville Skyttä.
-
-     -Fixed bug where whitespace was being removed between 'Bar' and '()' 
-      in a use statement like:
-
-           use Foo::Bar ();
-
-      Thanks to Ville Skyttä for reporting this.
-
-     -Whenever possible, if a logical expression is broken with leading
-      '&&', '||', 'and', or 'or', then the leading line will be padded
-      with additional space to produce alignment.  This has been on the
-      todo list for a long time; thanks to Frank Steinhauer for reminding
-      me to do it.  Notice the first line after the open parens here:
-
-            OLD: perltidy -lp
-            if (
-                 !param("rules.to.$linecount")
-                 && !param("rules.from.$linecount")
-                 && !param("rules.subject.$linecount")
-                 && !(
-                       param("rules.fieldname.$linecount")
-                       && param("rules.fieldval.$linecount")
-                 )
-                 && !param("rules.size.$linecount")
-                 && !param("rules.custom.$linecount")
-              )
-
-            NEW: perltidy -lp
-            if (
-                    !param("rules.to.$linecount")
-                 && !param("rules.from.$linecount")
-                 && !param("rules.subject.$linecount")
-                 && !(
-                          param("rules.fieldname.$linecount")
-                       && param("rules.fieldval.$linecount")
-                 )
-                 && !param("rules.size.$linecount")
-                 && !param("rules.custom.$linecount")
-              )
-
-  2002 04 16
-     -Corrected a mistokenization of variables for a package with a name
-      equal to a perl keyword.  For example: 
-
-         my::qx();
-         package my;
-         sub qx{print "Hello from my::qx\n";}
-
-      In this case, the leading 'my' was mistokenized as a keyword, and a
-      space was being place between 'my' and '::'.  This has been
-      corrected.  Thanks to Martin Sluka for discovering this. 
-
-     -A new flag -bol (--break-at-old-logic-breakpoints)
-      has been added to control whether containers with logical expressions
-      should be broken open.  This is the default.
-
-     -A new flag -bok (--break-at-old-keyword-breakpoints)
-      has been added to follow breaks at old keywords which return lists,
-      such as sort and map.  This is the default.
-
-     -A new flag -bot (--break-at-old-trinary-breakpoints) has been added to
-      follow breaks at trinary (conditional) operators.  This is the default.
-
-     -A new flag -cab=n has been added to control breaks at commas after
-      '=>' tokens.  The default is n=1, meaning break unless this breaks
-      open an existing on-line container.
-
-     -A new flag -boc has been added to allow existing list formatting
-      to be retained.  (--break-at-old-comma-breakpoints).  See updated manual.
-
-     -A new flag -iob (--ignore-old-breakpoints) has been added to
-      prevent the locations of old breakpoints from influencing the output
-      format.
-
-     -Corrected problem where nested parentheses were not getting full
-      indentation.  This has been on the todo list for some time; thanks 
-      to Axel Rose for a snippet demonstrating this issue.
-
-                OLD: inner list is not indented
-                $this->sendnumeric(
-                    $this->server,
-                    (
-                      $ret->name,        $user->username, $user->host,
-                    $user->server->name, $user->nick,     "H"
-                    ),
-                );
-
-                NEW:
-                $this->sendnumeric(
-                    $this->server,
-                    (
-                        $ret->name,          $user->username, $user->host,
-                        $user->server->name, $user->nick,     "H"
-                    ),
-                );
-
-     -Code cleaned up by removing the following unused, undocumented flags.
-      They should not be in any .perltidyrc files because they were just
-      experimental flags which were never documented.  Most of them placed
-      artificial limits on spaces, and Wolfgang Weisselberg convinced me that
-      most of them they do more harm than good by causing unexpected results.
-
-      --maximum-continuation-indentation (-mci)
-      --maximum-whitespace-columns
-      --maximum-space-to-comment (-xsc)
-      --big-space-jump (-bsj)
-
-     -Pod file 'perltidy.pod' has been appended to the script 'perltidy', and
-      Tidy.pod has been append to the module 'Tidy.pm'.  Older MakeMaker's
-      were having trouble.
- 
-     -A new flag -isbc has been added for more control on comments. This flag
-      has the effect that if there is no leading space on the line, then the
-      comment will not be indented, and otherwise it may be.  If both -ibc and
-      -isbc are set, then -isbc takes priority.  Thanks to Frank Steinhauer
-      for suggesting this.
-
-     -A new document 'stylekey.pod' has been created to quickly guide new users
-      through the maze of perltidy style parameters.  An html version is 
-      on the perltidy web page.  Take a look! It should be very helpful.
-
-     -Parameters for controlling 'vertical tightness' have been added:
-      -vt and -vtc are the main controls, but finer control is provided
-      with -pvt, -pcvt, -bvt, -bcvt, -sbvt, -sbcvt.  Block brace vertical
-      tightness controls have also been added.
-      See updated manual and also see 'stylekey.pod'. Simple examples:
-
-        # perltidy -lp -vt=1 -vtc=1
-        @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
-                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
-
-        # perltidy -lp -vt=1 -vtc=0
-        @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
-                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
-        );
-
-     -Lists which do not format well in uniform columns are now better
-      identified and formated.
-
-        OLD:
-        return $c->create( 'polygon', $x, $y, $x + $ruler_info{'size'},
-            $y + $ruler_info{'size'}, $x - $ruler_info{'size'},
-            $y + $ruler_info{'size'} );
-
-        NEW:
-        return $c->create(
-            'polygon', $x, $y,
-            $x + $ruler_info{'size'},
-            $y + $ruler_info{'size'},
-            $x - $ruler_info{'size'},
-            $y + $ruler_info{'size'}
-        );
-
-        OLD:
-          radlablist($f1, pad('Initial', $p), $b->{Init}->get_panel_ref, 'None ',
-                     'None', 'Default', 'Default', 'Simple', 'Simple');
-        NEW:
-          radlablist($f1,
-                     pad('Initial', $p),
-                     $b->{Init}->get_panel_ref,
-                     'None ', 'None', 'Default', 'Default', 'Simple', 'Simple');
-
-     -Corrected problem where an incorrect html filename was generated for 
-      external calls to Tidy.pm module.  Fixed incorrect html title when
-      Tidy.pm is called with IO::Scalar or IO::Array source.
-
-     -Output file permissons are now set as follows.  An output script file
-      gets the same permission as the input file, except that owner
-      read/write permission is added (otherwise, perltidy could not be
-      rerun).  Html output files use system defaults.  Previously chmod 0755
-      was used in all cases.  Thanks to Mark Olesen for bringing this up.
-
-     -Missing semicolons will not be added in multi-line blocks of type
-      sort, map, or grep.  This brings perltidy into closer agreement
-      with common practice.  Of course, you can still put semicolons 
-      there if you like.  Thanks to Simon Perreault for a discussion of this.
-
-     -Most instances of extra semicolons are now deleted.  This is
-      particularly important if the -csc option is used.  Thanks to Wolfgang
-      Weisselberg for noting this.  For example, the following line
-      (produced by 'h2xs' :) has an extra semicolon which will now be
-      removed:
-
-         BEGIN { plan tests => 1 };
-
-     -New parameter -csce (--closing-side-comment-else-flag) can be used
-      to control what text is appended to 'else' and 'elsif' blocks.
-      Default is to just add leading 'if' text to an 'else'.  See manual.
-
-     -The -csc option now labels 'else' blocks with additinal information
-      from the opening if statement and elsif statements, if space.
-      Thanks to Wolfgang Weisselberg for suggesting this.
-
-     -The -csc option will now remove any old closing side comments
-      below the line interval threshold. Thanks to Wolfgang Weisselberg for
-      suggesting this.
-
-     -The abbreviation feature, which was broken in the previous version,
-      is now fixed.  Thanks to Michael Cartmell for noting this.
-
-     -Vertical alignment is now done for '||='  .. somehow this was 
-      overlooked.
-
-  2002 02 25
-     -This version uses modules for the first time, and a standard perl
-      Makefile.PL has been supplied.  However, perltidy may still be
-      installed as a single script, without modules.  See INSTALL for
-      details.
-
-     -The man page 'perl2web' has been merged back into the main 'perltidy'
-      man page to simplify installation.  So you may remove that man page
-      if you have an older installation.
-
-     -Added patch from Axel Rose for MacPerl.  The patch prompts the user
-      for command line arguments before calling the module 
-      Perl::Tidy::perltidy.
-
-     -Corrected bug with '-bar' which was introduced in the previous
-      version.  A closing block brace was being indented.  Thanks to
-      Alexandros M Manoussakis for reporting this.
-
-     -New parameter '--entab-leading-whitespace=n', or '-et=n', has been
-      added for those who prefer tabs.  This behaves different from the
-      existing '-t' parameter; see updated man page.  Suggested by Mark
-      Olesen.
-
-     -New parameter '--perl-syntax-check-flags=s'  or '-pcsf=s' can be
-      used to change the flags passed to perltidy in a syntax check.
-      See updated man page.  Suggested by Mark Olesen. 
-
-     -New parameter '--output-path=s'  or '-opath=s' will cause output
-      files to be placed in directory s.  See updated man page.  Thanks for
-      Mark Olesen for suggesting this.
-
-     -New parameter --dump-profile (or -dpro) will dump to
-      standard output information about the search for a
-      configuration file, the name of whatever configuration file
-      is selected, and its contents.  This should help debugging
-      config files, especially on different Windows systems.
-
-     -The -w parameter now notes possible errors of the form:
-
-            $comment = s/^\s*(\S+)\..*/$1/;   # trim whitespace
-
-     -Corrections added for a leading ':' and for leaving a leading 'tcsh'
-      line untouched.  Mark Olesen reported that lines of this form were
-      accepted by perl but not by perltidy:
-
-            : # use -*- perl -*-
-            eval 'exec perl -wS $0 "$@"'  # shell should exec 'perl'
-            unless 1;                     # but Perl should skip this one
-
-      Perl will silently swallow a leading colon on line 1 of a
-      script, and now perltidy will do likewise.  For example,
-      this is a valid script, provided that it is the first line,
-      but not otherwise:
-
-            : print "Hello World\n";
-  
-      Also, perltidy will now mark a first line with leading ':' followed by
-      '#' as type SYSTEM (just as a #!  line), not to be formatted.
-
-     -List formatting improved for certain lists with special
-      initial terms, such as occur with 'printf', 'sprintf',
-      'push', 'pack', 'join', 'chmod'.  The special initial term is
-      now placed on a line by itself.  For example, perltidy -gnu
-
-         OLD:
-            $Addr = pack(
-                         "C4",                hex($SourceAddr[0]),
-                         hex($SourceAddr[1]), hex($SourceAddr[2]),
-                         hex($SourceAddr[3])
-                         );
-
-         NEW:
-            $Addr = pack("C4",
-                         hex($SourceAddr[0]), hex($SourceAddr[1]),
-                         hex($SourceAddr[2]), hex($SourceAddr[3]));
-
-          OLD:
-                push (
-                      @{$$self{states}}, '64', '66', '68',
-                      '70',              '72', '74', '76',
-                      '78',              '80', '82', '84',
-                      '86',              '88', '90', '92',
-                      '94',              '96', '98', '100',
-                      '102',             '104'
-                      );
-
-          NEW:
-                push (
-                      @{$$self{states}},
-                      '64', '66', '68', '70', '72',  '74',  '76',
-                      '78', '80', '82', '84', '86',  '88',  '90',
-                      '92', '94', '96', '98', '100', '102', '104'
-                      );
-
-     -Lists of complex items, such as matricies, are now detected
-      and displayed with just one item per row:
-
-        OLD:
-        $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
-            [ 1, tan( deg2rad($a) ), 0 ], [ tan( deg2rad($b) ), 1, 0 ],
-            [ 0, 0, 1 ]
-        );
-
-        NEW:
-        $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
-            [ 1,                  tan( deg2rad($a) ), 0 ],
-            [ tan( deg2rad($b) ), 1,                  0 ],
-            [ 0,                  0,                  1 ]
-        );
-
-     -The perl syntax check will be turned off for now when input is from
-      standard input or standard output.  The reason is that this requires
-      temporary files, which has produced far too many problems during
-      Windows testing.  For example, the POSIX module under Windows XP/2000
-      creates temporary names in the root directory, to which only the
-      administrator should have permission to write.
-
-     -Merged patch sent by Yves Orton to handle appropriate
-      configuration file locations for different Windows varieties
-      (2000, NT, Me, XP, 95, 98).
-
-     -Added patch to properly handle a for/foreach loop without
-      parens around a list represented as a qw.  I didn't know this
-      was possible until Wolfgang Weisselberg pointed it out:
-
-            foreach my $key qw\Uno Due Tres Quadro\ {
-                print "Set $key\n";
-            }
-
-      But Perl will give a syntax error without the $ variable; ie this will
-      not work:
-
-            foreach qw\Uno Due Tres Quadro\ {
-                print "Set $_\n";
-            }
-
-     -Merged Windows version detection code sent by Yves Orton.  Perltidy
-      now automatically turns off syntax checking for Win 9x/ME versions,
-      and this has solved a lot of robustness problems.  These systems 
-      cannot reliably handle backtick operators.  See man page for
-      details.
-  
-     -Merged VMS filename handling patch sent by Michael Cartmell.  (Invalid
-      output filenames were being created in some cases). 
-
-     -Numerous minor improvements have been made for -lp style indentation.
-
-     -Long C-style 'for' expressions will be broken after each ';'.   
-
-      'perltidy -gnu' gives:
-
-        OLD:
-        for ($status = $db->seq($key, $value, R_CURSOR()) ; $status == 0
-             and $key eq $origkey ; $status = $db->seq($key, $value, R_NEXT())) 
-
-        NEW:
-        for ($status = $db->seq($key, $value, R_CURSOR()) ;
-             $status == 0 and $key eq $origkey ;
-             $status = $db->seq($key, $value, R_NEXT()))
-
-     -For the -lp option, a single long term within parens
-      (without commas) now has better alignment.  For example,
-      perltidy -gnu
-
-                OLD:
-                $self->throw("Must specify a known host, not $location,"
-                      . " possible values ("
-                      . join (",", sort keys %hosts) . ")");
-
-                NEW:
-                $self->throw("Must specify a known host, not $location,"
-                             . " possible values ("
-                             . join (",", sort keys %hosts) . ")");
-
-  2001 12 31
-     -This version is about 20 percent faster than the previous
-      version as a result of optimization work.  The largest gain
-      came from switching to a dispatch hash table in the
-      tokenizer.
-
-     -perltidy -html will check to see if HTML::Entities is
-      installed, and if so, it will use it to encode unsafe
-      characters.
-
-     -Added flag -oext=ext to change the output file extension to
-      be different from the default ('tdy' or 'html').  For
-      example:
-
-        perltidy -html -oext=htm filename
-
-     will produce filename.htm
-
-     -Added flag -cscw to issue warnings if a closing side comment would replace
-     an existing, different side comments.  See the man page for details.
-     Thanks to Peter Masiar for helpful discussions.
-
-     -Corrected tokenization error of signed hex/octal/binary numbers. For
-     example, the first hex number below would have been parsed correctly
-     but the second one was not:
-        if ( ( $tmp >= 0x80_00_00 ) || ( $tmp < -0x80_00_00 ) ) { }
-
-     -'**=' was incorrectly tokenized as '**' and '='.  This only
-         caused a problem with the -extrude opton.
-
-     -Corrected a divide by zero when -extrude option is used
-
-     -The flag -w will now contain all errors reported by 'perl -c' on the
-     input file, but otherwise they are not reported.  The reason is that
-     perl will report lots of problems and syntax errors which are not of
-     interest when only a small snippet is being formatted (such as missing
-     modules and unknown bare words).  Perltidy will always report all
-     significant syntax errors that it finds, such as unbalanced braces,
-     unless the -q (quiet) flag is set.
-
-     -Merged modifications created by Hugh Myers into perltidy.
-      These include a 'streamhandle' routine which allows perltidy
-      as a module to operate on input and output arrays and strings
-      in addition to files.  Documentation and new packaging as a
-      module should be ready early next year; This is an elegant,
-      powerful update; many thanks to Hugh for contributing it.
-
-  2001 11 28
-     -added a tentative patch which tries to keep any existing breakpoints
-     at lines with leading keywords map,sort,eval,grep. The idea is to
-     improve formatting of sequences of list operations, as in a schwartzian
-     transform.  Example:
-
-        INPUT:
-        my @sorted = map { $_->[0] }
-                     sort { $a->[1] <=> $b->[1] }
-                     map { [ $_, rand ] } @list;
-
-        OLD:
-        my @sorted =
-          map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, rand ] } @list;
-
-        NEW:
-        my @sorted = map { $_->[0] }
-          sort { $a->[1] <=> $b->[1] }
-          map { [ $_, rand ] } @list;
-
-      The new alignment is not as nice as the input, but this is an improvement.
-      Thanks to Yves Orton for this suggestion.
-
-     -modified indentation logic so that a line with leading opening paren,
-     brace, or square bracket will never have less indentation than the
-     line with the corresponding opening token.  Here's a simple example:
-
-        OLD:
-            $mw->Button(
-                -text    => "New Document",
-                -command => \&new_document
-              )->pack(
-                -side   => 'bottom',
-                -anchor => 'e'
-            );
-
-        Note how the closing ');' is lined up with the first line, even
-        though it closes a paren in the 'pack' line.  That seems wrong.
- 
-        NEW:
-            $mw->Button(
-                -text    => "New Document",
-                -command => \&new_document
-              )->pack(
-                -side   => 'bottom',
-                -anchor => 'e'
-              );
-
-       This seems nicer: you can up-arrow with an editor and arrive at the
-       opening 'pack' line.
- 
-     -corrected minor glitch in which cuddled else (-ce) did not get applied
-     to an 'unless' block, which should look like this:
-
-            unless ($test) {
-
-            } else {
-
-            }
-
-      Thanks to Jeremy Mates for reporting this.
-
-     -The man page has been reorganized to parameters easier to find.
- 
-     -Added check for multiple definitions of same subroutine.  It is easy
-      to introduce this problem when cutting and pasting. Perl does not
-      complain about it, but it can lead to disaster.
-
-     -The command -pro=filename  or -profile=filename may be used to specify a
-      configuration file which will override the default name of .perltidyrc.
-      There must not be a space on either side of the '=' sign.  I needed
-      this to be able to easily test perltidy with a variety of different
-      configuration files.
-
-     -Side comment alignment has been improved somewhat across frequent level
-      changes, as in short if/else blocks.  Thanks to Wolfgang Weisselberg 
-      for pointing out this problem.  For example:
-    
-        OLD:
-        if ( ref $self ) {    # Called as a method
-            $format = shift;
-        }
-        else {    # Regular procedure call
-            $format = $self;
-            undef $self;
-        }
-
-        NEW:
-        if ( ref $self ) {    # Called as a method
-            $format = shift;
-        }
-        else {                # Regular procedure call
-            $format = $self;
-            undef $self;
-        }
-
-     -New command -ssc (--static-side-comment) and related command allows
-      side comments to be spaced close to preceding character.  This is
-      useful for displaying commented code as side comments.
-
-     -New command -csc (--closing-side-comment) and several related
-      commands allow comments to be added to (and deleted from) any or all
-      closing block braces.  This can be useful if you have to maintain large
-      programs, especially those that you didn't write.  See updated man page.
-      Thanks to Peter Masiar for this suggestion.  For a simple example:
-
-            perltidy -csc
-
-            sub foo {
-                if ( !defined( $_[0] ) ) {
-                    print("Hello, World\n");
-                }
-                else {
-                    print( $_[0], "\n" );
-                }
-            } ## end sub foo
-
-      This added '## end sub foo' to the closing brace.  
-      To remove it, perltidy -ncsc.
-
-     -New commands -ola, for outdenting labels, and -okw, for outdenting
-      selected control keywords, were implemented.  See the perltidy man
-      page for details.  Thanks to Peter Masiar for this suggestion.
-
-     -Hanging side comment change: a comment will not be considered to be a
-      hanging side comment if there is no leading whitespace on the line.
-      This should improve the reliability of identifying hanging side comments.
-      Thanks to Peter Masiar for this suggestion.
-
-     -Two new commands for outdenting, -olq (outdent-long-quotes) and -olc
-      (outdent-long-comments), have been added.  The original -oll
-      (outdent-long-lines) remains, and now is an abbreviation for -olq and -olc.
-      The new default is just -olq.  This was necessary to avoid inconsistency with
-      the new static block comment option.
-
-     -Static block comments:  to provide a way to display commented code
-      better, the convention is used that comments with a leading '##' should
-      not be formatted as usual.  Please see '-sbc' (or '--static-block-comment')
-      for documentation.  It can be deactivated with with -nsbc, but
-      should not normally be necessary. Thanks to Peter Masiar for this 
-      suggestion.
-
-     -Two changes were made to help show structure of complex lists:
-      (1) breakpoints are forced after every ',' in a list where any of
-      the list items spans multiple lines, and
-      (2) List items which span multiple lines now get continuation indentation.
-
-      The following example illustrates both of these points.  Many thanks to
-      Wolfgang Weisselberg for this snippet and a discussion of it; this is a
-      significant formatting improvement. Note how it is easier to see the call
-      parameters in the NEW version:
-
-        OLD:
-        assert( __LINE__, ( not defined $check )
-            or ref $check
-            or $check eq "new"
-            or $check eq "old", "Error in parameters",
-            defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
-            defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
-            defined $old_db ? ( ref $old_db ? ref $old_db : $old_db ) : "undef" );
-
-        NEW: 
-        assert(
-            __LINE__,
-            ( not defined $check )
-              or ref $check
-              or $check eq "new"
-              or $check eq "old",
-            "Error in parameters",
-            defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
-            defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
-            defined $old_db  ? ( ref $old_db  ? ref $old_db  : $old_db )  : "undef"
-        );
-
-        Another example shows how this helps displaying lists:
-
-        OLD:
-        %{ $self->{COMPONENTS} } = (
-            fname =>
-            { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
-            street =>
-            { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
-        );
-
-        The structure is clearer with the added indentation:
-    
-        NEW:
-        %{ $self->{COMPONENTS} } = (
-            fname =>
-              { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
-            street =>
-              { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
-        );
-
-        -The structure of nested logical expressions is now displayed better.
-        Thanks to Wolfgang Weisselberg for helpful discussions.  For example,
-        note how the status of the final 'or' is displayed in the following:
-
-        OLD:
-        return ( !null($op)
-              and null( $op->sibling )
-              and $op->ppaddr eq "pp_null"
-              and class($op) eq "UNOP"
-              and ( ( $op->first->ppaddr =~ /^pp_(and|or)$/
-                and $op->first->first->sibling->ppaddr eq "pp_lineseq" )
-                or ( $op->first->ppaddr eq "pp_lineseq"
-                    and not null $op->first->first->sibling
-                    and $op->first->first->sibling->ppaddr eq "pp_unstack" ) ) );
-
-        NEW:
-        return (
-            !null($op)
-              and null( $op->sibling )
-              and $op->ppaddr eq "pp_null"
-              and class($op) eq "UNOP"
-              and (
-                (
-                    $op->first->ppaddr =~ /^pp_(and|or)$/
-                    and $op->first->first->sibling->ppaddr eq "pp_lineseq"
-                )
-                or ( $op->first->ppaddr eq "pp_lineseq"
-                    and not null $op->first->first->sibling
-                    and $op->first->first->sibling->ppaddr eq "pp_unstack" )
-              )
-        );
-
-       -A break will always be put before a list item containing a comma-arrow.
-       This will improve formatting of mixed lists of this form:
-
-            OLD:
-            $c->create(
-                'text', 225, 20, -text => 'A Simple Plot',
-                -font => $font,
-                -fill => 'brown'
-            );
-
-            NEW:
-            $c->create(
-                'text', 225, 20,
-                -text => 'A Simple Plot',
-                -font => $font,
-                -fill => 'brown'
-            );
-
-      -For convenience, the command -dac (--delete-all-comments) now also
-      deletes pod.  Likewise, -tac (--tee-all-comments) now also sends pod
-      to a '.TEE' file.  Complete control over the treatment of pod and
-      comments is still possible, as described in the updated help message 
-      and man page.
-
-      -The logic which breaks open 'containers' has been rewritten to be completely
-      symmetric in the following sense: if a line break is placed after an opening
-      {, [, or (, then a break will be placed before the corresponding closing
-      token.  Thus, a container either remains closed or is completely cracked
-      open.
-
-      -Improved indentation of parenthesized lists.  For example, 
-
-                OLD:
-                $GPSCompCourse =
-                  int(
-                  atan2( $GPSTempCompLong - $GPSLongitude,
-                  $GPSLatitude - $GPSTempCompLat ) * 180 / 3.14159265 );
-
-                NEW:
-                $GPSCompCourse = int(
-                    atan2(
-                        $GPSTempCompLong - $GPSLongitude,
-                        $GPSLatitude - $GPSTempCompLat
-                      ) * 180 / 3.14159265
-                );
-
-       Further improvements will be made in future releases.
-
-      -Some improvements were made in formatting small lists.
-
-      -Correspondence between Input and Output line numbers reported in a 
-       .LOG file should now be exact.  They were sometimes off due to the size
-       of intermediate buffers.
-
-      -Corrected minor tokenization error in which a ';' in a foreach loop
-       control was tokenized as a statement termination, which forced a 
-       line break:
-
-            OLD:
-            foreach ( $i = 0;
-                $i <= 10;
-                $i += 2
-              )
-            {
-                print "$i ";
-            }
-
-            NEW:
-            foreach ( $i = 0 ; $i <= 10 ; $i += 2 ) {
-                print "$i ";
-            }
-
-      -Corrected a problem with reading config files, in which quote marks were not
-       stripped.  As a result, something like -wba="&& . || " would have the leading
-       quote attached to the && and not work correctly.  A workaround for older
-       versions is to place a space around all tokens within the quotes, like this:
-       -wba=" && . || "
-
-      -Removed any existing space between a label and its ':'
-        OLD    : { }
-        NEW: { }
-       This was necessary because the label and its colon are a single token.
-
-      -Corrected tokenization error for the following (highly non-recommended) 
-       construct:
-        $user = @vars[1] / 100;
- 
-      -Resolved cause of a difference between perltidy under perl v5.6.1 and
-      5.005_03; the problem was different behavior of \G regex position
-      marker(!)
-
-  2001 10 20
-     -Corrected a bug in which a break was not being made after a full-line
-     comment within a short eval/sort/map/grep block.  A flag was not being
-     zeroed.  The syntax error check catches this.  Here is a snippet which
-     illustrates the bug:
-
-            eval {
-                #open Socket to Dispatcher
-                $sock = &OpenSocket;
-            };
-
-     The formatter mistakenly thought that it had found the following 
-     one-line block:
- 
-            eval {#open Socket to Dispatcher$sock = &OpenSocket; };
-
-     The patch fixes this. Many thanks to Henry Story for reporting this bug.
-
-     -Changes were made to help diagnose and resolve problems in a
-     .perltidyrc file: 
-       (1) processing of command parameters has been into two separate
-       batches so that any errors in a .perltidyrc file can be localized.  
-       (2) commands --help, --version, and as many of the --dump-xxx
-       commands are handled immediately, without any command line processing
-       at all.  
-       (3) Perltidy will ignore any commands in the .perltidyrc file which
-       cause immediate exit.  These are:  -h -v -ddf -dln -dop -dsn -dtt
-       -dwls -dwrs -ss.  Thanks to Wolfgang Weisselberg for helpful
-       suggestions regarding these updates.
-
-     -Syntax check has been reinstated as default for MSWin32 systems.  This
-     way Windows 2000 users will get syntax check by default, which seems
-     like a better idea, since the number of Win 95/98 systems will be
-     decreasing over time.  Documentation revised to warn Windows 95/98
-     users about the problem with empty '&1'.  Too bad these systems
-     all report themselves as MSWin32.
-
-  2001 10 16
-     -Fixed tokenization error in which a method call of the form
-
-        Module::->new();
- 
-      got a space before the '::' like this:
-
-        Module ::->new();
-
-      Thanks to David Holden for reporting this.
- 
-     -Added -html control over pod text, using a new abbreviation 'pd'.  See
-     updated perl2web man page. The default is to use the color of a comment,
-     but italicized.  Old .css style sheets will need a new line for
-     .pd to use this.  The old color was the color of a string, and there
-     was no control.  
- 
-     -.css lines are now printed in sorted order.
-
-     -Fixed interpolation problem where html files had '$input_file' as title
-     instead of actual input file name.  Thanks to Simon Perreault for finding
-     this and sending a patch, and also to Tobias Weber.
-
-     -Breaks will now have the ':' placed at the start of a line, 
-     one per line by default because this shows logical structure
-     more clearly. This coding has been completely redone. Some 
-     examples of new ?/: formatting:
-
-           OLD:
-                wantarray ? map( $dir::cwd->lookup($_)->path, @_ ) :
-                  $dir::cwd->lookup( $_[0] )->path;
-
-           NEW:
-                wantarray 
-                  ? map( $dir::cwd->lookup($_)->path, @_ )
-                  : $dir::cwd->lookup( $_[0] )->path;
-
-           OLD:
-                    $a = ( $b > 0 ) ? {
-                        a => 1,
-                        b => 2
-                    } : { a => 6, b => 8 };
-
-           NEW:
-                    $a = ( $b > 0 )
-                      ? {
-                        a => 1,
-                        b => 2
-                      }
-                      : { a => 6, b => 8 };
-
-        OLD: (-gnu):
-        $self->note($self->{skip} ? "Hunk #$self->{hunk} ignored at 1.\n" :
-                    "Hunk #$self->{hunk} failed--$@");
-
-        NEW: (-gnu):
-        $self->note($self->{skip} 
-                    ? "Hunk #$self->{hunk} ignored at 1.\n"
-                    : "Hunk #$self->{hunk} failed--$@");
-
-        OLD:
-            $which_search =
-              $opts{"t"} ? 'title'   :
-              $opts{"s"} ? 'subject' : $opts{"a"} ? 'author' : 'title';
-
-        NEW:
-            $which_search =
-              $opts{"t"} ? 'title'
-              : $opts{"s"} ? 'subject'
-              : $opts{"a"} ? 'author'
-              : 'title';
- 
-     You can use -wba=':' to recover the previous default which placed ':'
-     at the end of a line.  Thanks to Michael Cartmell for helpful
-     discussions and examples.  
-
-     -Tokenizer updated to do syntax checking for matched ?/: pairs.  Also,
-     the tokenizer now outputs a unique serial number for every balanced
-     pair of brace types and ?/: pairs.  This greatly simplifies the
-     formatter.
-
-     -Long lines with repeated 'and', 'or', '&&', '||'  will now have
-     one such item per line.  For example:
-
-        OLD:
-            if ( $opt_d || $opt_m || $opt_p || $opt_t || $opt_x
-                || ( -e $archive && $opt_r ) )
-            {
-                ( $pAr, $pNames ) = readAr($archive);
-            }
-
-        NEW:
-            if ( $opt_d
-                || $opt_m
-                || $opt_p
-                || $opt_t
-                || $opt_x
-                || ( -e $archive && $opt_r ) )
-            {
-                ( $pAr, $pNames ) = readAr($archive);
-            }
-
-       OLD:
-            if ( $vp->{X0} + 4 <= $x && $vp->{X0} + $vp->{W} - 4 >= $x
-                && $vp->{Y0} + 4 <= $y && $vp->{Y0} + $vp->{H} - 4 >= $y ) 
-
-       NEW:
-            if ( $vp->{X0} + 4 <= $x
-                && $vp->{X0} + $vp->{W} - 4 >= $x
-                && $vp->{Y0} + 4 <= $y
-                && $vp->{Y0} + $vp->{H} - 4 >= $y )
-
-     -Long lines with multiple concatenated tokens will have concatenated
-     terms (see below) placed one per line, except for short items.  For
-     example:
-
-       OLD:
-            $report .=
-              "Device type:" . $ib->family . "  ID:" . $ib->serial . "  CRC:"
-              . $ib->crc . ": " . $ib->model() . "\n";
-
-       NEW:
-            $report .= "Device type:"
-              . $ib->family . "  ID:"
-              . $ib->serial . "  CRC:"
-              . $ib->model()
-              . $ib->crc . ": " . "\n";
-
-     NOTE: at present 'short' means 8 characters or less.  There is a
-     tentative flag to change this (-scl), but it is undocumented and
-     is likely to be changed or removed later, so only use it for testing.  
-     In the above example, the tokens "  ID:", "  CRC:", and "\n" are below
-     this limit.  
-
-     -If a line which is short enough to fit on a single line was
-     nevertheless broken in the input file at a 'good' location (see below), 
-     perltidy will try to retain a break.  For example, the following line
-     will be formatted as:
- 
-        open SUM, "<$file"
-          or die "Cannot open $file ($!)";
- 
-     if it was broken in the input file, and like this if not:
-
-        open SUM, "<$file" or die "Cannot open $file ($!)";
-
-     GOOD: 'good' location means before 'and','or','if','unless','&&','||'
-
-     The reason perltidy does not just always break at these points is that if
-     there are multiple, similar statements, this would preclude alignment.  So
-     rather than check for this, perltidy just tries to follow the input style,
-     in the hopes that the author made a good choice. Here is an example where 
-     we might not want to break before each 'if':
-
-        ($Locale, @Locale) = ($English, @English) if (@English > @Locale);
-        ($Locale, @Locale) = ($German,  @German)  if (@German > @Locale);
-        ($Locale, @Locale) = ($French,  @French)  if (@French > @Locale);
-        ($Locale, @Locale) = ($Spanish, @Spanish) if (@Spanish > @Locale);
-
-     -Added wildcard file expansion for systems with shells which lack this.
-     Now 'perltidy *.pl' should work under MSDOS/Windows.  Thanks to Hugh Myers 
-     for suggesting this.  This uses builtin glob() for now; I may change that.
-
-     -Added new flag -sbl which, if specified, overrides the value of -bl
-     for opening sub braces.  This allows formatting of this type:
-
-     perltidy -sbl 
-
-     sub foo
-     {
-        if (!defined($_[0])) {
-            print("Hello, World\n");
-        }
-        else {
-            print($_[0], "\n");
-        }
-     }
-     Requested by Don Alexander.
-
-     -Fixed minor parsing error which prevented a space after a $$ variable
-     (pid) in some cases.  Thanks to Michael Cartmell for noting this.
-     For example, 
-       old: $$< 700 
-       new: $$ < 700
-
-     -Improved line break choices 'and' and 'or' to display logic better.
-     For example:
-
-        OLD:
-            exists $self->{'build_dir'} and push @e,
-              "Unwrapped into directory $self->{'build_dir'}";
-
-        NEW:
-            exists $self->{'build_dir'}
-              and push @e, "Unwrapped into directory $self->{'build_dir'}";
-
-     -Fixed error of multiple use of abbreviatioin '-dsc'.  -dsc remains 
-     abbreviation for delete-side-comments; -dsm is new abbreviation for 
-     delete-semicolons.
-
-     -Corrected and updated 'usage' help routine.  Thanks to Slaven Rezic for 
-     noting an error.
-
-     -The default for Windows is, for now, not to do a 'perl -c' syntax
-     check (but -syn will activate it).  This is because of problems with
-     command.com.  James Freeman sent me a patch which tries to get around
-     the problems, and it works in many cases, but testing revealed several
-     issues that still need to be resolved.  So for now, the default is no
-     syntax check for Windows.
-
-     -I added a -T flag when doing perl -c syntax check.
-     This is because I test it on a large number of scripts from sources
-     unknown, and who knows what might be hidden in initialization blocks?
-     Also, deactivated the syntax check if perltidy is run as root.  As a
-     benign example, running the previous version of perltidy on the
-     following file would cause it to disappear:
-
-            BEGIN{
-                    print "Bye, bye baby!\n";
-                    unlink $0;
-            }
-        
-     The new version will not let that happen.
-
-     -I am contemplating (but have not yet implemented) making '-lp' the
-     default indentation, because it is stable now and may be closer to how
-     perl is commonly formatted.  This could be in the next release.  The
-     reason that '-lp' was not the original default is that the coding for
-     it was complex and not ready for the initial release of perltidy.  If
-     anyone has any strong feelings about this, I'd like to hear.  The
-     current default could always be recovered with the '-nlp' flag.
-
-  2001 09 03
-     -html updates:
-         - sub definition names are now specially colored, red by default.  
-           The letter 'm' is used to identify them.
-         - keyword 'sub' now has color of other keywords.
-         - restored html keyword color to __END__ and __DATA__, which was 
-           accidentally removed in the previous version.
-
-     -A new -se (--standard-error-output) flag has been implemented and
-     documented which causes all errors to be written to standard output
-     instead of a .ERR file.
-
-     -A new -w (--warning-output) flag has been implemented and documented
-      which causes perltidy to output certain non-critical messages to the
-      error output file, .ERR.  These include complaints about pod usage,
-      for example.  The default is to not include these.
-
-      NOTE: This replaces an undocumented -w=0 or --warning-level flag
-      which was tentatively introduced in the previous version to avoid some
-      unwanted messages.  The new default is the same as the old -w=0, so
-      that is no longer needed. 
-
-      -Improved syntax checking and corrected tokenization of functions such
-      as rand, srand, sqrt, ...  These can accept either an operator or a term
-      to their right.  This has been corrected.
- 
-     -Corrected tokenization of semicolon: testing of the previous update showed 
-     that the semicolon in the following statement was being mis-tokenized.  That
-     did no harm, other than adding an extra blank space, but has been corrected.
-
-              for (sort {strcoll($a,$b);} keys %investments) {
-                 ...
-              }
-
-     -New syntax check: after wasting 5 minutes trying to resolve a syntax
-      error in which I had an extra terminal ';' in a complex for (;;) statement, 
-      I spent a few more minutes adding a check for this in perltidy so it won't
-      happen again.
-
-     -The behavior of --break-before-subs (-bbs) and --break-before-blocks
-     (-bbb) has been modified.  Also, a new control parameter,
-     --long-block-line-count=n (-lbl=n) has been introduced to give more
-     control on -bbb.  This was previously a hardwired value.  The reason
-     for the change is to reduce the number of unwanted blank lines that
-     perltidy introduces, and make it less erratic.  It's annoying to remove
-     an unwanted blank line and have perltidy put it back.  The goal is to
-     be able to sprinkle a few blank lines in that dense script you
-     inherited from Bubba.  I did a lot of experimenting with different
-     schemes for introducing blank lines before and after code blocks, and
-     decided that there is no really good way to do it.  But I think the new
-     scheme is an improvement.  You can always deactivate this with -nbbb.
-     I've been meaning to work on this; thanks to Erik Thaysen for bringing
-     it to my attention.
-
-     -The .LOG file is seldom needed, and I get tired of deleting them, so
-      they will now only be automatically saved if perltidy thinks that it
-      made an error, which is almost never.  You can still force the logfile
-      to be saved with -log or -g.
-
-     -Improved method for computing number of columns in a table.  The old
-     method always tried for an even number.  The new method allows odd
-     numbers when it is obvious that a list is not a hash initialization
-     list.
-
-       old: my (
-                 $name,       $xsargs, $parobjs, $optypes,
-                 $hasp2child, $pmcode, $hdrcode, $inplacecode,
-                 $globalnew,  $callcopy
-              )
-              = @_;
-
-       new: my (
-                 $name,   $xsargs,  $parobjs,     $optypes,   $hasp2child,
-                 $pmcode, $hdrcode, $inplacecode, $globalnew, $callcopy
-              )
-              = @_;
-
-     -I fiddled with the list threshold adjustment, and some small lists
-     look better now.  Here is the change for one of the lists in test file
-     'sparse.t':
-     old:
-       %units =
-         ("in", "in", "pt", "pt", "pc", "pi", "mm", "mm", "cm", "cm", "\\hsize", "%",
-           "\\vsize", "%", "\\textwidth", "%", "\\textheight", "%");
-
-     new:
-       %units = (
-                  "in",      "in", "pt",          "pt", "pc",           "pi",
-                  "mm",      "mm", "cm",          "cm", "\\hsize",      "%",
-                  "\\vsize", "%",  "\\textwidth", "%",  "\\textheight", "%"
-                  );
-
-     -Improved -lp formatting at '=' sign.  A break was always being added after
-     the '=' sign in a statement such as this, (to be sure there was enough room
-     for the parameters):
-
-     old: my $fee =
-            CalcReserveFee(
-                            $env,          $borrnum,
-                            $biblionumber, $constraint,
-                            $bibitems
-                            );
- 
-     The updated version doesn't do this unless the space is really needed:
-
-     new: my $fee = CalcReserveFee(
-                                   $env,          $borrnum,
-                                   $biblionumber, $constraint,
-                                   $bibitems
-                                   );
-
-     -I updated the tokenizer to allow $#+ and $#-, which seem to be new to
-     Perl 5.6.  Some experimenting with a recent version of Perl indicated
-     that it allows these non-alphanumeric '$#' array maximum index
-     varaibles: $#: $#- $#+ so I updated the parser accordingly.  Only $#:
-     seems to be valid in older versions of Perl.
-
-     -Fixed a rare formatting problem with -lp (and -gnu) which caused
-     excessive indentation.
-
-     -Many additional syntax checks have been added.
-
-     -Revised method for testing here-doc target strings; the following
-     was causing trouble with a regex test because of the '*' characters:
-      print <<"*EOF*";
-      bla bla
-      *EOF*
-     Perl seems to allow almost anything to be a here doc target, so an
-     exact string comparison is now used.
-
-     -Made update to allow underscores in binary numbers, like '0b1100_0000'.
-
-     -Corrected problem with scanning certain module names; a blank space was 
-     being inserted after 'warnings' in the following:
-        use warnings::register;
-     The problem was that warnings (and a couple of other key modules) were 
-     being tokenized as keywords.  They should have just been identifiers.
-
-     -Corrected tokenization of indirect objects after sort, system, and exec,
-     after testing produced an incorrect error message for the following
-     line of code:
-        print sort $sortsubref @list;
-
-     -Corrected minor problem where a line after a format had unwanted
-     extra continuation indentation.  
-
-     -Delete-block-comments (and -dac) now retain any leading hash-bang line
-
-     -Update for -lp (and -gnu) to not align the leading '=' of a list
-     with a previous '=', since this interferes with alignment of parameters.
-
-      old:  my $hireDay = new Date;
-            my $self    = {
-                         firstName => undef,
-                         lastName  => undef,
-                         hireDay   => $hireDay
-                         };
-    
-      new:  my $hireDay = new Date;
-            my $self = {
-                         firstName => undef,
-                         lastName  => undef,
-                         hireDay   => $hireDay
-                         };
-
-     -Modifications made to display tables more compactly when possible,
-      without adding lines. For example,
-      old:
-                    '1', "I", '2', "II", '3', "III", '4', "IV",
-                    '5', "V", '6', "VI", '7', "VII", '8', "VIII",
-                    '9', "IX"
-      new:
-                    '1', "I",   '2', "II",   '3', "III",
-                    '4', "IV",  '5', "V",    '6', "VI",
-                    '7', "VII", '8', "VIII", '9', "IX"
-
-     -Corrected minor bug in which -pt=2 did not keep the right paren tight
-     around a '++' or '--' token, like this:
-
-                for ($i = 0 ; $i < length $key ; $i++ )
-
-     The formatting for this should be, and now is: 
-
-                for ($i = 0 ; $i < length $key ; $i++)
-
-     Thanks to Erik Thaysen for noting this.
-
-     -Discovered a new bug involving here-docs during testing!  See BUGS.html.  
-
-     -Finally fixed parsing of subroutine attributes (A Perl 5.6 feature).
-     However, the attributes and prototypes must still be on the same line
-     as the sub name.
-
-  2001 07 31
-     -Corrected minor, uncommon bug found during routine testing, in which a
-     blank got inserted between a function name and its opening paren after
-     a file test operator, but only in the case that the function had not
-     been previously seen.  Perl uses the existence (or lack thereof) of 
-     the blank to guess if it is a function call.  That is,
-        if (-l pid_filename()) {
-     became
-        if (-l pid_filename ()) {
-     which is a syntax error if pid_filename has not been seen by perl.
-
-     -If the AutoLoader module is used, perltidy will continue formatting
-     code after seeing an __END__ line.  Use -nlal to deactivate this feature.  
-     Likewise, if the SelfLoader module is used, perltidy will continue 
-     formatting code after seeing a __DATA__ line.  Use -nlsl to
-     deactivate this feature.  Thanks to Slaven Rezic for this suggestion.
-
-     -pod text after __END__ and __DATA__ is now identified by perltidy
-     so that -dp works correctly.  Thanks to Slaven Rezic for this suggestion.
-
-     -The first $VERSION line which might be eval'd by MakeMaker
-     is now passed through unchanged.  Use -npvl to deactivate this feature.
-     Thanks to Manfred Winter for this suggestion.
-
-     -Improved indentation of nested parenthesized expressions.  Tests have
-     given favorable results.  Thanks to Wolfgang Weisselberg for helpful
-     examples.
-
-  2001 07 23
-     -Fixed a very rare problem in which an unwanted semicolon was inserted
-     due to misidentification of anonymous hash reference curly as a code
-     block curly.  (No instances of this have been reported; I discovered it
-     during testing).  A workaround for older versions of perltidy is to use
-     -nasc.
-
-     -Added -icb (-indent-closing-brace) parameter to indent a brace which
-     terminates a code block to the same level as the previous line.
-     Suggested by Andrew Cutler.  For example, 
-
-            if ($task) {
-                yyy();
-                }    # -icb
-            else {
-                zzz();
-                }
-
-     -Rewrote error message triggered by an unknown bareword in a print or
-     printf filehandle position, and added flag -w=0 to prevent issuing this
-     error message.  Suggested by Byron Jones.
-
-     -Added modification to align a one-line 'if' block with similar
-     following 'elsif' one-line blocks, like this:
-          if    ( $something eq "simple" )  { &handle_simple }
-          elsif ( $something eq "hard" )    { &handle_hard }
-     (Suggested by  Wolfgang Weisselberg).
-
-  2001 07 02
-     -Eliminated all constants with leading underscores because perl 5.005_03
-     does not support that.  For example, _SPACES changed to XX_SPACES.
-     Thanks to kromJx for this update.
-
-  2001 07 01
-     -the directory of test files has been moved to a separate distribution
-     file because it is getting large but is of little interest to most users.
-     For the current distribution:
-       perltidy-20010701.tgz        contains the source and docs for perltidy
-       perltidy-20010701-test.tgz   contains the test files
-
-     -fixed bug where temporary file perltidy.TMPI was not being deleted 
-     when input was from stdin.
-
-     -adjusted line break logic to not break after closing brace of an
-     eval block (suggested by Boris Zentner).
-
-     -added flag -gnu (--gnu-style) to give an approximation to the GNU
-     style as sometimes applied to perl.  The programming style in GNU
-     'automake' was used as a guide in setting the parameters; these
-     parameters will probably be adjusted over time.
-
-     -an empty code block now has one space for emphasis:
-       if ( $cmd eq "bg_untested" ) {}    # old
-       if ( $cmd eq "bg_untested" ) { }   # new
-     If this bothers anyone, we could create a parameter.
-
-     -the -bt (--brace-tightness) parameter has been split into two
-     parameters to give more control. -bt now applies only to non-BLOCK
-     braces, while a new parameter -bbt (block-brace-tightness) applies to
-     curly braces which contain code BLOCKS. The default value is -bbt=0.
-
-     -added flag -icp (--indent-closing-paren) which leaves a statement
-     termination of the form );, };, or ]; indented with the same
-     indentation as the previous line.  For example,
-
-        @month_of_year = (          # default, or -nicp
-            'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
-            'Nov', 'Dec'
-        );
-
-        @month_of_year = (          # -icp
-            'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
-            'Nov', 'Dec'
-            );
-
-     -Vertical alignment updated to synchronize with tokens &&, ||,
-     and, or, if, unless.  Allowable space before forcing
-     resynchronization has been increased.  (Suggested by  Wolfgang
-     Weisselberg).
-
-     -html corrected to use -nohtml-bold-xxxxxxx or -nhbx to negate bold,
-     and likewise -nohtml-italic-xxxxxxx or -nhbi to negate italic.  There
-     was no way to negate these previously.  html documentation updated and
-     corrected.  (Suggested by  Wolfgang Weisselberg).
-
-     -Some modifications have been made which improve the -lp formatting in
-     a few cases.
-
-     -Perltidy now retains or creates a blank line after an =cut to keep
-     podchecker happy (Suggested by Manfred H. Winter).  This appears to be
-     a glitch in podchecker, but it was annoying.
-
-  2001 06 17
-     -Added -bli flag to give continuation indentation to braces, like this
-
-            if ($bli_flag)
-              {
-                extra_indentation();
-              }
-
-     -Corrected an error with the tab (-t) option which caused the last line
-     of a multi-line quote to receive a leading tab.  This error was in
-     version 2001 06 08  but not 2001 04 06.  If you formatted a script
-     with -t with this version, please check it by running once with the
-     -chk flag and perltidy will scan for this possible error.
-
-     -Corrected an invalid pattern (\R should have been just R), changed
-     $^W =1 to BEGIN {$^W=1} to use warnings in compile phase, and corrected
-     several unnecessary 'my' declarations. Many thanks to Wolfgang Weisselberg,
-     2001-06-12, for catching these errors.
- 
-     -A '-bar' flag has been added to require braces to always be on the
-     right, even for multi-line if and foreach statements.  For example,
-     the default formatting of a long if statement would be:
-
-            if ($bigwasteofspace1 && $bigwasteofspace2
-              || $bigwasteofspace3 && $bigwasteofspace4)
-            {
-                bigwastoftime();
-            }
-
-     With -bar, the formatting is:
-
-            if ($bigwasteofspace1 && $bigwasteofspace2
-              || $bigwasteofspace3 && $bigwasteofspace4) {
-                bigwastoftime();
-            }
-     Suggested by Eli Fidler 2001-06-11.
-
-     -Uploaded perltidy to sourceforge cvs 2001-06-10.
-
-     -An '-lp' flag (--line-up-parentheses) has been added which causes lists
-     to be indented with extra indentation in the manner sometimes
-     associated with emacs or the GNU suggestions.  Thanks to Ian Stuart for
-     this suggestion and for extensive help in testing it. 
-
-     -Subroutine call parameter lists are now formatted as other lists.
-     This should improve formatting of tables being passed via subroutine
-     calls.  This will also cause full indentation ('-i=n, default n= 4) of
-     continued parameter list lines rather than just the number of spaces
-     given with -ci=n, default n=2.
- 
-     -Added support for hanging side comments.  Perltidy identifies a hanging
-     side comment as a comment immediately following a line with a side
-     comment or another hanging side comment.  This should work in most
-     cases.  It can be deactivated with --no-hanging-side-comments (-nhsc).
-     The manual has been updated to discuss this.  Suggested by Brad
-     Eisenberg some time ago, and finally implemented.
-
-  2001 06 08
-     -fixed problem with parsing command parameters containing quoted
-     strings in .perltidyrc files. (Reported by Roger Espel Llima 2001-06-07).
-
-     -added two command line flags, --want-break-after and 
-     --want-break-before, which allow changing whether perltidy
-     breaks lines before or after any operators.  Please see the revised 
-     man pages for details.
-
-     -added system-wide configuration file capability.
-     If perltidy does not find a .perltidyrc command line file in
-     the current directory, nor in the home directory, it now looks
-     for '/usr/local/etc/perltidyrc' and then for '/etc/perltidyrc'.
-     (Suggested by Roger Espel Llima 2001-05-31).
-
-     -fixed problem in which spaces were trimmed from lines of a multi-line
-     quote. (Reported by Roger Espel Llima 2001-05-30).  This is an 
-     uncommon situation, but serious, because it could conceivably change
-     the proper function of a script.
-
-     -fixed problem in which a semicolon was incorrectly added within 
-     an anonymous hash.  (Reported by A.C. Yardley, 2001-5-23).
-     (You would know if this happened, because perl would give a syntax
-     error for the resulting script).
-
-     -fixed problem in which an incorrect error message was produced
-      after a version number on a 'use' line, like this ( Reported 
-      by Andres Kroonmaa, 2001-5-14):
-
-                  use CGI 2.42 qw(fatalsToBrowser);
-
-      Other than the extraneous error message, this bug was harmless.
-
-  2001 04 06
-     -fixed serious bug in which the last line of some multi-line quotes or
-      patterns was given continuation indentation spaces.  This may make
-      a pattern incorrect unless it uses the /x modifier.  To find
-      instances of this error in scripts which have been formatted with
-      earlier versions of perltidy, run with the -chk flag, which has
-      been added for this purpose (SLH, 2001-04-05).
-
-      ** So, please check previously formatted scripts by running with -chk
-      at least once **
-
-     -continuation indentation has been reprogrammed to be hierarchical, 
-      which improves deeply nested structures.
-
-     -fixed problem with undefined value in list formatting (reported by Michael
-      Langner 2001-04-05)
-
-     -Switched to graphical display of nesting in .LOG files.  If an
-      old format string was "(1 [0 {2", the new string is "{{(".  This
-      is easier to read and also shows the order of nesting.
-
-     -added outdenting of cuddled paren structures, like  ")->pack(".
-
-     -added line break and outdenting of ')->' so that instead of
-
-            $mw->Label(
-              -text   => "perltidy",
-              -relief => 'ridge')->pack;
- 
-      the current default is:
-
-            $mw->Label(
-              -text   => "perltidy",
-              -relief => 'ridge'
-            )->pack;
-
-      (requested by Michael Langner 2001-03-31; in the future this could 
-      be controlled by a command-line parameter).
-
-     -revised list indentation logic, so that lists following an assignment
-      operator get one full indentation level, rather than just continuation 
-      indentation.  Also corrected some minor glitches in the continuation 
-      indentation logic. 
-
-     -Fixed problem with unwanted continuation indentation after a blank line 
-     (reported by Erik Thaysen 2001-03-28):
-
-     -minor update to avoid stranding a single '(' on one line
-
-  2001 03 28:
-     -corrected serious error tokenizing filehandles, in which a sub call 
-     after a print or printf, like this:
-        print usage() and exit;
-     became this:
-        print usage () and exit;
-     Unfortunately, this converts 'usage' to a filehandle.  To fix this, rerun
-     perltidy; it will look for this situation and issue a warning. 
-
-     -fixed another cuddled-else formatting bug (Reported by Craig Bourne)
-
-     -added several diagnostic --dump routines
- 
-     -added token-level whitespace controls (suggested by Hans Ecke)
-
-  2001 03 23:
-     -added support for special variables of the form ${^WANT_BITS}
-
-     -space added between scalar and left paren in 'for' and 'foreach' loops,
-      (suggestion by Michael Cartmell):
-
-        for $i( 1 .. 20 )   # old
-        for $i ( 1 .. 20 )   # new
-
-     -html now outputs cascading style sheets (thanks to suggestion from
-      Hans Ecke)
-
-     -flags -o and -st now work with -html
-
-     -added missing -html documentation for comments (noted by Alex Izvorski)
-
-     -support for VMS added (thanks to Michael Cartmell for code patches and 
-       testing)
-
-     -v-strings implemented (noted by Hans Ecke and Michael Cartmell; extensive
-       testing by Michael Cartmell)
-
-     -fixed problem where operand may be empty at line 3970 
-      (\b should be just b in lines 3970, 3973) (Thanks to Erik Thaysen, 
-      Keith Marshall for bug reports)
-
-     -fixed -ce bug (cuddled else), where lines like '} else {' were indented
-      (Thanks to Shawn Stepper and Rick Measham for reporting this)
-
-  2001 03 04:
-     -fixed undefined value in line 153 (only worked with -I set)
-     (Thanks to Mike Stok, Phantom of the Opcodes, Ian Ehrenwald, and others)
-
-     -fixed undefined value in line 1069 (filehandle problem with perl versions <
-     5.6) (Thanks to Yuri Leikind, Mike Stok, Michael Holve, Jeff Kolber)
-
-  2001 03 03:
-     -Initial announcement at freshmeat.net; started Change Log
-     (Unfortunately this version was DOA, but it was fixed the next day)
-
diff --git a/CHANGES.md b/CHANGES.md
new file mode 100644
index 0000000..54f577f
--- /dev/null
+++ b/CHANGES.md
@@ -0,0 +1,3405 @@
+# Perltidy Change Log
+
+## 2019 06 01
+
+    - rt #128477: Prevent inconsistent owner/group and setuid/setgid bits. 
+      In the -b (--backup-and-modify-in-place) mode, an attempt is made to set ownership
+      of the output file equal to the input file, if they differ.
+      In all cases, if the final output file ownership differs from input file, any setuid/setgid bits are cleared.
+
+    - Added option -bom  (--break-at-old-method-breakpoints) by
+      merrillymeredith which preserves breakpoints of method chains. Modified to also handle a cuddled call style.
+
+    - Merged patch to fix Windows EOL translation error with UTF-8 written by
+      Ron Ivy. This update prevents automatic conversion to 'DOS' CRLF line
+      endings.  Also, Windows system testing at the appveyor site is working again.
+
+    - RT #128280, added flag --one-line-block-semicolons=n (-olbs=n) 
+      to control semicolons in one-line blocks.  The values of n are:
+        n=0 means no semicolons termininating simple one-line blocks
+        n=1 means stable; do not change from input file [DEFAULT and current]
+        n=2 means always add semicolons in one-line blocks
+      The current behavior corresponds to the default n=1.
+
+    - RT #128216, Minor update to prevent inserting unwanted blank line at
+      indentation level change.  This should not change existing scripts.
+
+    - RT #81852: Improved indentation when quoted word (qw) lists are 
+      nested within other containers using the --weld-nested (-wn) flag.
+      The example given previously (below) is now closer to what it would
+      be with a simple list instead of qw:
+
+      # perltidy -wn
+      use_all_ok( qw{
+          PPI
+          PPI::Tokenizer
+          PPI::Lexer
+          PPI::Dumper
+          PPI::Find
+          PPI::Normal
+          PPI::Util
+          PPI::Cache
+      } );
+
+    - RT#12764, introduced new feature allowing placement of blanks around
+      sequences of selected keywords. This can be activated with the -kgb* 
+      series of parameters described in the manual.
+
+    - Rewrote vertical algnment module.  It is better at finding
+      patterns in complex code. For example,
+
+	OLD:
+           /^-std$/ && do { $std       = 1;     next; };
+           /^--$/   && do { @link_args = @argv; last; };
+           /^-I(.*)/ && do { $path = $1 || shift @argv; next; };
+
+	NEW:
+           /^-std$/  && do { $std       = 1;                 next; };
+           /^--$/    && do { @link_args = @argv;             last; };
+           /^-I(.*)/ && do { $path      = $1 || shift @argv; next; };
+
+    - Add repository URLs to META files 
+
+    - RT #118553, "leave only one newline at end of file". This option was not 
+      added because of undesirable side effects, but a new filter script
+      was added which can do this, "examples/delete_ending_blank_lines.pl".
+
+## 2018 11 20
+
+    - fix RT#127736 Perl-Tidy-20181119 has the EXE_FILES entry commented out in
+      Makefile.PL so it doesn't install the perltidy script or its manpage.
+
+
+## 2018 11 19
+
+    - Removed test case 'filter_example.t' which was causing a failure on a
+      Windows installation for unknown reasons, possibly due to an unexpected
+      perltidyrc being read by the test script.  Added VERSION numbers to all
+      new modules.
+
+## 2018 11 17
+
+    - Fixed RT #126965, in which a ternary operator was misparsed if immediately
+      following a function call without arguments, such as: 
+        my $restrict_customer = shift ? 1 : 0;
+
+    - Fixed RT #125012: bug in -mangle --delete-all-comments
+      A needed blank space before bareword tokens was being removed when comments 
+      were deleted
+
+    - Fixed RT #81852: Stacked containers and quoting operators. Quoted words
+      (qw) delimited by container tokens ('{', '[', '(', '<') are now included in
+      the --weld-nested (-wn) flag:
+
+          # perltidy -wn
+          use_all_ok( qw{
+                PPI
+                PPI::Tokenizer
+                PPI::Lexer
+                PPI::Dumper
+                PPI::Find
+                PPI::Normal
+                PPI::Util
+                PPI::Cache
+                } );
+
+    - The cuddled-else (-ce) coding was merged with the new cuddled-block (-cb)
+      coding.  The change is backward compatible and simplifies input.  
+      The --cuddled-block-option=n (-cbo=n) flag now applies to both -ce and -cb 
+      formatting.  In fact the -cb flag is just an alias for -ce now.
+
+    - Fixed RT #124594, license text desc. changed from 'GPL-2.0+' to 'gpl_2'
+
+    - Fixed bug in which a warning about a possible code bug was issued in a
+      script with brace errors. 
+
+    - added option --notimestamp or -nts to eliminate any time stamps in output 
+      files.  This is used to prevent differences in test scripts from causing
+      failure at installation. For example, the -cscw option will put a date
+      stamp on certain closing side comments. We need to avoid this in order
+      to test this feature in an installation test.
+
+    - Fixed bug with the entab option, -et=8, in which the leading space of
+      some lines was was not entabbed.  This happened in code which was adjusted
+      for vertical alignment and in hanging side comments. Thanks to Glenn.
+
+    - Fixed RT #127633, undesirable line break after return when -baao flag is set
+
+    - Fixed RT #127035, vertical alignment. Vertical alignment has been improved 
+      in several ways.  Thanks especially to Michael Wardman and Glenn for sending 
+      helpful snippets. 
+
+      - Alignment of the =~ operators has been reactivated.  
+
+          OLD:
+          $service_profile =~ s/^\s+|\s+$//g;
+          $host_profile =~ s/^\s+|\s+$//g;
+      
+          NEW:
+          $service_profile =~ s/^\s+|\s+$//g;
+          $host_profile    =~ s/^\s+|\s+$//g;
+
+      - Alignment of the // operator has been reactivated.  
+
+          OLD:
+          is( pop // 7,       7, 'pop // ... works' );
+          is( pop() // 7,     0, 'pop() // ... works' );
+          is( pop @ARGV // 7, 3, 'pop @array // ... works' );
+          
+          NEW:
+          is( pop       // 7, 7, 'pop // ... works' );
+          is( pop()     // 7, 0, 'pop() // ... works' );
+          is( pop @ARGV // 7, 3, 'pop @array // ... works' );
+
+      - The rules for alignment of just two lines have been adjusted,
+        hopefully to be a little better overall.  In some cases, two 
+        lines which were previously unaligned are now aligned, and vice-versa.
+
+          OLD:
+          $expect = "1$expect" if $expect =~ /^e/i;
+          $p = "1$p" if defined $p and $p =~ /^e/i;
+      
+          NEW:
+          $expect = "1$expect" if $expect =~ /^e/i;
+          $p      = "1$p"      if defined $p and $p =~ /^e/i;
+
+
+    - RT #106493; source code repository location has been added to docs; it is 
+         https://github.com/perltidy/perltidy
+
+    - The packaging for this version has changed. The Tidy.pm module is much 
+      smaller.  Supporting modules have been split out from it and placed below 
+      it in the path Perl/Tidy/*.
+
+    - A number of new installation test cases have been added. Updates are now
+      continuously tested at Travis CI against versions back to Perl 5.08.
+
+## 2018 02 20
+
+    - RT #124469, #124494, perltidy often making empty files.  The previous had
+      an index error causing it to fail, particularly in version 5.18 of Perl.
+
+      Please avoid version 20180219.
+
+## 2018 02 19
+
+    - RT #79947, cuddled-else generalization. A new flag -cb provides
+      'cuddled-else' type formatting for an arbitrary type of block chain. The
+      default is try-catch-finally, but this can be modified with the 
+      parameter -cbl. 
+
+    - Fixed RT #124298: add space after ! operator without breaking !! secret 
+      operator
+
+    - RT #123749: numerous minor improvements to the -wn flag were made.  
+
+    - Fixed a problem with convergence tests in which iterations were stopping 
+      prematurely. 
+
+    - Here doc targets for <<~ type here-docs may now have leading whitespace.
+
+    - Fixed RT #124354. The '-indent-only' flag was not working correctly in the 
+      previous release. A bug in version 20180101 caused extra blank lines 
+      to be output.
+
+    - Issue RT #124114. Some improvements were made in vertical alignment
+      involving 'fat commas'.
+
+## 2018 01 01
+
+    - Added new flag -wn (--weld-nested-containers) which addresses these issues:
+      RT #123749: Problem with promises; 
+      RT #119970: opening token stacking strange behavior;
+      RT #81853: Can't stack block braces
+
+      This option causes closely nested pairs of opening and closing containers
+      to be "welded" together and essentially be formatted as a single unit,
+      with just one level of indentation.
+
+      Since this is a new flag it is set to be "off" by default but it has given 
+      excellent results in testing. 
+
+      EXAMPLE 1, multiple blocks, default formatting:
+          do {
+              {
+                  next if $x == $y;    # do something here
+              }
+          } until $x++ > $z;
+
+      perltidy -wn
+          do { {
+              next if $x == $y;
+          } } until $x++ > $z;
+
+       EXAMPLE 2, three levels of wrapped function calls, default formatting:
+              p(
+                  em(
+                      conjug(
+                          translate( param('verb') ), param('tense'),
+                          param('person')
+                      )
+                  )
+              );
+
+          # perltidy -wn
+              p( em( conjug(
+                  translate( param('verb') ),
+                  param('tense'), param('person')
+              ) ) );
+
+          # EXAMPLE 3, chained method calls, default formatting:
+          get('http://mojolicious.org')->then(
+              sub {
+                  my $mojo = shift;
+                  say $mojo->res->code;
+                  return get('http://metacpan.org');
+              }
+          )->then(
+              sub {
+                  my $cpan = shift;
+                  say $cpan->res->code;
+              }
+          )->catch(
+              sub {
+                  my $err = shift;
+                  warn "Something went wrong: $err";
+              }
+          )->wait;
+
+          # perltidy -wn
+          get('http://mojolicious.org')->then( sub {
+              my $mojo = shift;
+              say $mojo->res->code;
+              return get('http://metacpan.org');
+          } )->then( sub {
+              my $cpan = shift;
+              say $cpan->res->code;
+          } )->catch( sub {
+              my $err = shift;
+              warn "Something went wrong: $err";
+          } )->wait;
+
+
+    - Fixed RT #114359: Missparsing of "print $x ** 0.5;
+
+    - Deactivated the --check-syntax flag for better security.  It will be
+      ignored if set.  
+
+    - Corrected minimum perl version from 5.004 to 5.008 based on perlver
+      report.  The change is required for coding involving wide characters.
+
+    - For certain severe errors, the source file will be copied directly to the
+      output without formatting. These include ending in a quote, ending in a
+      here doc, and encountering an unidentified character.
+
+## 2017 12 14
+
+    - RT #123749, partial fix.  "Continuation indentation" is removed from lines 
+      with leading closing parens which are part of a call chain. 
+      For example, the call to pack() is is now outdented to the starting 
+      indentation in the following experession:  
+
+          # OLD
+          $mw->Button(
+              -text    => "New Document",
+              -command => \&new_document
+            )->pack(
+              -side   => 'bottom',
+              -anchor => 'e'
+            );
+
+          # NEW
+          $mw->Button(
+              -text    => "New Document",
+              -command => \&new_document
+          )->pack(
+              -side   => 'bottom',
+              -anchor => 'e'
+          );
+
+      This modification improves readability of complex expressions, especially
+      when the user uses the same value for continuation indentation (-ci=n) and 
+      normal indentation (-i=n).  Perltidy was already programmed to
+      do this but a minor bug was preventing it.
+
+    - RT #123774, added flag to control space between a backslash and a single or
+      double quote, requested by Robert Rothenberg.  The issue is that lines like
+
+         $str1=\"string1";
+         $str2=\'string2';
+
+      confuse syntax highlighters unless a space is left between the backslash and
+      the quote.
+
+      The new flag to control this is -sbq=n (--space-backslash-quote=n), 
+      where n=0 means no space, n=1 means follow existing code, n=2 means always
+      space.  The default is n=1, meaning that a space will be retained if there
+      is one in the source code.
+
+    - Fixed RT #123492, support added for indented here doc operator <<~ added 
+      in v5.26.  Thanks to Chris Weyl for the report.
+
+    - Fixed docs; --closing-side-comment-list-string should have been just
+      --closing-side-comment-list.  Thanks to F.Li.
+
+    - Added patch RT #122030] Perl::Tidy sometimes does not call binmode.
+      Thanks to Irilis Aelae.
+
+    - Fixed RT #121959, PERLTIDY doesn't honor the 'three dot' notation for 
+      locating a config file using environment variables.  Thanks to John 
+      Wittkowski.
+
+    - Minor improvements to formatting, in which some additional vertical
+      aligmnemt is done. Thanks to Keith Neargarder.
+
+    - RT #119588.  Vertical alignment is no longer done for // operator.
+
+## 2017 05 21
+
+    - Fixed debian #862667: failure to check for perltidy.ERR deletion can lead 
+      to overwriting abritrary files by symlink attack. Perltidy was continuing 
+      to write files after an unlink failure.  Thanks to Don Armstrong 
+      for a patch.
+
+    - Fixed RT #116344, perltidy fails on certain anonymous hash references:
+      in the following code snippet the '?' was misparsed as a pattern 
+      delimiter rather than a ternary operator.
+          return ref {} ? 1 : 0;
+
+    - Fixed RT #113792: misparsing of a fat comma (=>) right after 
+      the __END__ or __DATA__ tokens.  These keywords were getting
+      incorrectly quoted by the following => operator.
+
+    - Fixed RT #118558. Custom Getopt::Long configuration breaks parsing 
+      of perltidyrc.  Perltidy was resetting the users configuration too soon.
+
+    - Fixed RT #119140, failure to parse double diamond operator.  Code to
+      handle this new operator has been added.
+
+    - Fixed RT #120968.  Fixed problem where -enc=utf8 didn't work 
+      with --backup-and-modify-in-place. Thanks to Heinz Knutzen for this patch.
+
+    - Fixed minor formatting issue where one-line blocks for subs with signatures 
+      were unnecesarily broken
+
+    - RT #32905, patch to fix utf-8 error when output was STDOUT. 
+
+    - RT #79947, improved spacing of try/catch/finally blocks. Thanks to qsimpleq
+      for a patch.
+
+    - Fixed #114909, Anonymous subs with signatures and prototypes misparsed as
+      broken ternaries, in which a statement such as this was not being parsed
+      correctly:
+          return sub ( $fh, $out ) : prototype(*$) { ... }
+
+    - Implemented RT #113689, option to introduces spaces after an opening block
+      brace and before a closing block brace. Four new optional controls are
+      added. The first two define the minimum number of blank lines to be
+      inserted 
+
+       -blao=i or --blank-lines-after-opening-block=i
+       -blbc=i or --blank-lines-before-closing-block=i
+
+      where i is an integer, the number of lines (the default is 0).  
+
+      The second two define the types of blocks to which the first two apply 
+
+       -blaol=s or --blank-lines-after-opening-block-list=s
+       -blbcl=s or --blank-lines-before-closing-block-list=s
+      
+      where s is a string of possible block keywords (default is just 'sub',
+      meaning a named subroutine).
+
+      For more information please see the documentation.
+
+    - The method for specifying block types for certain input parameters has
+      been generalized to distinguish between normal named subroutines and
+      anonymous subs.  The keyword for normal subroutines remains 'sub', and
+      the new keyword for anonymous subs is 'asub'. 
+
+    - Minor documentation changes. The BUGS sections now have a link
+      to CPAN where most open bugs and issues can be reviewed and bug reports
+      can be submitted.  The information in the AUTHOR and CREDITS sections of
+      the man pages have been removed from the man pages to streamline the
+      documentation. This information is still in the source code.
+
+## 2016 03 02
+
+    - RT #112534. Corrected a minor problem in which an unwanted newline
+      was placed before the closing brace of an anonymous sub with 
+      a signature, if it was in a list.  Thanks to Dmytro Zagashev.
+
+    - Corrected a minor problem in which occasional extra indentation was
+      given to the closing brace of an anonymous sub in a list when the -lp 
+      parameter was set.
+
+## 2016 03 01
+
+     - RT #104427. Added support for signatures.
+
+     - RT #111512.  Changed global warning flag $^W = 1 to use warnings;
+       Thanks to Dmytro Zagashev.
+
+     - RT #110297, added support for new regexp modifier /n
+       Thanks to Dmytro Zagashev.
+
+     - RT #111519.  The -io (--indent-only) and -dac (--delete-all-comments)
+       can now both be used in one pass. Thanks to Dmitry Veltishev.
+
+     - Patch to avoid error message with 'catch' used by TryCatch, as in
+          catch($err){
+             # do something
+          }
+       Thanks to Nick Tonkin.
+
+     - RT #32905, UTF-8 coding is now more robust. Thanks to qsimpleq
+       and Dmytro for patches.
+
+     - RT #106885. Added string bitwise operators ^. &. |. ~. ^.= &.= |.=
+    
+     - Fixed RT #107832 and #106492, lack of vertical alignment of two lines
+       when -boc flag (break at old commas) is set.  This bug was 
+       inadvertantly introduced in previous bug fix RT #98902. 
+
+     - Some common extensions to Perl syntax are handled better.
+       In particular, the following snippet is now foratted cleanly:
+
+         method deposit( Num $amount) {
+             $self->balance( $self->balance + $amount );
+         }
+
+       A new flag -xs (--extended-syntax) was added to enable this, and the default
+       is to use -xs. 
+
+       In previous versions, and now only when -nxs is set, this snippet of code
+       generates the following error message:
+
+       "syntax error at ') {', didn't see one of: case elsif for foreach given if switch unless until when while"
+
+## 2015 08 15
+
+     - Fixed RT# 105484, Invalid warning about 'else' in 'switch' statement.  The
+       warning happened if a 'case' statement did not use parens.
+
+     - Fixed RT# 101547, misparse of // caused error message.  Also..
+
+     - Fixed RT# 102371, misparse of // caused unwated space in //=
+
+     - Fixed RT# 100871, "silent failure of HTML Output on Windows". 
+       Changed calls to tempfile() from:
+         my ( $fh_tmp, $tmpfile ) = tempfile();
+       to have the full path name:
+         my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile()
+       because of problems in the Windows version reported by Dean Pearce.
+
+     - Fixed RT# 99514, calling the perltidy module multiple times with 
+       a .perltidyrc file containing the parameter --output-line-ending 
+       caused a crash.  This was a glitch in the memoization logic. 
+
+     - Fixed RT#99961, multiple lines inside a cast block caused unwanted
+       continuation indentation.  
+
+     - RT# 32905, broken handling of UTF-8 strings. 
+       A new flag -utf8 causes perltidy assume UTF-8 encoding for input and 
+       output of an io stream.  Thanks to Sebastian Podjasek for a patch.  
+       This feature may not work correctly in older versions of Perl. 
+       It worked in a linux version 5.10.1 but not in a Windows version 5.8.3 (but
+       otherwise perltidy ran correctly).
+
+     - Warning files now report perltidy VERSION. Suggested by John Karr.
+    
+     - Fixed long flag --nostack-closing-tokens (-nsct has always worked though). 
+       This was due to a typo.  This also fixed --nostack-opening-tokens to 
+       behave correctly.  Thanks to Rob Dixon.
+
+## 2014 07 11
+
+    - Fixed RT #94902: abbreviation parsing in .perltidyrc files was not
+      working for multi-line abbreviations.  Thanks to Eric Fung for 
+      supplying a patch. 
+    
+    - Fixed RT #95708, misparsing of a hash when the first key was a perl
+      keyword, causing a semicolon to be incorrectly added.
+
+    - Fixed RT #94338 for-loop in a parenthesized block-map.  A code block within
+      parentheses of a map, sort, or grep function was being mistokenized.  In 
+      rare cases this could produce in an incorrect error message.  The fix will
+      produce some minor formatting changes.  Thanks to Daniel Trizen 
+      discovering and documenting this.
+
+    - Fixed RT #94354, excess indentation for stacked tokens.  Thanks to 
+      Colin Williams for supplying a patch.
+
+    - Added support for experimental postfix dereferencing notation introduced in
+      perl 5.20. RT #96021.
+
+    - Updated documentation to clarify the behavior of the -io flag
+      in response to RT #95709.  You can add -noll or -l=0 to prevent 
+      long comments from being outdented when -io is used.
+
+    - Added a check to prevent a problem reported in RT #81866, where large
+      scripts which had been compressed to a single line could not be formatted
+      because of a check for VERSION for MakeMaker. The workaround was to 
+      use -nvpl, but this shouldn't be necessary now.
+
+    - Fixed RT #96101; Closing brace of anonymous sub in a list was being
+      indented.  For example, the closing brace of the anonymous sub below 
+      will now be lined up with the word 'callback'.  This problem 
+      occured if there was no comma after the closing brace of the anonymous sub.  
+      This update may cause minor changes to formatting of code with lists 
+      of anonymous subs, especially TK code.
+      
+      # OLD
+      my @menu_items = (
+
+          #...
+          {
+              path     => '/_Operate/Transcode and split',
+              callback => sub {
+                  return 1 if not $self->project_opened;
+                  $self->comp('project')->transcode( split => 1 );
+                }
+          }
+      );
+
+      # NEW
+      my @menu_items = (
+
+          #...
+          {
+              path     => '/_Operate/Transcode and split',
+              callback => sub {
+                  return 1 if not $self->project_opened;
+                  $self->comp('project')->transcode( split => 1 );
+              }
+          }
+      );
+
+## 2014 03 28
+
+    - Fixed RT #94190 and debian Bug #742004: perltidy.LOG file left behind.
+      Thanks to George Hartzell for debugging this.  The problem was
+      caused by the memoization speedup patch in version 20121207.  An
+      unwanted flag was being set which caused a LOG to be written if 
+      perltidy was called multiple times.
+
+    - New default behavior for LOG files: If the source is from an array or 
+      string (through a call to the perltidy module) then a LOG output is only
+      possible if a logfile stream is specified.  This is to prevent 
+      unexpected perltidy.LOG files. 
+
+    - Fixed debian Bug #740670, insecure temporary file usage.  File::Temp is now
+      used to get a temporary file.  Thanks to Don Anderson for a patch.
+    
+    - Any -b (--backup-and-modify-in-place) flag is silently ignored when a 
+      source stream, destination stream, or standard output is used.  
+      This is because the -b flag may have been in a .perltidyrc file and 
+      warnings break Test::NoWarnings.  Thanks to Marijn Brand. 
+
+## 2013 09 22
+
+    - Fixed RT #88020. --converge was not working with wide characters.
+
+    - Fixed RT #78156. package NAMESPACE VERSION syntax not accepted.
+
+    - First attempt to fix RT #88588.  INDEX END tag change in pod2html breaks 
+      perltidy -html. I put in a patch which should work but I don't yet have
+      a way of testing it.
+
+## 2013 08 06
+
+    - Fixed RT #87107, spelling
+
+## 2013 08 05
+
+    - Fixed RT #87502, incorrect of parsing of smartmatch before hash brace
+    
+    - Added feature request RT #87330, trim whitespace after POD.
+      The flag -trp (--trim-pod) will trim trailing whitespace from lines of POD
+
+## 2013 07 17
+
+    - Fixed RT #86929, #86930, missing lhs of assignment.
+
+    - Fixed RT #84922, moved pod from Tidy.pm into Tidy.pod
+
+## 2012 12 07
+
+    - The flag -cab=n or --comma-arrow-breakpoints=n has been generalized
+      to give better control over breaking open short containers.  The
+      possible values are now:
+
+        n=0 break at all commas after =>  
+        n=1 stable: break at all commas after => if container is open,
+            EXCEPT FOR one-line containers
+        n=2 break at all commas after =>, BUT try to form the maximum
+            maximum one-line container lengths
+        n=3 do not treat commas after => specially at all 
+        n=4 break everything: like n=0 but also break a short container with
+            a => not followed by a comma
+        n=5 stable: like n=1 but ALSO break at open one-line containers (default)
+
+      New values n=4 and n=5 have been added to allow short blocks to be
+      broken open.  The new default is n=5, stable.  It should more closely
+      follow the breaks in the input file, and previously formatted code
+      should remain unchanged.  If this causes problems use -cab=1 to recover 
+      the former behavior.  Thanks to Tony Maszeroski for the suggestion.
+
+      To illustrate the need for the new options, if perltidy is given
+      the following code, then the old default (-cab=1) was to close up 
+      the 'index' container even if it was open in the source.  The new 
+      default (-cab=5) will keep it open if it was open in the source.
+
+       our $fancypkg = {
+           'ALL' => {
+               'index' => {
+                   'key' => 'value',
+               },
+               'alpine' => {
+                   'one'   => '+',
+                   'two'   => '+',
+                   'three' => '+',
+               },
+           }
+       };
+
+    - New debug flag --memoize (-mem).  This version contains a 
+      patch supplied by Jonathan Swartz which can significantly speed up
+      repeated calls to Perl::Tidy::perltidy in a single process by caching
+      the result of parsing the formatting parameters.  A factor of up to 10
+      speedup was achieved for masontidy (https://metacpan.org/module/masontidy).
+      The memoization patch is on by default but can be deactivated for 
+      testing with -nmem (or --no-memoize).
+
+    - New flag -tso (--tight-secret-operators) causes certain perl operator
+      sequences (secret operators) to be formatted "tightly" (without spaces).  
+      The most common of these are 0 +  and + 0 which become 0+ and +0.  The
+      operators currently modified by this flag are: 
+           =( )=  0+  +0  ()x!! ~~<>  ,=>
+      Suggested by by Philippe Bruhat. See https://metacpan.org/module/perlsecret
+      This flag is off by default.
+      
+    - New flag -vmll (--variable-maximum-line-length) makes the maximum
+      line length increase with the nesting depth of a line of code.  
+      Basically, it causes the length of leading whitespace to be ignored when
+      setting line breaks, so the formatting of a block of code is independent
+      of its nesting depth.  Try this option if you have deeply nested 
+      code or data structures, perhaps in conjunction with the -wc flag
+      described next.  The default is not todo this.
+    
+    - New flag -wc=n (--whitespace-cycle=n) also addresses problems with
+      very deeply nested code and data structures.  When this parameter is
+      used and the nesting depth exceeds the value n, the leading whitespace 
+      will be reduced and start at 1 again.  The result is that deeply
+      nested blocks of code will shift back to the left. This occurs cyclically 
+      to any nesting depth.  This flag may be used either with or without -vmll.
+      The default is not to use this (-wc=0).
+
+    - Fixed RT #78764, error parsing smartmatch operator followed by anonymous
+      hash or array and then a ternary operator; two examples:
+
+       qr/3/ ~~ ['1234'] ? 1 : 0;
+       map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
+
+    - Fixed problem with specifying spaces around arrows using -wls='->'
+      and -wrs='->'.  Thanks to Alain Valleton for documenting this problem. 
+
+    - Implemented RT #53183, wishlist, lines of code with the same indentation
+      level which are contained with multiple stacked opening and closing tokens
+      (requested with flags -sot -sct) now have reduced indentation.  
+
+       # Default
+       $sender->MailMsg(
+           {
+               to      => $addr,
+               subject => $subject,
+               msg     => $body
+           }
+       );
+
+       # OLD: perltidy -sot -sct 
+       $sender->MailMsg( {
+               to      => $addr,
+               subject => $subject,
+               msg     => $body
+       } );
+
+       # NEW: perltidy -sot -sct 
+       $sender->MailMsg( {
+           to      => $addr,
+           subject => $subject,
+           msg     => $body
+       } );
+
+    - New flag -act=n (--all-containers-tightness=n) is an abbreviation for
+      -pt=n -sbt=n -bt=n -bbt=n, where n=0,1, or 2.  It simplifies input when all
+      containers have the same tightness. Using the same example:
+
+       # NEW: perltidy -sot -sct -act=2
+       $sender->MailMsg({
+           to      => $addr,
+           subject => $subject,
+           msg     => $body
+       });
+
+    - New flag -sac (--stack-all-containers) is an abbreviation for -sot -sct
+      This is part of wishlist item RT #53183. Using the same example again:
+
+       # NEW: perltidy -sac -act=2
+       $sender->MailMsg({
+           to      => $addr,
+           subject => $subject,
+           msg     => $body
+       });
+
+     - new flag -scbb (--stack-closing-block-brace) causes isolated closing 
+       block braces to stack as in the following example. (Wishlist item RT#73788)
+
+       DEFAULT:
+       for $w1 (@w1) {
+           for $w2 (@w2) {
+               for $w3 (@w3) {
+                   for $w4 (@w4) {
+                       push( @lines, "$w1 $w2 $w3 $w4\n" );
+                   }
+               }
+           }
+       }
+
+       perltidy -scbb:
+       for $w1 (@w1) {
+           for $w2 (@w2) {
+               for $w3 (@w3) {
+                   for $w4 (@w4) {
+                       push( @lines, "$w1 $w2 $w3 $w4\n" );
+                   } } } }
+
+      There is, at present, no flag to place these closing braces at the end
+      of the previous line. It seems difficult to develop good rules for 
+      doing this for a wide variety of code and data structures.
+
+    - Parameters defining block types may use a wildcard '*' to indicate
+      all block types.  Previously it was not possible to include bare blocks.
+    
+    - A flag -sobb (--stack-opening-block-brace) has been introduced as an
+      alias for -bbvt=2 -bbvtl='*'.  So for example the following test code:
+
+      {{{{{{{ $testing }}}}}}}
+
+      cannot be formatted as above but can at least be kept vertically compact 
+      using perltidy -sobb -scbb
+
+      {   {   {   {   {   {   {   $testing
+                              } } } } } } }
+
+      Or even, perltidy -sobb -scbb -i=1 -bbt=2
+      {{{{{{{$testing
+            }}}}}}}
+
+
+    - Error message improved for conflicts due to -pbp; thanks to Djun Kim.
+     
+    - Fixed RT #80645, error parsing special array name '@$' when used as 
+      @{$} or $#{$}
+    
+    - Eliminated the -chk debug flag which was included in version 20010406 to
+      do a one-time check for a bug with multi-line quotes.  It has not been
+      needed since then.
+
+    - Numerous other minor formatting improvements.
+
+## 2012 07 14
+
+    - Added flag -iscl (--ignore-side-comment-lengths) which causes perltidy 
+      to ignore the length of side comments when setting line breaks, 
+      RT #71848.  The default is to include the length of side comments when
+      breaking lines to stay within the length prescribed by the -l=n
+      maximum line length parameter.  For example,
+
+        Default behavior on a single line with long side comment:
+           $vmsfile =~ s/;[\d\-]*$//
+             ;    # Clip off version number; we can use a newer version as well
+      
+        perltidy -iscl leaves the line intact:
+
+           $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well
+
+    - Fixed RT #78182, side effects with STDERR.  Error handling has been
+      revised and the documentation has been updated.  STDERR can now be 
+      redirected to a string reference, and perltidy now returns an 
+      error flag instead of calling die when input errors are detected. 
+      If the error flag is set then no tidied output was produced.
+      See man Perl::Tidy for an example.
+
+    - Fixed RT #78156, erroneous warning message for package VERSION syntax.
+
+    - Added abbreviations -conv (--converge) to simplify iteration control.
+      -conv is equivalent to -it=4 and will insure that the tidied code is
+      converged to its final state with the minimum number of iterations.
+
+    - Minor formatting modifications have been made to insure convergence.
+
+    - Simplified and hopefully improved the method for guessing the starting 
+      indentation level of entabbed code.  Added flag -dt=n (--default_tabsize=n) 
+      which might be helpful if the guessing method does not work well for
+      some editors.
+
+    - Added support for stacked labels, upper case X/B in hex and binary, and
+      CORE:: namespace.
+
+    - Eliminated warning messages for using keyword names as constants.
+
+## 2012 07 01
+
+    - Corrected problem introduced by using a chomp on scalar references, RT #77978
+
+    - Added support for Perl 5.14 package block syntax, RT #78114.
+
+    - A convergence test is made if three or more iterations are requested with
+      the -it=n parameter to avoid wasting computer time.  Several hundred Mb of
+      code gleaned from the internet were searched with the results that: 
+       - It is unusual for two iterations to be required unless a major 
+         style change is being made. 
+       - Only one case has been found where three iterations were required.  
+       - No cases requiring four iterations have been found with this version.
+      For the previous version several cases where found the results could
+      oscillate between two semi-stable states. This version corrects this.
+
+      So if it is important that the code be converged it is okay to set -it=4
+      with this version and it will probably stop after the second iteration.
+
+    - Improved ability to identify and retain good line break points in the
+      input stream, such as at commas and equals. You can always tell 
+      perltidy to ignore old breakpoints with -iob.  
+
+    - Fixed glitch in which a terminal closing hash brace followed by semicolon
+      was not outdented back to the leading line depth like other closing
+      tokens.  Thanks to Keith Neargarder for noting this.
+
+        OLD:
+           my ( $pre, $post ) = @{
+               {
+                   "pp_anonlist" => [ "[", "]" ],
+                   "pp_anonhash" => [ "{", "}" ]
+               }->{ $kid->ppaddr }
+             };   # terminal brace
+
+        NEW:
+           my ( $pre, $post ) = @{
+               {
+                   "pp_anonlist" => [ "[", "]" ],
+                   "pp_anonhash" => [ "{", "}" ]
+               }->{ $kid->ppaddr }
+           };    # terminal brace
+
+    - Removed extra indentation given to trailing 'if' and 'unless' clauses 
+      without parentheses because this occasionally produced undesirable 
+      results.  This only applies where parens are not used after the if or
+      unless.
+
+       OLD:
+           return undef
+             unless my ( $who, $actions ) =
+                 $clause =~ /^($who_re)((?:$action_re)+)$/o; 
+       
+       NEW:
+           return undef
+             unless my ( $who, $actions ) =
+             $clause =~ /^($who_re)((?:$action_re)+)$/o; 
+
+## 2012 06 19
+
+    - Updated perltidy to handle all quote modifiers defined for perl 5 version 16.
+
+    - Side comment text in perltidyrc configuration files must now begin with
+      at least one space before the #.  Thus:
+
+      OK:
+        -l=78 # Max line width is 78 cols
+      BAD: 
+        -l=78# Max line width is 78 cols
+
+      This is probably true of almost all existing perltidyrc files, 
+      but if you get an error message about bad parameters
+      involving a '#' the first time you run this version, please check the side
+      comments in your perltidyrc file, and add a space before the # if necessary.
+      You can quickly see the contents your perltidyrc file, if any, with the
+      command:
+
+        perltidy -dpro
+
+      The reason for this change is that some parameters naturally involve
+      the # symbol, and this can get interpreted as a side comment unless the
+      parameter is quoted.  For example, to define -sphb=# it used to be necessary
+      to write
+        -sbcp='#'
+      to keep the # from becoming part of a comment.  This was causing 
+      trouble for new users.  Now it can also be written without quotes: 
+        -sbcp=#
+
+    - Fixed bug in processing some .perltidyrc files containing parameters with
+      an opening brace character, '{'.  For example the following was
+      incorrectly processed:
+         --static-block-comment-prefix="^#{2,}[^\s#]"
+      Thanks to pdagosto.
+
+    - Added flag -boa (--break-at-old-attribute-breakpoints) which retains
+      any existing line breaks at attribute separation ':'. This is now the
+      default, use -nboa to deactivate.  Thanks to Daphne Phister for the patch.  
+      For example, given the following code, the line breaks at the ':'s will be
+      retained:
+          
+                       my @field
+                         : field
+                         : Default(1)
+                         : Get('Name' => 'foo') : Set('Name');
+
+      whereas the previous version would have output a single line.  If
+      the attributes are on a single line then they will remain on a single line.
+    
+    - Added new flags --blank-lines-before-subs=n (-blbs=n) and
+      --blank-lines-before-packages=n (-blbp=n) to put n blank lines before
+      subs and packages.  The old flag -bbs is now equivalent to -blbs=1 -blbp=1.
+      and -nbbs is equivalent to -blbs=0 -blbp=0. Requested by M. Schwern and
+      several others.
+
+    - Added feature -nsak='*' meaning no space between any keyword and opening 
+      paren.  This avoids listing entering a long list of keywords.  Requested
+      by M. Schwern.
+
+    - Added option to delete a backup of original file with in-place-modify (-b)
+      if there were no errors.  This can be requested with the flag -bext='/'.  
+      See documentation for details.  Requested by M. Schwern and others.
+
+    - Fixed bug where the module postfilter parameter was not applied when -b 
+      flag was used.  This was discovered during testing.
+
+    - Fixed in-place-modify (-b) to work with symbolic links to source files.
+      Thanks to Ted Johnson.
+
+    - Fixed bug where the Perl::Tidy module did not allow -b to be used 
+      in some cases.
+
+    - No extra blank line is added before a comment which follows
+      a short line ending in an opening token, for example like this:
+       OLD:
+               if (
+
+                   # unless we follow a blank or comment line
+                   $last_line_leading_type !~ /^[#b]$/
+                   ...
+
+       NEW:
+               if (
+                   # unless we follow a blank or comment line
+                   $last_line_leading_type !~ /^[#b]$/
+                   ...
+
+       The blank is not needed for readability in these cases because there
+       already is already space above the comment.  If a blank already 
+       exists there it will not be removed, so this change should not 
+       change code which has previously been formatted with perltidy. 
+       Thanks to R.W.Stauner.
+
+    - Likewise, no extra blank line is added above a comment consisting of a
+      single #, since nothing is gained in readability.
+
+    - Fixed error in which a blank line was removed after a #>>> directive. 
+      Thanks to Ricky Morse.
+
+    - Unnecessary semicolons after given/when/default blocks are now removed.
+
+    - Fixed bug where an unwanted blank line could be added before
+      pod text in __DATA__ or __END__ section.  Thanks to jidani.
+
+    - Changed exit flags from 1 to 0 to indicate success for -help, -version, 
+      and all -dump commands.  Also added -? as another way to dump the help.
+      Requested by Keith Neargarder.
+
+    - Fixed bug where .ERR and .LOG files were not written except for -it=2 or more
+
+    - Fixed bug where trailing blank lines at the end of a file were dropped when
+      -it>1.
+
+    - Fixed bug where a line occasionally ended with an extra space. This reduces
+      rhe number of instances where a second iteration gives a result different
+      from the first. 
+
+    - Updated documentation to note that the Tidy.pm module  parameter may
+      not be a reference to SCALAR or ARRAY; it must be a file.
+    
+    - Syntax check with perl now work when the Tidy.pm module is processing
+      references to arrays and strings.  Thanks to Charles Alderman.
+
+    - Zero-length files are no longer processed due to concerns for data loss
+      due to side effects in some scenarios.
+
+    - block labels, if any, are now included in closing side comment text
+      when the -csc flag is used.  Suggested by Aaron.  For example, 
+      the label L102 in the following block is now included in the -csc text:
+
+         L102: for my $i ( 1 .. 10 ) {
+           ...
+         } ## end L102: for my $i ( 1 .. 10 )
+
+## 2010 12 17
+
+    - added new flag -it=n or --iterations=n
+      This flag causes perltidy to do n complete iterations.  
+      For most purposes the default of n=1 should be satisfactory.  However n=2
+      can be useful when a major style change is being made, or when code is being
+      beautified on check-in to a source code control system.  The run time will be
+      approximately proportional to n, and it should seldom be necessary to use a
+      value greater than n=2.  Thanks to Jonathan Swartz
+
+    - A configuration file pathname begins with three dots, e.g.
+      ".../.perltidyrc", indicates that the file should be searched for starting
+      in the current directory and working upwards. This makes it easier to have
+      multiple projects each with their own .perltidyrc in their root directories.
+      Thanks to Jonathan Swartz for this patch.
+
+    - Added flag --notidy which disables all formatting and causes the input to be
+      copied unchanged.  This can be useful in conjunction with hierarchical
+      F<.perltidyrc> files to prevent unwanted tidying.
+      Thanks to Jonathan Swartz for this patch.
+
+    - Added prefilters and postfilters in the call to the Tidy.pm module.
+      Prefilters and postfilters. The prefilter is a code reference that 
+      will be applied to the source before tidying, and the postfilter 
+      is a code reference to the result before outputting.  
+
+      Thanks to Jonathan Swartz for this patch.  He writes:
+      This is useful for all manner of customizations. For example, I use
+      it to convert the 'method' keyword to 'sub' so that perltidy will work for
+      Method::Signature::Simple code:
+
+      Perl::Tidy::perltidy(
+         prefilter => sub { $_ = $_[0]; s/^method (.*)/sub $1 \#__METHOD/gm; return $_ },
+         postfilter => sub { $_ = $_[0]; s/^sub (.*?)\s* \#__METHOD/method $1/gm; return $_ }
+      );
+
+    - The starting indentation level of sections of code entabbed with -et=n
+      is correctly guessed if it was also produced with the same -et=n flag.  This
+      keeps the indentation stable on repeated formatting passes within an editor.
+      Thanks to Sam Kington and Glenn.
+
+    - Functions with prototype '&' had a space between the function and opening
+      peren.  This space now only occurs if the flag --space-function-paren (-sfp)
+      is set.  Thanks to Zrajm Akfohg.
+
+    - Patch to never put spaces around a bare word in braces beginning with ^ as in:
+        my $before = ${^PREMATCH};
+      even if requested with the -bt=0 flag because any spaces cause a syntax error in perl.
+      Thanks to Fabrice Dulanoy.
+
+## 2009 06 16
+
+    - Allow configuration file to be 'perltidy.ini' for Windows systems.
+      i.e. C:\Documents and Settings\User\perltidy.ini
+      and added documentation for setting configuation file under Windows in man
+      page.  Thanks to Stuart Clark.
+
+    - Corrected problem of unwanted semicolons in hash ref within given/when code.
+     Thanks to Nelo Onyiah.
+
+    - added new flag -cscb or --closing-side-comments-balanced
+     When using closing-side-comments, and the closing-side-comment-maximum-text
+     limit is exceeded, then the comment text must be truncated.  Previous
+     versions of perltidy terminate with three dots, and this can still be
+     achieved with -ncscb:
+      
+      perltidy -csc -ncscb
+
+      } ## end foreach my $foo (sort { $b cmp $a ...
+      
+     However this causes a problem with older editors which cannot recognize
+     comments or are not configured to doso because they cannot "bounce" around in
+     the text correctly.  The B<-cscb> flag tries to help them by 
+     appending appropriate terminal balancing structure:
+      
+      perltidy -csc -cscb
+
+      } ## end foreach my $foo (sort { $b cmp $a ... })
+      
+     Since there is much to be gained and little to be lost by doing this,
+     the default is B<-cscb>.  Use B<-ncscb> if you do not want this.
+
+     Thanks to Daniel Becker for suggesting this option.
+
+    - After an isolated closing eval block the continuation indentation will be
+      removed so that the braces line up more like other blocks.  Thanks to Yves Orton.
+
+    OLD:
+       eval {
+           #STUFF;
+           1;    # return true
+         }  
+         or do {
+           #handle error
+         };
+
+    NEW:
+       eval {
+           #STUFF;
+           1;    # return true
+       } or do {
+           #handle error
+       };
+
+    -A new flag -asbl (or --opening-anonymous-sub-brace-on-new-line) has
+     been added to put the opening brace of anonymous sub's on a new line,
+     as in the following snippet:
+
+       my $code = sub
+       {
+           my $arg = shift;
+           return $arg->(@_);
+       };
+
+     This was not possible before because the -sbl flag only applies to named
+     subs. Thanks to Benjamin Krupp.
+
+    -Fix tokenization bug with the following snippet
+      print 'hi' if { x => 1, }->{x};
+     which resulted in a semicolon being added after the comma.  The workaround
+     was to use -nasc, but this is no longer necessary.  Thanks to Brian Duggan. 
+
+    -Fixed problem in which an incorrect error message could be triggered
+    by the (unusual) combination of parameters  -lp -i=0 -l=2 -ci=0 for
+    example.  Thanks to Richard Jelinek.
+
+    -A new flag --keep-old-blank-lines=n has been added to
+    give more control over the treatment of old blank lines in
+    a script.  The manual has been revised to discuss the new
+    flag and clarify the treatment of old blank lines.  Thanks
+    to Oliver Schaefer.
+
+## 2007 12 05
+
+    -Improved support for perl 5.10: New quote modifier 'p', new block type UNITCHECK, 
+    new keyword break, improved formatting of given/when.
+
+    -Corrected tokenization bug of something like $var{-q}.
+
+    -Numerous minor formatting improvements.
+
+    -Corrected list of operators controlled by -baao -bbao to include
+      . : ? && || and or err xor
+
+    -Corrected very minor error in log file involving incorrect comment
+    regarding need for upper case of labels.  
+
+    -Fixed problem where perltidy could run for a very long time
+    when given certain non-perl text files.
+
+    -Line breaks in un-parenthesized lists now try to follow
+    line breaks in the input file rather than trying to fill
+    lines.  This usually works better, but if this causes
+    trouble you can use -iob to ignore any old line breaks.
+    Example for the following input snippet:
+
+       print
+       "conformability (Not the same dimension)\n",
+       "\t", $have, " is ", text_unit($hu), "\n",
+       "\t", $want, " is ", text_unit($wu), "\n",
+       ;
+
+     OLD:
+       print "conformability (Not the same dimension)\n", "\t", $have, " is ",
+         text_unit($hu), "\n", "\t", $want, " is ", text_unit($wu), "\n",;
+
+     NEW:
+       print "conformability (Not the same dimension)\n",
+         "\t", $have, " is ", text_unit($hu), "\n",
+         "\t", $want, " is ", text_unit($wu), "\n",
+         ;
+
+## 2007 08 01
+
+    -Added -fpsc option (--fixed-position-side-comment). Thanks to Ueli Hugenschmidt. 
+    For example -fpsc=40 tells perltidy to put side comments in column 40
+    if possible.  
+
+    -Added -bbao and -baao options (--break-before-all-operators and
+    --break-after-all-operators) to simplify command lines and configuration
+    files.  These define an initial preference for breaking at operators which can
+    be modified with -wba and -wbb flags.  For example to break before all operators
+    except an = one could use --bbao -wba='=' rather than listing every
+    single perl operator (except =) on a -wbb flag.
+
+    -Added -kis option (--keep-interior-semicolons).  Use the B<-kis> flag
+    to prevent breaking at a semicolon if there was no break there in the
+    input file.  To illustrate, consider the following input lines:
+
+       dbmclose(%verb_delim); undef %verb_delim;
+       dbmclose(%expanded); undef %expanded;
+       dbmclose(%global); undef %global;
+
+    Normally these would be broken into six lines, but 
+    perltidy -kis gives:
+
+       dbmclose(%verb_delim); undef %verb_delim;
+       dbmclose(%expanded);   undef %expanded;
+       dbmclose(%global);     undef %global;
+    
+    -Improved formatting of complex ternary statements, with indentation
+    of nested statements.  
+     OLD:
+       return defined( $cw->{Selected} )
+         ? (wantarray)
+         ? @{ $cw->{Selected} }
+         : $cw->{Selected}[0]
+         : undef;
+
+     NEW:
+       return defined( $cw->{Selected} )
+         ? (wantarray)
+             ? @{ $cw->{Selected} }
+             : $cw->{Selected}[0]
+         : undef;
+
+    -Text following un-parenthesized if/unless/while/until statements get a
+    full level of indentation.  Suggested by Jeff Armstorng and others. 
+    OLD:
+       return $ship->chargeWeapons("phaser-canon")
+         if $encounter->description eq 'klingon'
+         and $ship->firepower >= $encounter->firepower
+         and $location->status ne 'neutral';
+    NEW:
+       return $ship->chargeWeapons("phaser-canon")
+         if $encounter->description eq 'klingon'
+             and $ship->firepower >= $encounter->firepower
+             and $location->status ne 'neutral';
+
+## 2007 05 08
+
+    -Fixed bug where #line directives were being indented.  Thanks to
+    Philippe Bruhat.
+
+## 2007 05 04
+
+    -Fixed problem where an extra blank line was added after an =cut when either
+    (a) the =cut started (not stopped) a POD section, or (b) -mbl > 1. 
+    Thanks to J. Robert Ray and Bill Moseley.
+
+## 2007 04 24
+
+    -ole (--output-line-ending) and -ple (--preserve-line-endings) should
+    now work on all systems rather than just unix systems. Thanks to Dan
+    Tyrell.
+
+    -Fixed problem of a warning issued for multiple subs for BEGIN subs
+    and other control subs. Thanks to Heiko Eissfeldt.
+    
+    -Fixed problem where no space was introduced between a keyword or
+    bareword and a colon, such as:
+
+    ( ref($result) eq 'HASH' && !%$result ) ? undef: $result;
+
+    Thanks to Niek.
+
+    -Added a utility program 'break_long_quotes.pl' to the examples directory of
+    the distribution.  It breaks long quoted strings into a chain of concatenated
+    sub strings no longer than a selected length.  Suggested by Michael Renner as
+    a perltidy feature but was judged to be best done in a separate program.
+
+    -Updated docs to remove extra < and >= from list of tokens 
+    after which breaks are made by default.  Thanks to Bob Kleemann.
+
+    -Removed improper uses of $_ to avoid conflicts with external calls, giving
+    error message similar to:
+       Modification of a read-only value attempted at 
+       /usr/share/perl5/Perl/Tidy.pm line 6907.
+    Thanks to Michael Renner.
+
+    -Fixed problem when errorfile was not a plain filename or filehandle
+    in a call to Tidy.pm.  The call
+    perltidy(source => \$input, destination => \$output, errorfile => \$err);
+    gave the following error message:
+     Not a GLOB reference at /usr/share/perl5/Perl/Tidy.pm line 3827.
+    Thanks to Michael Renner and Phillipe Bruhat.
+
+    -Fixed problem where -sot would not stack an opening token followed by
+    a side comment.  Thanks to Jens Schicke.
+
+    -improved breakpoints in complex math and other long statements. Example:
+    OLD:
+       return
+         log($n) + 0.577215664901532 + ( 1 / ( 2 * $n ) ) -
+         ( 1 / ( 12 * ( $n**2 ) ) ) + ( 1 / ( 120 * ( $n**4 ) ) );
+    NEW:
+       return
+         log($n) + 0.577215664901532 +
+         ( 1 / ( 2 * $n ) ) -
+         ( 1 / ( 12 * ( $n**2 ) ) ) +
+         ( 1 / ( 120 * ( $n**4 ) ) );
+
+    -more robust vertical alignment of complex terminal else blocks and ternary
+    statements.
+
+## 2006 07 19
+
+    -Eliminated bug where a here-doc invoked through an 'e' modifier on a pattern
+    replacement text was not recognized.  The tokenizer now recursively scans
+    replacement text (but does not reformat it).
+
+    -improved vertical alignment of terminal else blocks and ternary statements.
+     Thanks to Chris for the suggestion. 
+
+     OLD:
+       if    ( IsBitmap() ) { return GetBitmap(); }
+       elsif ( IsFiles() )  { return GetFiles(); }
+       else { return GetText(); }
+
+     NEW:
+       if    ( IsBitmap() ) { return GetBitmap(); }
+       elsif ( IsFiles() )  { return GetFiles(); }
+       else                 { return GetText(); }
+
+     OLD:
+       $which_search =
+           $opts{"t"} ? 'title'
+         : $opts{"s"} ? 'subject'
+         : $opts{"a"} ? 'author'
+         : 'title';
+
+     NEW:
+       $which_search =
+           $opts{"t"} ? 'title'
+         : $opts{"s"} ? 'subject'
+         : $opts{"a"} ? 'author'
+         :              'title';
+
+    -improved indentation of try/catch blocks and other externally defined
+    functions accepting a block argument.  Thanks to jae.
+
+    -Added support for Perl 5.10 features say and smartmatch.
+
+    -Added flag -pbp (--perl-best-practices) as an abbreviation for parameters
+    suggested in Damian Conway's "Perl Best Practices".  -pbp is the same as:
+
+       -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+       -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = 
+             **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
+
+     Please note that the -st here restricts input to standard input; use
+     -nst if necessary to override.
+
+    -Eliminated some needless breaks at equals signs in -lp indentation.
+
+       OLD:
+           $c =
+             Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                                 TOP + $y * (BOTTOM - TOP) / SIZE);
+       NEW:
+           $c = Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                                    TOP + $y * (BOTTOM - TOP) / SIZE);
+
+    A break at an equals is sometimes useful for preventing complex statements 
+    from hitting the line length limit.  The decision to do this was 
+    over-eager in some cases and has been improved.  Thanks to Royce Reece.
+
+    -qw quotes contained in braces, square brackets, and parens are being
+    treated more like those containers as far as stacking of tokens.  Also
+    stack of closing tokens ending ');' will outdent to where the ');' would
+    have outdented if the closing stack is matched with a similar opening stack.
+
+     OLD: perltidy -soc -sct
+       __PACKAGE__->load_components(
+           qw(
+             PK::Auto
+             Core
+             )
+       );
+     NEW: perltidy -soc -sct
+       __PACKAGE__->load_components( qw(
+             PK::Auto
+             Core
+       ) );
+     Thanks to Aran Deltac
+
+    -Eliminated some undesirable or marginally desirable vertical alignments.
+    These include terminal colons, opening braces, and equals, and particularly
+    when just two lines would be aligned.
+
+    OLD:
+       my $accurate_timestamps = $Stamps{lnk};
+       my $has_link            = 
+           ...
+    NEW:
+       my $accurate_timestamps = $Stamps{lnk};
+       my $has_link =
+
+    -Corrected a problem with -mangle in which a space would be removed
+    between a keyword and variable beginning with ::.
+
+## 2006 06 14
+
+    -Attribute argument lists are now correctly treated as quoted strings
+    and not formatted.  This is the most important update in this version.
+    Thanks to Borris Zentner, Greg Ferguson, Steve Kirkup.
+
+    -Updated to recognize the defined or operator, //, to be released in Perl 10.
+    Thanks to Sebastien Aperghis-Tramoni.
+
+    -A useful utility perltidyrc_dump.pl is included in the examples section.  It
+    will read any perltidyrc file and write it back out in a standard format
+    (though comments are lost).
+
+    -Added option to have perltidy read and return a hash with the contents of a
+    perltidyrc file.  This may be used by Leif Eriksen's tidyview code.  This
+    feature is used by the demonstration program 'perltidyrc_dump.pl' in the
+    examples directory.
+
+    -Improved error checking in perltidyrc files.  Unknown bare words were not
+    being caught.
+
+    -The --dump-options parameter now dumps parameters in the format required by a
+    perltidyrc file.
+
+    -V-Strings with underscores are now recognized.
+    For example: $v = v1.2_3; 
+
+    -cti=3 option added which gives one extra indentation level to closing 
+    tokens always.  This provides more predictable closing token placement
+    than cti=2.  If you are using cti=2 you might want to try cti=3.
+
+    -To identify all left-adjusted comments as static block comments, use C<-sbcp='^#'>.
+
+    -New parameters -fs, -fsb, -fse added to allow sections of code between #<<<
+    and #>>> to be passed through verbatim. This is enabled by default and turned
+    off by -nfs.  Flags -fsb and -fse allow other beginning and ending markers.
+    Thanks to Wolfgang Werner and Marion Berryman for suggesting this.  
+
+    -added flag -skp to put a space between all Perl keywords and following paren.
+    The default is to only do this for certain keywords.  Suggested by
+    H.Merijn Brand.
+
+    -added flag -sfp to put a space between a function name and following paren.
+    The default is not to do this.  Suggested by H.Merijn Brand.
+
+    -Added patch to avoid breaking GetOpt::Long::Configure set by calling program. 
+    Thanks to Philippe Bruhat.
+
+    -An error was fixed in which certain parameters in a .perltidyrc file given
+    without the equals sign were not recognized.  That is,
+    '--brace-tightness 0' gave an error but '--brace-tightness=0' worked
+    ok.  Thanks to Zac Hansen.
+
+    -An error preventing the -nwrs flag from working was corrected. Thanks to
+     Greg Ferguson.
+
+    -Corrected some alignment problems with entab option.
+
+    -A bug with the combination of -lp and -extrude was fixed (though this
+    combination doesn't really make sense).  The bug was that a line with
+    a single zero would be dropped.  Thanks to Cameron Hayne.
+
+    -Updated Windows detection code to avoid an undefined variable.
+    Thanks to Joe Yates and Russ Jones.
+
+    -Improved formatting for short trailing statements following a closing paren.
+    Thanks to Joe Matarazzo.
+
+    -The handling of the -icb (indent closing block braces) flag has been changed
+    slightly to provide more consistent and predictable formatting of complex
+    structures.  Instead of giving a closing block brace the indentation of the
+    previous line, it is now given one extra indentation level.  The two methods
+    give the same result if the previous line was a complete statement, as in this
+    example:
+
+           if ($task) {
+               yyy();
+               }    # -icb
+           else {
+               zzz();
+               }
+    The change also fixes a problem with empty blocks such as:
+
+       OLD, -icb:
+       elsif ($debug) {
+       }
+
+       NEW, -icb:
+       elsif ($debug) {
+           }
+
+    -A problem with -icb was fixed in which a closing brace was misplaced when
+    it followed a quote which spanned multiple lines.
+
+    -Some improved breakpoints for -wba='&& || and or'
+
+    -Fixed problem with misaligned cuddled else in complex statements
+    when the -bar flag was also used.  Thanks to Alex and Royce Reese.
+
+    -Corrected documentation to show that --outdent-long-comments is the default.
+    Thanks to Mario Lia.
+
+    -New flag -otr (opening-token-right) is similar to -bar (braces-always-right)
+    but applies to non-structural opening tokens.
+
+    -new flags -sot (stack-opening-token), -sct (stack-closing-token).
+    Suggested by Tony.
+
+## 2003 10 21
+
+    -The default has been changed to not do syntax checking with perl.  
+      Use -syn if you want it.  Perltidy is very robust now, and the -syn
+      flag now causes more problems than it's worth because of BEGIN blocks
+      (which get executed with perl -c).  For example, perltidy will never
+      return when trying to beautify this code if -syn is used:
+
+           BEGIN { 1 while { }; }
+
+     Although this is an obvious error, perltidy is often run on untested
+     code which is more likely to have this sort of problem.  A more subtle
+     example is:
+
+           BEGIN { use FindBin; }
+
+     which may hang on some systems using -syn if a shared file system is
+     unavailable.
+
+    -Changed style -gnu to use -cti=1 instead of -cti=2 (see next item).
+     In most cases it looks better.  To recover the previous format, use
+     '-gnu -cti=2'
+
+    -Added flags -cti=n for finer control of closing token indentation.
+      -cti = 0 no extra indentation (default; same as -nicp)
+      -cti = 1 enough indentation so that the closing token
+           aligns with its opening token.
+      -cti = 2 one extra indentation level if the line has the form 
+             );   ];   or   };     (same as -icp).
+
+      The new option -cti=1 works well with -lp:
+
+      EXAMPLES:
+
+       # perltidy -lp -cti=1
+       @month_of_year = (
+                          'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                        );
+
+       # perltidy -lp -cti=2
+       @month_of_year = (
+                          'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                          );
+     This is backwards compatible with -icp. See revised manual for
+     details.  Suggested by Mike Pennington.
+     
+    -Added flag '--preserve-line-endings' or '-ple' to cause the output
+     line ending to be the same as in the input file, for unix, dos, 
+     or mac line endings.  Only works under unix. Suggested by 
+     Rainer Hochschild.
+
+    -Added flag '--output-line-ending=s' or '-ole=s' where s=dos or win,
+     unix, or mac.  Only works under unix.
+
+    -Files with Mac line endings should now be handled properly under unix
+     and dos without being passed through a converter.
+
+    -You may now include 'and', 'or', and 'xor' in the list following
+     '--want-break-after' to get line breaks after those keywords rather than
+     before them.  Suggested by Rainer Hochschild.
+
+    -Corrected problem with command line option for -vtc=n and -vt=n. The
+     equals sign was being eaten up by the Windows shell so perltidy didn't
+     see it.
+
+## 2003 07 26
+
+    -Corrected cause of warning message with recent versions of Perl:
+       "Possible precedence problem on bitwise & operator at ..."
+     Thanks to Jim Files.
+
+    -fixed bug with -html with '=for pod2html' sections, in which code/pod
+    output order was incorrect.  Thanks to Tassilo von Parseval.
+
+    -fixed bug when the -html flag is used, in which the following error
+    message, plus others, appear:
+        did not see  in pod2html output
+    This was caused by a change in the format of html output by pod2html
+    VERSION 1.04 (included with perl 5.8).  Thanks to Tassilo von Parseval.
+
+    -Fixed bug where an __END__ statement would be mistaken for a label
+    if it is immediately followed by a line with a leading colon. Thanks
+    to John Bayes.
+    
+    -Implemented guessing logic for brace types when it is ambiguous.  This
+    has been on the TODO list a long time.  Thanks to Boris Zentner for
+    an example.
+
+    -Long options may now be negated either as '--nolong-option' 
+    or '--no-long-option'.  Thanks to Philip Newton for the suggestion.
+
+    -added flag --html-entities or -hent which controls the use of
+    Html::Entities for html formatting.  Use --nohtml-entities or -nhent to
+    prevent the use of Html::Entities to encode special symbols.  The
+    default is -hent.  Html::Entities when formatting perl text to escape
+    special symbols.  This may or may not be the right thing to do,
+    depending on browser/language combinations.  Thanks to Burak Gursoy for
+    this suggestion.
+
+    -Bareword strings with leading '-', like, '-foo' now count as 1 token
+    for horizontal tightness.  This way $a{'-foo'}, $a{foo}, and $a{-foo}
+    are now all treated similarly.  Thus, by default, OLD: $a{ -foo } will
+    now be NEW: $a{-foo}.  Suggested by Mark Olesen.
+
+    -added 2 new flags to control spaces between keywords and opening parens:
+      -sak=s  or --space-after-keyword=s,  and
+      -nsak=s or --nospace-after-keyword=s, where 's' is a list of keywords.
+
+    The new default list of keywords which get a space is:
+
+      "my local our and or eq ne if else elsif until unless while for foreach
+        return switch case given when"
+
+    Use -sak=s and -nsak=s to add and remove keywords from this list,
+       respectively.
+
+    Explanation: Stephen Hildrey noted that perltidy was being inconsistent
+    in placing spaces between keywords and opening parens, and sent a patch
+    to give user control over this.  The above list was selected as being
+    a reasonable default keyword list.  Previously, perltidy
+    had a hardwired list which also included these keywords:
+
+           push pop shift unshift join split die
+
+    but did not have 'our'.  Example: if you prefer to make perltidy behave
+    exactly as before, you can include the following two lines in your
+    .perltidyrc file: 
+
+      -sak="push pop local shift unshift join split die"
+      -nsak="our"
+
+    -Corrected html error in .toc file when -frm -html is used (extra ");
+     browsers were tolerant of it.
+
+    -Improved alignment of chains of binary and ?/: operators. Example:
+     OLD:
+       $leapyear =
+         $year % 4     ? 0
+         : $year % 100 ? 1
+         : $year % 400 ? 0
+         : 1;
+     NEW:
+       $leapyear =
+           $year % 4   ? 0
+         : $year % 100 ? 1
+         : $year % 400 ? 0
+         : 1;
+
+    -improved breakpoint choices involving '->'
+
+    -Corrected tokenization of things like ${#}. For example,
+     ${#} is valid, but ${# } is a syntax error.
+
+    -Corrected minor tokenization errors with indirect object notation.
+     For example, 'new A::()' works now.
+
+    -Minor tokenization improvements; all perl code distributed with perl 5.8 
+     seems to be parsed correctly except for one instance (lextest.t) 
+     of the known bug.
+
+## 2002 11 30
+
+    -Implemented scalar attributes.  Thanks to Sean Tobin for noting this.
+
+    -Fixed glitch introduced in previous release where -pre option
+    was not outputting a leading html 
 tag.
+
+    -Numerous minor improvements in vertical alignment, including the following:
+
+    -Improved alignment of opening braces in many cases.  Needed for improved
+    switch/case formatting, and also suggested by Mark Olesen for sort/map/grep
+    formatting.  For example:
+
+     OLD:
+       @modified =
+         map { $_->[0] }
+         sort { $a->[1] <=> $b->[1] }
+         map { [ $_, -M ] } @filenames;
+
+     NEW:
+       @modified =
+         map  { $_->[0] }
+         sort { $a->[1] <=> $b->[1] }
+         map  { [ $_, -M ] } @filenames;
+
+    -Eliminated alignments across unrelated statements. Example:
+     OLD:
+       $borrowerinfo->configure( -state => 'disabled' );
+       $borrowerinfo->grid( -col        => 1, -row => 0, -sticky => 'w' );
+
+     NEW:  
+       $borrowerinfo->configure( -state => 'disabled' );
+       $borrowerinfo->grid( -col => 1, -row => 0, -sticky => 'w' );
+
+     Thanks to Mark Olesen for suggesting this.
+
+    -Improved alignement of '='s in certain cases.
+     Thanks to Norbert Gruener for sending an example.
+
+    -Outdent-long-comments (-olc) has been re-instated as a default, since
+     it works much better now.  Use -nolc if you want to prevent it.
+
+    -Added check for 'perltidy file.pl -o file.pl', which causes file.pl
+    to be lost. (The -b option should be used instead). Thanks to mreister
+    for reporting this problem.
+
+## 2002 11 06
+
+    -Switch/case or given/when syntax is now recognized.  Its vertical alignment
+    is not great yet, but it parses ok.  The words 'switch', 'case', 'given',
+    and 'when' are now treated as keywords.  If this causes trouble with older
+    code, we could introduce a switch to deactivate it.  Thanks to Stan Brown
+    and Jochen Schneider for recommending this.
+
+    -Corrected error parsing sub attributes with call parameters.
+    Thanks to Marc Kerr for catching this.
+
+    -Sub prototypes no longer need to be on the same line as sub names.  
+
+    -a new flag -frm or --frames will cause html output to be in a
+    frame, with table of contents in the left panel and formatted source
+    in the right panel.  Try 'perltidy -html -frm somemodule.pm' for example.
+
+    -The new default for -html formatting is to pass the pod through Pod::Html.
+    The result is syntax colored code within your pod documents. This can be
+    deactivated with -npod.  Thanks to those who have written to discuss this,
+    particularly Mark Olesen and Hugh Myers.
+
+    -the -olc (--outdent-long-comments) option works much better.  It now outdents
+    groups of consecutive comments together, and by just the amount needed to
+    avoid having any one line exceeding the maximum line length.
+
+    -block comments are now trimmed of trailing whitespace.
+
+    -if a directory specified with -opath does not exist, it will be created.
+
+    -a table of contents to packages and subs is output when -html is used.
+    Use -ntoc to prevent this. 
+
+    -fixed an unusual bug in which a 'for' statement following a 'format'
+    statement was not correctly tokenized.  Thanks to Boris Zentner for
+    catching this.
+
+    -Tidy.pm is no longer dependent on modules IO::Scalar and IO::ScalarArray.  
+    There were some speed issues.  Suggested by Joerg Walter.
+
+    -The treatment of quoted wildcards (file globs) is now system-independent. 
+    For example
+
+       perltidy 'b*x.p[lm]'
+
+    would match box.pl, box.pm, brinx.pm under any operating system.  Of
+    course, anything unquoted will be subject to expansion by any shell.
+
+    -default color for keywords under -html changed from 
+    SaddleBrown (#8B4513) to magenta4 (#8B008B).
+
+    -fixed an arg parsing glitch in which something like:
+      perltidy quick-help
+    would trigger the help message and exit, rather than operate on the
+    file 'quick-help'.
+
+## 2002 09 22
+
+    -New option '-b' or '--backup-and-modify-in-place' will cause perltidy to
+    overwrite the original file with the tidied output file.  The original
+    file will be saved with a '.bak' extension (which can be changed with
+    -bext=s).  Thanks to Rudi Farkas for the suggestion.
+
+    -An index to all subs is included at the top of -html output, unless
+    only the 
 section is written.
+
+    -Anchor lines of the form  are now inserted at key points
+    in html output, such as before sub definitions, for the convenience of
+    postprocessing scripts.  Suggested by Howard Owen.
+
+    -The cuddled-else (-ce) flag now also makes cuddled continues, like
+    this:
+
+       while ( ( $pack, $file, $line ) = caller( $i++ ) ) {
+          # bla bla
+       } continue {
+           $prevpack = $pack;
+       }
+
+    Suggested by Simon Perreault.  
+
+    -Fixed bug in which an extra blank line was added before an =head or 
+    similar pod line after an __END__ or __DATA__ line each time 
+    perltidy was run.  Also, an extra blank was being added after
+    a terminal =cut.  Thanks to Mike Birdsall for reporting this.
+
+## 2002 08 26
+
+    -Fixed bug in which space was inserted in a hyphenated hash key:
+       my $val = $myhash{USER-NAME};
+     was converted to:
+       my $val = $myhash{USER -NAME}; 
+     Thanks to an anonymous bug reporter at sourceforge.
+
+    -Fixed problem with the '-io' ('--indent-only') where all lines 
+     were double spaced.  Thanks to Nick Andrew for reporting this bug.
+
+    -Fixed tokenization error in which something like '-e1' was 
+     parsed as a number. 
+
+    -Corrected a rare problem involving older perl versions, in which 
+     a line break before a bareword caused problems with 'use strict'.
+     Thanks to Wolfgang Weisselberg for noting this.
+
+    -More syntax error checking added.
+
+    -Outdenting labels (-ola) has been made the default, in order to follow the
+     perlstyle guidelines better.  It's probably a good idea in general, but
+     if you do not want this, use -nola in your .perltidyrc file.
+     
+    -Updated rules for padding logical expressions to include more cases.
+     Thanks to Wolfgang Weisselberg for helpful discussions.
+
+    -Added new flag -osbc (--outdent-static-block-comments) which will
+     outdent static block comments by 2 spaces (or whatever -ci equals).
+     Requested by Jon Robison.
+
+## 2002 04 25
+
+    -Corrected a bug, introduced in the previous release, in which some
+     closing side comments (-csc) could have incorrect text.  This is
+     annoying but will be correct the next time perltidy is run with -csc.
+
+    -Fixed bug where whitespace was being removed between 'Bar' and '()' 
+     in a use statement like:
+
+          use Foo::Bar ();
+
+    -Whenever possible, if a logical expression is broken with leading
+     '&&', '||', 'and', or 'or', then the leading line will be padded
+     with additional space to produce alignment.  This has been on the
+     todo list for a long time; thanks to Frank Steinhauer for reminding
+     me to do it.  Notice the first line after the open parens here:
+
+           OLD: perltidy -lp
+           if (
+                !param("rules.to.$linecount")
+                && !param("rules.from.$linecount")
+                && !param("rules.subject.$linecount")
+                && !(
+                      param("rules.fieldname.$linecount")
+                      && param("rules.fieldval.$linecount")
+                )
+                && !param("rules.size.$linecount")
+                && !param("rules.custom.$linecount")
+             )
+
+           NEW: perltidy -lp
+           if (
+                   !param("rules.to.$linecount")
+                && !param("rules.from.$linecount")
+                && !param("rules.subject.$linecount")
+                && !(
+                         param("rules.fieldname.$linecount")
+                      && param("rules.fieldval.$linecount")
+                )
+                && !param("rules.size.$linecount")
+                && !param("rules.custom.$linecount")
+             )
+
+## 2002 04 16
+
+    -Corrected a mistokenization of variables for a package with a name
+     equal to a perl keyword.  For example: 
+
+        my::qx();
+        package my;
+        sub qx{print "Hello from my::qx\n";}
+
+     In this case, the leading 'my' was mistokenized as a keyword, and a
+     space was being place between 'my' and '::'.  This has been
+     corrected.  Thanks to Martin Sluka for discovering this. 
+
+    -A new flag -bol (--break-at-old-logic-breakpoints)
+     has been added to control whether containers with logical expressions
+     should be broken open.  This is the default.
+
+    -A new flag -bok (--break-at-old-keyword-breakpoints)
+     has been added to follow breaks at old keywords which return lists,
+     such as sort and map.  This is the default.
+
+    -A new flag -bot (--break-at-old-trinary-breakpoints) has been added to
+     follow breaks at trinary (conditional) operators.  This is the default.
+
+    -A new flag -cab=n has been added to control breaks at commas after
+     '=>' tokens.  The default is n=1, meaning break unless this breaks
+     open an existing on-line container.
+
+    -A new flag -boc has been added to allow existing list formatting
+     to be retained.  (--break-at-old-comma-breakpoints).  See updated manual.
+
+    -A new flag -iob (--ignore-old-breakpoints) has been added to
+     prevent the locations of old breakpoints from influencing the output
+     format.
+
+    -Corrected problem where nested parentheses were not getting full
+     indentation.  This has been on the todo list for some time; thanks 
+     to Axel Rose for a snippet demonstrating this issue.
+
+               OLD: inner list is not indented
+               $this->sendnumeric(
+                   $this->server,
+                   (
+                     $ret->name,        $user->username, $user->host,
+                   $user->server->name, $user->nick,     "H"
+                   ),
+               );
+
+               NEW:
+               $this->sendnumeric(
+                   $this->server,
+                   (
+                       $ret->name,          $user->username, $user->host,
+                       $user->server->name, $user->nick,     "H"
+                   ),
+               );
+
+    -Code cleaned up by removing the following unused, undocumented flags.
+     They should not be in any .perltidyrc files because they were just
+     experimental flags which were never documented.  Most of them placed
+     artificial limits on spaces, and Wolfgang Weisselberg convinced me that
+     most of them they do more harm than good by causing unexpected results.
+
+     --maximum-continuation-indentation (-mci)
+     --maximum-whitespace-columns
+     --maximum-space-to-comment (-xsc)
+     --big-space-jump (-bsj)
+
+    -Pod file 'perltidy.pod' has been appended to the script 'perltidy', and
+     Tidy.pod has been append to the module 'Tidy.pm'.  Older MakeMaker's
+     were having trouble.
+    
+    -A new flag -isbc has been added for more control on comments. This flag
+     has the effect that if there is no leading space on the line, then the
+     comment will not be indented, and otherwise it may be.  If both -ibc and
+     -isbc are set, then -isbc takes priority.  Thanks to Frank Steinhauer
+     for suggesting this.
+
+    -A new document 'stylekey.pod' has been created to quickly guide new users
+     through the maze of perltidy style parameters.  An html version is 
+     on the perltidy web page.  Take a look! It should be very helpful.
+
+    -Parameters for controlling 'vertical tightness' have been added:
+     -vt and -vtc are the main controls, but finer control is provided
+     with -pvt, -pcvt, -bvt, -bcvt, -sbvt, -sbcvt.  Block brace vertical
+     tightness controls have also been added.
+     See updated manual and also see 'stylekey.pod'. Simple examples:
+
+       # perltidy -lp -vt=1 -vtc=1
+       @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
+
+       # perltidy -lp -vt=1 -vtc=0
+       @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+       );
+
+    -Lists which do not format well in uniform columns are now better
+     identified and formated.
+
+       OLD:
+       return $c->create( 'polygon', $x, $y, $x + $ruler_info{'size'},
+           $y + $ruler_info{'size'}, $x - $ruler_info{'size'},
+           $y + $ruler_info{'size'} );
+
+       NEW:
+       return $c->create(
+           'polygon', $x, $y,
+           $x + $ruler_info{'size'},
+           $y + $ruler_info{'size'},
+           $x - $ruler_info{'size'},
+           $y + $ruler_info{'size'}
+       );
+
+       OLD:
+         radlablist($f1, pad('Initial', $p), $b->{Init}->get_panel_ref, 'None ',
+                    'None', 'Default', 'Default', 'Simple', 'Simple');
+       NEW:
+         radlablist($f1,
+                    pad('Initial', $p),
+                    $b->{Init}->get_panel_ref,
+                    'None ', 'None', 'Default', 'Default', 'Simple', 'Simple');
+
+    -Corrected problem where an incorrect html filename was generated for 
+     external calls to Tidy.pm module.  Fixed incorrect html title when
+     Tidy.pm is called with IO::Scalar or IO::Array source.
+
+    -Output file permissons are now set as follows.  An output script file
+     gets the same permission as the input file, except that owner
+     read/write permission is added (otherwise, perltidy could not be
+     rerun).  Html output files use system defaults.  Previously chmod 0755
+     was used in all cases.  Thanks to Mark Olesen for bringing this up.
+
+    -Missing semicolons will not be added in multi-line blocks of type
+     sort, map, or grep.  This brings perltidy into closer agreement
+     with common practice.  Of course, you can still put semicolons 
+     there if you like.  Thanks to Simon Perreault for a discussion of this.
+
+    -Most instances of extra semicolons are now deleted.  This is
+     particularly important if the -csc option is used.  Thanks to Wolfgang
+     Weisselberg for noting this.  For example, the following line
+     (produced by 'h2xs' :) has an extra semicolon which will now be
+     removed:
+
+        BEGIN { plan tests => 1 };
+
+    -New parameter -csce (--closing-side-comment-else-flag) can be used
+     to control what text is appended to 'else' and 'elsif' blocks.
+     Default is to just add leading 'if' text to an 'else'.  See manual.
+
+    -The -csc option now labels 'else' blocks with additinal information
+     from the opening if statement and elsif statements, if space.
+     Thanks to Wolfgang Weisselberg for suggesting this.
+
+    -The -csc option will now remove any old closing side comments
+     below the line interval threshold. Thanks to Wolfgang Weisselberg for
+     suggesting this.
+
+    -The abbreviation feature, which was broken in the previous version,
+     is now fixed.  Thanks to Michael Cartmell for noting this.
+
+    -Vertical alignment is now done for '||='  .. somehow this was 
+     overlooked.
+
+## 2002 02 25
+
+    -This version uses modules for the first time, and a standard perl
+     Makefile.PL has been supplied.  However, perltidy may still be
+     installed as a single script, without modules.  See INSTALL for
+     details.
+
+    -The man page 'perl2web' has been merged back into the main 'perltidy'
+     man page to simplify installation.  So you may remove that man page
+     if you have an older installation.
+
+    -Added patch from Axel Rose for MacPerl.  The patch prompts the user
+     for command line arguments before calling the module 
+     Perl::Tidy::perltidy.
+
+    -Corrected bug with '-bar' which was introduced in the previous
+     version.  A closing block brace was being indented.  Thanks to
+     Alexandros M Manoussakis for reporting this.
+
+    -New parameter '--entab-leading-whitespace=n', or '-et=n', has been
+     added for those who prefer tabs.  This behaves different from the
+     existing '-t' parameter; see updated man page.  Suggested by Mark
+     Olesen.
+
+    -New parameter '--perl-syntax-check-flags=s'  or '-pcsf=s' can be
+     used to change the flags passed to perltidy in a syntax check.
+     See updated man page.  Suggested by Mark Olesen. 
+
+    -New parameter '--output-path=s'  or '-opath=s' will cause output
+     files to be placed in directory s.  See updated man page.  Thanks for
+     Mark Olesen for suggesting this.
+
+    -New parameter --dump-profile (or -dpro) will dump to
+     standard output information about the search for a
+     configuration file, the name of whatever configuration file
+     is selected, and its contents.  This should help debugging
+     config files, especially on different Windows systems.
+
+    -The -w parameter now notes possible errors of the form:
+
+           $comment = s/^\s*(\S+)\..*/$1/;   # trim whitespace
+
+    -Corrections added for a leading ':' and for leaving a leading 'tcsh'
+     line untouched.  Mark Olesen reported that lines of this form were
+     accepted by perl but not by perltidy:
+
+           : # use -*- perl -*-
+           eval 'exec perl -wS $0 "$@"'  # shell should exec 'perl'
+           unless 1;                     # but Perl should skip this one
+
+     Perl will silently swallow a leading colon on line 1 of a
+     script, and now perltidy will do likewise.  For example,
+     this is a valid script, provided that it is the first line,
+     but not otherwise:
+
+           : print "Hello World\n";
+     
+     Also, perltidy will now mark a first line with leading ':' followed by
+     '#' as type SYSTEM (just as a #!  line), not to be formatted.
+
+    -List formatting improved for certain lists with special
+     initial terms, such as occur with 'printf', 'sprintf',
+     'push', 'pack', 'join', 'chmod'.  The special initial term is
+     now placed on a line by itself.  For example, perltidy -gnu
+
+        OLD:
+           $Addr = pack(
+                        "C4",                hex($SourceAddr[0]),
+                        hex($SourceAddr[1]), hex($SourceAddr[2]),
+                        hex($SourceAddr[3])
+                        );
+
+        NEW:
+           $Addr = pack("C4",
+                        hex($SourceAddr[0]), hex($SourceAddr[1]),
+                        hex($SourceAddr[2]), hex($SourceAddr[3]));
+
+         OLD:
+               push (
+                     @{$$self{states}}, '64', '66', '68',
+                     '70',              '72', '74', '76',
+                     '78',              '80', '82', '84',
+                     '86',              '88', '90', '92',
+                     '94',              '96', '98', '100',
+                     '102',             '104'
+                     );
+
+         NEW:
+               push (
+                     @{$$self{states}},
+                     '64', '66', '68', '70', '72',  '74',  '76',
+                     '78', '80', '82', '84', '86',  '88',  '90',
+                     '92', '94', '96', '98', '100', '102', '104'
+                     );
+
+    -Lists of complex items, such as matricies, are now detected
+     and displayed with just one item per row:
+
+       OLD:
+       $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+           [ 1, tan( deg2rad($a) ), 0 ], [ tan( deg2rad($b) ), 1, 0 ],
+           [ 0, 0, 1 ]
+       );
+
+       NEW:
+       $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+           [ 1,                  tan( deg2rad($a) ), 0 ],
+           [ tan( deg2rad($b) ), 1,                  0 ],
+           [ 0,                  0,                  1 ]
+       );
+
+    -The perl syntax check will be turned off for now when input is from
+     standard input or standard output.  The reason is that this requires
+     temporary files, which has produced far too many problems during
+     Windows testing.  For example, the POSIX module under Windows XP/2000
+     creates temporary names in the root directory, to which only the
+     administrator should have permission to write.
+
+    -Merged patch sent by Yves Orton to handle appropriate
+     configuration file locations for different Windows varieties
+     (2000, NT, Me, XP, 95, 98).
+
+    -Added patch to properly handle a for/foreach loop without
+     parens around a list represented as a qw.  I didn't know this
+     was possible until Wolfgang Weisselberg pointed it out:
+
+           foreach my $key qw\Uno Due Tres Quadro\ {
+               print "Set $key\n";
+           }
+
+     But Perl will give a syntax error without the $ variable; ie this will
+     not work:
+
+           foreach qw\Uno Due Tres Quadro\ {
+               print "Set $_\n";
+           }
+
+    -Merged Windows version detection code sent by Yves Orton.  Perltidy
+     now automatically turns off syntax checking for Win 9x/ME versions,
+     and this has solved a lot of robustness problems.  These systems 
+     cannot reliably handle backtick operators.  See man page for
+     details.
+     
+    -Merged VMS filename handling patch sent by Michael Cartmell.  (Invalid
+     output filenames were being created in some cases). 
+
+    -Numerous minor improvements have been made for -lp style indentation.
+
+    -Long C-style 'for' expressions will be broken after each ';'.   
+
+     'perltidy -gnu' gives:
+
+       OLD:
+       for ($status = $db->seq($key, $value, R_CURSOR()) ; $status == 0
+            and $key eq $origkey ; $status = $db->seq($key, $value, R_NEXT())) 
+
+       NEW:
+       for ($status = $db->seq($key, $value, R_CURSOR()) ;
+            $status == 0 and $key eq $origkey ;
+            $status = $db->seq($key, $value, R_NEXT()))
+
+    -For the -lp option, a single long term within parens
+     (without commas) now has better alignment.  For example,
+     perltidy -gnu
+
+               OLD:
+               $self->throw("Must specify a known host, not $location,"
+                     . " possible values ("
+                     . join (",", sort keys %hosts) . ")");
+
+               NEW:
+               $self->throw("Must specify a known host, not $location,"
+                            . " possible values ("
+                            . join (",", sort keys %hosts) . ")");
+
+## 2001 12 31
+
+    -This version is about 20 percent faster than the previous
+     version as a result of optimization work.  The largest gain
+     came from switching to a dispatch hash table in the
+     tokenizer.
+
+    -perltidy -html will check to see if HTML::Entities is
+     installed, and if so, it will use it to encode unsafe
+     characters.
+
+    -Added flag -oext=ext to change the output file extension to
+     be different from the default ('tdy' or 'html').  For
+     example:
+
+       perltidy -html -oext=htm filename
+
+    will produce filename.htm
+
+    -Added flag -cscw to issue warnings if a closing side comment would replace
+    an existing, different side comments.  See the man page for details.
+    Thanks to Peter Masiar for helpful discussions.
+
+    -Corrected tokenization error of signed hex/octal/binary numbers. For
+    example, the first hex number below would have been parsed correctly
+    but the second one was not:
+       if ( ( $tmp >= 0x80_00_00 ) || ( $tmp < -0x80_00_00 ) ) { }
+
+    -'**=' was incorrectly tokenized as '**' and '='.  This only
+        caused a problem with the -extrude opton.
+
+    -Corrected a divide by zero when -extrude option is used
+
+    -The flag -w will now contain all errors reported by 'perl -c' on the
+    input file, but otherwise they are not reported.  The reason is that
+    perl will report lots of problems and syntax errors which are not of
+    interest when only a small snippet is being formatted (such as missing
+    modules and unknown bare words).  Perltidy will always report all
+    significant syntax errors that it finds, such as unbalanced braces,
+    unless the -q (quiet) flag is set.
+
+    -Merged modifications created by Hugh Myers into perltidy.
+     These include a 'streamhandle' routine which allows perltidy
+     as a module to operate on input and output arrays and strings
+     in addition to files.  Documentation and new packaging as a
+     module should be ready early next year; This is an elegant,
+     powerful update; many thanks to Hugh for contributing it.
+
+## 2001 11 28
+
+    -added a tentative patch which tries to keep any existing breakpoints
+    at lines with leading keywords map,sort,eval,grep. The idea is to
+    improve formatting of sequences of list operations, as in a schwartzian
+    transform.  Example:
+
+       INPUT:
+       my @sorted = map { $_->[0] }
+                    sort { $a->[1] <=> $b->[1] }
+                    map { [ $_, rand ] } @list;
+
+       OLD:
+       my @sorted =
+         map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, rand ] } @list;
+
+       NEW:
+       my @sorted = map { $_->[0] }
+         sort { $a->[1] <=> $b->[1] }
+         map { [ $_, rand ] } @list;
+
+     The new alignment is not as nice as the input, but this is an improvement.
+     Thanks to Yves Orton for this suggestion.
+
+    -modified indentation logic so that a line with leading opening paren,
+    brace, or square bracket will never have less indentation than the
+    line with the corresponding opening token.  Here's a simple example:
+
+       OLD:
+           $mw->Button(
+               -text    => "New Document",
+               -command => \&new_document
+             )->pack(
+               -side   => 'bottom',
+               -anchor => 'e'
+           );
+
+       Note how the closing ');' is lined up with the first line, even
+       though it closes a paren in the 'pack' line.  That seems wrong.
+    
+       NEW:
+           $mw->Button(
+               -text    => "New Document",
+               -command => \&new_document
+             )->pack(
+               -side   => 'bottom',
+               -anchor => 'e'
+             );
+
+      This seems nicer: you can up-arrow with an editor and arrive at the
+      opening 'pack' line.
+    
+    -corrected minor glitch in which cuddled else (-ce) did not get applied
+    to an 'unless' block, which should look like this:
+
+           unless ($test) {
+
+           } else {
+
+           }
+
+     Thanks to Jeremy Mates for reporting this.
+
+    -The man page has been reorganized to parameters easier to find.
+    
+    -Added check for multiple definitions of same subroutine.  It is easy
+     to introduce this problem when cutting and pasting. Perl does not
+     complain about it, but it can lead to disaster.
+
+    -The command -pro=filename  or -profile=filename may be used to specify a
+     configuration file which will override the default name of .perltidyrc.
+     There must not be a space on either side of the '=' sign.  I needed
+     this to be able to easily test perltidy with a variety of different
+     configuration files.
+
+    -Side comment alignment has been improved somewhat across frequent level
+     changes, as in short if/else blocks.  Thanks to Wolfgang Weisselberg 
+     for pointing out this problem.  For example:
+       
+       OLD:
+       if ( ref $self ) {    # Called as a method
+           $format = shift;
+       }
+       else {    # Regular procedure call
+           $format = $self;
+           undef $self;
+       }
+
+       NEW:
+       if ( ref $self ) {    # Called as a method
+           $format = shift;
+       }
+       else {                # Regular procedure call
+           $format = $self;
+           undef $self;
+       }
+
+    -New command -ssc (--static-side-comment) and related command allows
+     side comments to be spaced close to preceding character.  This is
+     useful for displaying commented code as side comments.
+
+    -New command -csc (--closing-side-comment) and several related
+     commands allow comments to be added to (and deleted from) any or all
+     closing block braces.  This can be useful if you have to maintain large
+     programs, especially those that you didn't write.  See updated man page.
+     Thanks to Peter Masiar for this suggestion.  For a simple example:
+
+           perltidy -csc
+
+           sub foo {
+               if ( !defined( $_[0] ) ) {
+                   print("Hello, World\n");
+               }
+               else {
+                   print( $_[0], "\n" );
+               }
+           } ## end sub foo
+
+     This added '## end sub foo' to the closing brace.  
+     To remove it, perltidy -ncsc.
+
+    -New commands -ola, for outdenting labels, and -okw, for outdenting
+     selected control keywords, were implemented.  See the perltidy man
+     page for details.  Thanks to Peter Masiar for this suggestion.
+
+    -Hanging side comment change: a comment will not be considered to be a
+     hanging side comment if there is no leading whitespace on the line.
+     This should improve the reliability of identifying hanging side comments.
+     Thanks to Peter Masiar for this suggestion.
+
+    -Two new commands for outdenting, -olq (outdent-long-quotes) and -olc
+     (outdent-long-comments), have been added.  The original -oll
+     (outdent-long-lines) remains, and now is an abbreviation for -olq and -olc.
+     The new default is just -olq.  This was necessary to avoid inconsistency with
+     the new static block comment option.
+
+    -Static block comments:  to provide a way to display commented code
+     better, the convention is used that comments with a leading '##' should
+     not be formatted as usual.  Please see '-sbc' (or '--static-block-comment')
+     for documentation.  It can be deactivated with with -nsbc, but
+     should not normally be necessary. Thanks to Peter Masiar for this 
+     suggestion.
+
+    -Two changes were made to help show structure of complex lists:
+     (1) breakpoints are forced after every ',' in a list where any of
+     the list items spans multiple lines, and
+     (2) List items which span multiple lines now get continuation indentation.
+
+     The following example illustrates both of these points.  Many thanks to
+     Wolfgang Weisselberg for this snippet and a discussion of it; this is a
+     significant formatting improvement. Note how it is easier to see the call
+     parameters in the NEW version:
+
+       OLD:
+       assert( __LINE__, ( not defined $check )
+           or ref $check
+           or $check eq "new"
+           or $check eq "old", "Error in parameters",
+           defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+           defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+           defined $old_db ? ( ref $old_db ? ref $old_db : $old_db ) : "undef" );
+
+       NEW: 
+       assert(
+           __LINE__,
+           ( not defined $check )
+             or ref $check
+             or $check eq "new"
+             or $check eq "old",
+           "Error in parameters",
+           defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+           defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+           defined $old_db  ? ( ref $old_db  ? ref $old_db  : $old_db )  : "undef"
+       );
+
+       Another example shows how this helps displaying lists:
+
+       OLD:
+       %{ $self->{COMPONENTS} } = (
+           fname =>
+           { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+           street =>
+           { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+       );
+
+       The structure is clearer with the added indentation:
+       
+       NEW:
+       %{ $self->{COMPONENTS} } = (
+           fname =>
+             { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+           street =>
+             { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+       );
+
+       -The structure of nested logical expressions is now displayed better.
+       Thanks to Wolfgang Weisselberg for helpful discussions.  For example,
+       note how the status of the final 'or' is displayed in the following:
+
+       OLD:
+       return ( !null($op)
+             and null( $op->sibling )
+             and $op->ppaddr eq "pp_null"
+             and class($op) eq "UNOP"
+             and ( ( $op->first->ppaddr =~ /^pp_(and|or)$/
+               and $op->first->first->sibling->ppaddr eq "pp_lineseq" )
+               or ( $op->first->ppaddr eq "pp_lineseq"
+                   and not null $op->first->first->sibling
+                   and $op->first->first->sibling->ppaddr eq "pp_unstack" ) ) );
+
+       NEW:
+       return (
+           !null($op)
+             and null( $op->sibling )
+             and $op->ppaddr eq "pp_null"
+             and class($op) eq "UNOP"
+             and (
+               (
+                   $op->first->ppaddr =~ /^pp_(and|or)$/
+                   and $op->first->first->sibling->ppaddr eq "pp_lineseq"
+               )
+               or ( $op->first->ppaddr eq "pp_lineseq"
+                   and not null $op->first->first->sibling
+                   and $op->first->first->sibling->ppaddr eq "pp_unstack" )
+             )
+       );
+
+      -A break will always be put before a list item containing a comma-arrow.
+      This will improve formatting of mixed lists of this form:
+
+           OLD:
+           $c->create(
+               'text', 225, 20, -text => 'A Simple Plot',
+               -font => $font,
+               -fill => 'brown'
+           );
+
+           NEW:
+           $c->create(
+               'text', 225, 20,
+               -text => 'A Simple Plot',
+               -font => $font,
+               -fill => 'brown'
+           );
+
+     -For convenience, the command -dac (--delete-all-comments) now also
+     deletes pod.  Likewise, -tac (--tee-all-comments) now also sends pod
+     to a '.TEE' file.  Complete control over the treatment of pod and
+     comments is still possible, as described in the updated help message 
+     and man page.
+
+     -The logic which breaks open 'containers' has been rewritten to be completely
+     symmetric in the following sense: if a line break is placed after an opening
+     {, [, or (, then a break will be placed before the corresponding closing
+     token.  Thus, a container either remains closed or is completely cracked
+     open.
+
+     -Improved indentation of parenthesized lists.  For example, 
+
+               OLD:
+               $GPSCompCourse =
+                 int(
+                 atan2( $GPSTempCompLong - $GPSLongitude,
+                 $GPSLatitude - $GPSTempCompLat ) * 180 / 3.14159265 );
+
+               NEW:
+               $GPSCompCourse = int(
+                   atan2(
+                       $GPSTempCompLong - $GPSLongitude,
+                       $GPSLatitude - $GPSTempCompLat
+                     ) * 180 / 3.14159265
+               );
+
+      Further improvements will be made in future releases.
+
+     -Some improvements were made in formatting small lists.
+
+     -Correspondence between Input and Output line numbers reported in a 
+      .LOG file should now be exact.  They were sometimes off due to the size
+      of intermediate buffers.
+
+     -Corrected minor tokenization error in which a ';' in a foreach loop
+      control was tokenized as a statement termination, which forced a 
+      line break:
+
+           OLD:
+           foreach ( $i = 0;
+               $i <= 10;
+               $i += 2
+             )
+           {
+               print "$i ";
+           }
+
+           NEW:
+           foreach ( $i = 0 ; $i <= 10 ; $i += 2 ) {
+               print "$i ";
+           }
+
+     -Corrected a problem with reading config files, in which quote marks were not
+      stripped.  As a result, something like -wba="&& . || " would have the leading
+      quote attached to the && and not work correctly.  A workaround for older
+      versions is to place a space around all tokens within the quotes, like this:
+      -wba=" && . || "
+
+     -Removed any existing space between a label and its ':'
+       OLD    : { }
+       NEW: { }
+      This was necessary because the label and its colon are a single token.
+
+     -Corrected tokenization error for the following (highly non-recommended) 
+      construct:
+       $user = @vars[1] / 100;
+    
+     -Resolved cause of a difference between perltidy under perl v5.6.1 and
+     5.005_03; the problem was different behavior of \G regex position
+     marker(!)
+
+## 2001 10 20
+
+    -Corrected a bug in which a break was not being made after a full-line
+    comment within a short eval/sort/map/grep block.  A flag was not being
+    zeroed.  The syntax error check catches this.  Here is a snippet which
+    illustrates the bug:
+
+           eval {
+               #open Socket to Dispatcher
+               $sock = &OpenSocket;
+           };
+
+    The formatter mistakenly thought that it had found the following 
+    one-line block:
+    
+           eval {#open Socket to Dispatcher$sock = &OpenSocket; };
+
+    The patch fixes this. Many thanks to Henry Story for reporting this bug.
+
+    -Changes were made to help diagnose and resolve problems in a
+    .perltidyrc file: 
+      (1) processing of command parameters has been into two separate
+      batches so that any errors in a .perltidyrc file can be localized.  
+      (2) commands --help, --version, and as many of the --dump-xxx
+      commands are handled immediately, without any command line processing
+      at all.  
+      (3) Perltidy will ignore any commands in the .perltidyrc file which
+      cause immediate exit.  These are:  -h -v -ddf -dln -dop -dsn -dtt
+      -dwls -dwrs -ss.  Thanks to Wolfgang Weisselberg for helpful
+      suggestions regarding these updates.
+
+    -Syntax check has been reinstated as default for MSWin32 systems.  This
+    way Windows 2000 users will get syntax check by default, which seems
+    like a better idea, since the number of Win 95/98 systems will be
+    decreasing over time.  Documentation revised to warn Windows 95/98
+    users about the problem with empty '&1'.  Too bad these systems
+    all report themselves as MSWin32.
+
+## 2001 10 16
+
+    -Fixed tokenization error in which a method call of the form
+
+       Module::->new();
+    
+     got a space before the '::' like this:
+
+       Module ::->new();
+
+     Thanks to David Holden for reporting this.
+    
+    -Added -html control over pod text, using a new abbreviation 'pd'.  See
+    updated perl2web man page. The default is to use the color of a comment,
+    but italicized.  Old .css style sheets will need a new line for
+    .pd to use this.  The old color was the color of a string, and there
+    was no control.  
+    
+    -.css lines are now printed in sorted order.
+
+    -Fixed interpolation problem where html files had '$input_file' as title
+    instead of actual input file name.  Thanks to Simon Perreault for finding
+    this and sending a patch, and also to Tobias Weber.
+
+    -Breaks will now have the ':' placed at the start of a line, 
+    one per line by default because this shows logical structure
+    more clearly. This coding has been completely redone. Some 
+    examples of new ?/: formatting:
+
+          OLD:
+               wantarray ? map( $dir::cwd->lookup($_)->path, @_ ) :
+                 $dir::cwd->lookup( $_[0] )->path;
+
+          NEW:
+               wantarray 
+                 ? map( $dir::cwd->lookup($_)->path, @_ )
+                 : $dir::cwd->lookup( $_[0] )->path;
+
+          OLD:
+                   $a = ( $b > 0 ) ? {
+                       a => 1,
+                       b => 2
+                   } : { a => 6, b => 8 };
+
+          NEW:
+                   $a = ( $b > 0 )
+                     ? {
+                       a => 1,
+                       b => 2
+                     }
+                     : { a => 6, b => 8 };
+
+       OLD: (-gnu):
+       $self->note($self->{skip} ? "Hunk #$self->{hunk} ignored at 1.\n" :
+                   "Hunk #$self->{hunk} failed--$@");
+
+       NEW: (-gnu):
+       $self->note($self->{skip} 
+                   ? "Hunk #$self->{hunk} ignored at 1.\n"
+                   : "Hunk #$self->{hunk} failed--$@");
+
+       OLD:
+           $which_search =
+             $opts{"t"} ? 'title'   :
+             $opts{"s"} ? 'subject' : $opts{"a"} ? 'author' : 'title';
+
+       NEW:
+           $which_search =
+             $opts{"t"} ? 'title'
+             : $opts{"s"} ? 'subject'
+             : $opts{"a"} ? 'author'
+             : 'title';
+    
+    You can use -wba=':' to recover the previous default which placed ':'
+    at the end of a line.  Thanks to Michael Cartmell for helpful
+    discussions and examples.  
+
+    -Tokenizer updated to do syntax checking for matched ?/: pairs.  Also,
+    the tokenizer now outputs a unique serial number for every balanced
+    pair of brace types and ?/: pairs.  This greatly simplifies the
+    formatter.
+
+    -Long lines with repeated 'and', 'or', '&&', '||'  will now have
+    one such item per line.  For example:
+
+       OLD:
+           if ( $opt_d || $opt_m || $opt_p || $opt_t || $opt_x
+               || ( -e $archive && $opt_r ) )
+           {
+               ( $pAr, $pNames ) = readAr($archive);
+           }
+
+       NEW:
+           if ( $opt_d
+               || $opt_m
+               || $opt_p
+               || $opt_t
+               || $opt_x
+               || ( -e $archive && $opt_r ) )
+           {
+               ( $pAr, $pNames ) = readAr($archive);
+           }
+
+      OLD:
+           if ( $vp->{X0} + 4 <= $x && $vp->{X0} + $vp->{W} - 4 >= $x
+               && $vp->{Y0} + 4 <= $y && $vp->{Y0} + $vp->{H} - 4 >= $y ) 
+
+      NEW:
+           if ( $vp->{X0} + 4 <= $x
+               && $vp->{X0} + $vp->{W} - 4 >= $x
+               && $vp->{Y0} + 4 <= $y
+               && $vp->{Y0} + $vp->{H} - 4 >= $y )
+
+    -Long lines with multiple concatenated tokens will have concatenated
+    terms (see below) placed one per line, except for short items.  For
+    example:
+
+      OLD:
+           $report .=
+             "Device type:" . $ib->family . "  ID:" . $ib->serial . "  CRC:"
+             . $ib->crc . ": " . $ib->model() . "\n";
+
+      NEW:
+           $report .= "Device type:"
+             . $ib->family . "  ID:"
+             . $ib->serial . "  CRC:"
+             . $ib->model()
+             . $ib->crc . ": " . "\n";
+
+    NOTE: at present 'short' means 8 characters or less.  There is a
+    tentative flag to change this (-scl), but it is undocumented and
+    is likely to be changed or removed later, so only use it for testing.  
+    In the above example, the tokens "  ID:", "  CRC:", and "\n" are below
+    this limit.  
+
+    -If a line which is short enough to fit on a single line was
+    nevertheless broken in the input file at a 'good' location (see below), 
+    perltidy will try to retain a break.  For example, the following line
+    will be formatted as:
+    
+       open SUM, "<$file"
+         or die "Cannot open $file ($!)";
+    
+    if it was broken in the input file, and like this if not:
+
+       open SUM, "<$file" or die "Cannot open $file ($!)";
+
+    GOOD: 'good' location means before 'and','or','if','unless','&&','||'
+
+    The reason perltidy does not just always break at these points is that if
+    there are multiple, similar statements, this would preclude alignment.  So
+    rather than check for this, perltidy just tries to follow the input style,
+    in the hopes that the author made a good choice. Here is an example where 
+    we might not want to break before each 'if':
+
+       ($Locale, @Locale) = ($English, @English) if (@English > @Locale);
+       ($Locale, @Locale) = ($German,  @German)  if (@German > @Locale);
+       ($Locale, @Locale) = ($French,  @French)  if (@French > @Locale);
+       ($Locale, @Locale) = ($Spanish, @Spanish) if (@Spanish > @Locale);
+
+    -Added wildcard file expansion for systems with shells which lack this.
+    Now 'perltidy *.pl' should work under MSDOS/Windows.  Thanks to Hugh Myers 
+    for suggesting this.  This uses builtin glob() for now; I may change that.
+
+    -Added new flag -sbl which, if specified, overrides the value of -bl
+    for opening sub braces.  This allows formatting of this type:
+
+    perltidy -sbl 
+
+    sub foo
+    {
+       if (!defined($_[0])) {
+           print("Hello, World\n");
+       }
+       else {
+           print($_[0], "\n");
+       }
+    }
+    Requested by Don Alexander.
+
+    -Fixed minor parsing error which prevented a space after a $$ variable
+    (pid) in some cases.  Thanks to Michael Cartmell for noting this.
+    For example, 
+      old: $$< 700 
+      new: $$ < 700
+
+    -Improved line break choices 'and' and 'or' to display logic better.
+    For example:
+
+       OLD:
+           exists $self->{'build_dir'} and push @e,
+             "Unwrapped into directory $self->{'build_dir'}";
+
+       NEW:
+           exists $self->{'build_dir'}
+             and push @e, "Unwrapped into directory $self->{'build_dir'}";
+
+    -Fixed error of multiple use of abbreviatioin '-dsc'.  -dsc remains 
+    abbreviation for delete-side-comments; -dsm is new abbreviation for 
+    delete-semicolons.
+
+    -Corrected and updated 'usage' help routine.  Thanks to Slaven Rezic for 
+    noting an error.
+
+    -The default for Windows is, for now, not to do a 'perl -c' syntax
+    check (but -syn will activate it).  This is because of problems with
+    command.com.  James Freeman sent me a patch which tries to get around
+    the problems, and it works in many cases, but testing revealed several
+    issues that still need to be resolved.  So for now, the default is no
+    syntax check for Windows.
+
+    -I added a -T flag when doing perl -c syntax check.
+    This is because I test it on a large number of scripts from sources
+    unknown, and who knows what might be hidden in initialization blocks?
+    Also, deactivated the syntax check if perltidy is run as root.  As a
+    benign example, running the previous version of perltidy on the
+    following file would cause it to disappear:
+
+           BEGIN{
+                   print "Bye, bye baby!\n";
+                   unlink $0;
+           }
+           
+    The new version will not let that happen.
+
+    -I am contemplating (but have not yet implemented) making '-lp' the
+    default indentation, because it is stable now and may be closer to how
+    perl is commonly formatted.  This could be in the next release.  The
+    reason that '-lp' was not the original default is that the coding for
+    it was complex and not ready for the initial release of perltidy.  If
+    anyone has any strong feelings about this, I'd like to hear.  The
+    current default could always be recovered with the '-nlp' flag.  
+
+## 2001 09 03 
+
+    -html updates:
+        - sub definition names are now specially colored, red by default.  
+          The letter 'm' is used to identify them.
+        - keyword 'sub' now has color of other keywords.
+        - restored html keyword color to __END__ and __DATA__, which was 
+          accidentally removed in the previous version.
+
+    -A new -se (--standard-error-output) flag has been implemented and
+    documented which causes all errors to be written to standard output
+    instead of a .ERR file.
+
+    -A new -w (--warning-output) flag has been implemented and documented
+     which causes perltidy to output certain non-critical messages to the
+     error output file, .ERR.  These include complaints about pod usage,
+     for example.  The default is to not include these.
+
+     NOTE: This replaces an undocumented -w=0 or --warning-level flag
+     which was tentatively introduced in the previous version to avoid some
+     unwanted messages.  The new default is the same as the old -w=0, so
+     that is no longer needed. 
+
+     -Improved syntax checking and corrected tokenization of functions such
+     as rand, srand, sqrt, ...  These can accept either an operator or a term
+     to their right.  This has been corrected.
+    
+    -Corrected tokenization of semicolon: testing of the previous update showed 
+    that the semicolon in the following statement was being mis-tokenized.  That
+    did no harm, other than adding an extra blank space, but has been corrected.
+
+             for (sort {strcoll($a,$b);} keys %investments) {
+                ...
+             }
+
+    -New syntax check: after wasting 5 minutes trying to resolve a syntax
+     error in which I had an extra terminal ';' in a complex for (;;) statement, 
+     I spent a few more minutes adding a check for this in perltidy so it won't
+     happen again.
+
+    -The behavior of --break-before-subs (-bbs) and --break-before-blocks
+    (-bbb) has been modified.  Also, a new control parameter,
+    --long-block-line-count=n (-lbl=n) has been introduced to give more
+    control on -bbb.  This was previously a hardwired value.  The reason
+    for the change is to reduce the number of unwanted blank lines that
+    perltidy introduces, and make it less erratic.  It's annoying to remove
+    an unwanted blank line and have perltidy put it back.  The goal is to
+    be able to sprinkle a few blank lines in that dense script you
+    inherited from Bubba.  I did a lot of experimenting with different
+    schemes for introducing blank lines before and after code blocks, and
+    decided that there is no really good way to do it.  But I think the new
+    scheme is an improvement.  You can always deactivate this with -nbbb.
+    I've been meaning to work on this; thanks to Erik Thaysen for bringing
+    it to my attention.
+
+    -The .LOG file is seldom needed, and I get tired of deleting them, so
+     they will now only be automatically saved if perltidy thinks that it
+     made an error, which is almost never.  You can still force the logfile
+     to be saved with -log or -g.
+
+    -Improved method for computing number of columns in a table.  The old
+    method always tried for an even number.  The new method allows odd
+    numbers when it is obvious that a list is not a hash initialization
+    list.
+
+      old: my (
+                $name,       $xsargs, $parobjs, $optypes,
+                $hasp2child, $pmcode, $hdrcode, $inplacecode,
+                $globalnew,  $callcopy
+             )
+             = @_;
+
+      new: my (
+                $name,   $xsargs,  $parobjs,     $optypes,   $hasp2child,
+                $pmcode, $hdrcode, $inplacecode, $globalnew, $callcopy
+             )
+             = @_;
+
+    -I fiddled with the list threshold adjustment, and some small lists
+    look better now.  Here is the change for one of the lists in test file
+    'sparse.t':
+    old:
+      %units =
+        ("in", "in", "pt", "pt", "pc", "pi", "mm", "mm", "cm", "cm", "\\hsize", "%",
+          "\\vsize", "%", "\\textwidth", "%", "\\textheight", "%");
+
+    new:
+      %units = (
+                 "in",      "in", "pt",          "pt", "pc",           "pi",
+                 "mm",      "mm", "cm",          "cm", "\\hsize",      "%",
+                 "\\vsize", "%",  "\\textwidth", "%",  "\\textheight", "%"
+                 );
+
+    -Improved -lp formatting at '=' sign.  A break was always being added after
+    the '=' sign in a statement such as this, (to be sure there was enough room
+    for the parameters):
+
+    old: my $fee =
+           CalcReserveFee(
+                           $env,          $borrnum,
+                           $biblionumber, $constraint,
+                           $bibitems
+                           );
+    
+    The updated version doesn't do this unless the space is really needed:
+
+    new: my $fee = CalcReserveFee(
+                                  $env,          $borrnum,
+                                  $biblionumber, $constraint,
+                                  $bibitems
+                                  );
+
+    -I updated the tokenizer to allow $#+ and $#-, which seem to be new to
+    Perl 5.6.  Some experimenting with a recent version of Perl indicated
+    that it allows these non-alphanumeric '$#' array maximum index
+    varaibles: $#: $#- $#+ so I updated the parser accordingly.  Only $#:
+    seems to be valid in older versions of Perl.
+
+    -Fixed a rare formatting problem with -lp (and -gnu) which caused
+    excessive indentation.
+
+    -Many additional syntax checks have been added.
+
+    -Revised method for testing here-doc target strings; the following
+    was causing trouble with a regex test because of the '*' characters:
+     print <<"*EOF*";
+     bla bla
+     *EOF*
+    Perl seems to allow almost anything to be a here doc target, so an
+    exact string comparison is now used.
+
+    -Made update to allow underscores in binary numbers, like '0b1100_0000'.
+
+    -Corrected problem with scanning certain module names; a blank space was 
+    being inserted after 'warnings' in the following:
+       use warnings::register;
+    The problem was that warnings (and a couple of other key modules) were 
+    being tokenized as keywords.  They should have just been identifiers.
+
+    -Corrected tokenization of indirect objects after sort, system, and exec,
+    after testing produced an incorrect error message for the following
+    line of code:
+       print sort $sortsubref @list;
+
+    -Corrected minor problem where a line after a format had unwanted
+    extra continuation indentation.  
+
+    -Delete-block-comments (and -dac) now retain any leading hash-bang line
+
+    -Update for -lp (and -gnu) to not align the leading '=' of a list
+    with a previous '=', since this interferes with alignment of parameters.
+
+     old:  my $hireDay = new Date;
+           my $self    = {
+                        firstName => undef,
+                        lastName  => undef,
+                        hireDay   => $hireDay
+                        };
+       
+     new:  my $hireDay = new Date;
+           my $self = {
+                        firstName => undef,
+                        lastName  => undef,
+                        hireDay   => $hireDay
+                        };
+
+    -Modifications made to display tables more compactly when possible,
+     without adding lines. For example,
+     old:
+                   '1', "I", '2', "II", '3', "III", '4', "IV",
+                   '5', "V", '6', "VI", '7', "VII", '8', "VIII",
+                   '9', "IX"
+     new:
+                   '1', "I",   '2', "II",   '3', "III",
+                   '4', "IV",  '5', "V",    '6', "VI",
+                   '7', "VII", '8', "VIII", '9', "IX"
+
+    -Corrected minor bug in which -pt=2 did not keep the right paren tight
+    around a '++' or '--' token, like this:
+
+               for ($i = 0 ; $i < length $key ; $i++ )
+
+    The formatting for this should be, and now is: 
+
+               for ($i = 0 ; $i < length $key ; $i++)
+
+    Thanks to Erik Thaysen for noting this.
+
+    -Discovered a new bug involving here-docs during testing!  See BUGS.html.  
+
+    -Finally fixed parsing of subroutine attributes (A Perl 5.6 feature).
+    However, the attributes and prototypes must still be on the same line
+    as the sub name.
+
+## 2001 07 31 
+
+    -Corrected minor, uncommon bug found during routine testing, in which a
+    blank got inserted between a function name and its opening paren after
+    a file test operator, but only in the case that the function had not
+    been previously seen.  Perl uses the existence (or lack thereof) of 
+    the blank to guess if it is a function call.  That is,
+       if (-l pid_filename()) {
+    became
+       if (-l pid_filename ()) {
+    which is a syntax error if pid_filename has not been seen by perl.
+
+    -If the AutoLoader module is used, perltidy will continue formatting
+    code after seeing an __END__ line.  Use -nlal to deactivate this feature.  
+    Likewise, if the SelfLoader module is used, perltidy will continue 
+    formatting code after seeing a __DATA__ line.  Use -nlsl to
+    deactivate this feature.  Thanks to Slaven Rezic for this suggestion.
+
+    -pod text after __END__ and __DATA__ is now identified by perltidy
+    so that -dp works correctly.  Thanks to Slaven Rezic for this suggestion.
+
+    -The first $VERSION line which might be eval'd by MakeMaker
+    is now passed through unchanged.  Use -npvl to deactivate this feature.
+    Thanks to Manfred Winter for this suggestion.
+
+    -Improved indentation of nested parenthesized expressions.  Tests have
+    given favorable results.  Thanks to Wolfgang Weisselberg for helpful
+    examples.
+
+## 2001 07 23 
+
+    -Fixed a very rare problem in which an unwanted semicolon was inserted
+    due to misidentification of anonymous hash reference curly as a code
+    block curly.  (No instances of this have been reported; I discovered it
+    during testing).  A workaround for older versions of perltidy is to use
+    -nasc.
+
+    -Added -icb (-indent-closing-brace) parameter to indent a brace which
+    terminates a code block to the same level as the previous line.
+    Suggested by Andrew Cutler.  For example, 
+
+           if ($task) {
+               yyy();
+               }    # -icb
+           else {
+               zzz();
+               }
+
+    -Rewrote error message triggered by an unknown bareword in a print or
+    printf filehandle position, and added flag -w=0 to prevent issuing this
+    error message.  Suggested by Byron Jones.
+
+    -Added modification to align a one-line 'if' block with similar
+    following 'elsif' one-line blocks, like this:
+         if    ( $something eq "simple" )  { &handle_simple }
+         elsif ( $something eq "hard" )    { &handle_hard }
+    (Suggested by  Wolfgang Weisselberg).
+
+## 2001 07 02 
+
+    -Eliminated all constants with leading underscores because perl 5.005_03
+    does not support that.  For example, _SPACES changed to XX_SPACES.
+    Thanks to kromJx for this update.
+
+## 2001 07 01 
+
+    -the directory of test files has been moved to a separate distribution
+    file because it is getting large but is of little interest to most users.
+    For the current distribution:
+      perltidy-20010701.tgz        contains the source and docs for perltidy
+      perltidy-20010701-test.tgz   contains the test files
+
+    -fixed bug where temporary file perltidy.TMPI was not being deleted 
+    when input was from stdin.
+
+    -adjusted line break logic to not break after closing brace of an
+    eval block (suggested by Boris Zentner).
+
+    -added flag -gnu (--gnu-style) to give an approximation to the GNU
+    style as sometimes applied to perl.  The programming style in GNU
+    'automake' was used as a guide in setting the parameters; these
+    parameters will probably be adjusted over time.
+
+    -an empty code block now has one space for emphasis:
+      if ( $cmd eq "bg_untested" ) {}    # old
+      if ( $cmd eq "bg_untested" ) { }   # new
+    If this bothers anyone, we could create a parameter.
+
+    -the -bt (--brace-tightness) parameter has been split into two
+    parameters to give more control. -bt now applies only to non-BLOCK
+    braces, while a new parameter -bbt (block-brace-tightness) applies to
+    curly braces which contain code BLOCKS. The default value is -bbt=0.
+
+    -added flag -icp (--indent-closing-paren) which leaves a statement
+    termination of the form );, };, or ]; indented with the same
+    indentation as the previous line.  For example,
+
+       @month_of_year = (          # default, or -nicp
+           'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+           'Nov', 'Dec'
+       );
+
+       @month_of_year = (          # -icp
+           'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+           'Nov', 'Dec'
+           );
+
+    -Vertical alignment updated to synchronize with tokens &&, ||,
+    and, or, if, unless.  Allowable space before forcing
+    resynchronization has been increased.  (Suggested by  Wolfgang
+    Weisselberg).
+
+    -html corrected to use -nohtml-bold-xxxxxxx or -nhbx to negate bold,
+    and likewise -nohtml-italic-xxxxxxx or -nhbi to negate italic.  There
+    was no way to negate these previously.  html documentation updated and
+    corrected.  (Suggested by  Wolfgang Weisselberg).
+
+    -Some modifications have been made which improve the -lp formatting in
+    a few cases.
+
+    -Perltidy now retains or creates a blank line after an =cut to keep
+    podchecker happy (Suggested by Manfred H. Winter).  This appears to be
+    a glitch in podchecker, but it was annoying.
+
+## 2001 06 17  
+
+    -Added -bli flag to give continuation indentation to braces, like this
+
+           if ($bli_flag)
+             {
+               extra_indentation();
+             }
+
+    -Corrected an error with the tab (-t) option which caused the last line
+    of a multi-line quote to receive a leading tab.  This error was in
+    version 2001 06 08  but not 2001 04 06.  If you formatted a script
+    with -t with this version, please check it by running once with the
+    -chk flag and perltidy will scan for this possible error.
+
+    -Corrected an invalid pattern (\R should have been just R), changed
+    $^W =1 to BEGIN {$^W=1} to use warnings in compile phase, and corrected
+    several unnecessary 'my' declarations. Many thanks to Wolfgang Weisselberg,
+    2001-06-12, for catching these errors.
+    
+    -A '-bar' flag has been added to require braces to always be on the
+    right, even for multi-line if and foreach statements.  For example,
+    the default formatting of a long if statement would be:
+
+           if ($bigwasteofspace1 && $bigwasteofspace2
+             || $bigwasteofspace3 && $bigwasteofspace4)
+           {
+               bigwastoftime();
+           }
+
+    With -bar, the formatting is:
+
+           if ($bigwasteofspace1 && $bigwasteofspace2
+             || $bigwasteofspace3 && $bigwasteofspace4) {
+               bigwastoftime();
+           }
+    Suggested by Eli Fidler 2001-06-11.
+
+    -Uploaded perltidy to sourceforge cvs 2001-06-10.
+
+    -An '-lp' flag (--line-up-parentheses) has been added which causes lists
+    to be indented with extra indentation in the manner sometimes
+    associated with emacs or the GNU suggestions.  Thanks to Ian Stuart for
+    this suggestion and for extensive help in testing it. 
+
+    -Subroutine call parameter lists are now formatted as other lists.
+    This should improve formatting of tables being passed via subroutine
+    calls.  This will also cause full indentation ('-i=n, default n= 4) of
+    continued parameter list lines rather than just the number of spaces
+    given with -ci=n, default n=2.
+    
+    -Added support for hanging side comments.  Perltidy identifies a hanging
+    side comment as a comment immediately following a line with a side
+    comment or another hanging side comment.  This should work in most
+    cases.  It can be deactivated with --no-hanging-side-comments (-nhsc).
+    The manual has been updated to discuss this.  Suggested by Brad
+    Eisenberg some time ago, and finally implemented.
+
+## 2001 06 08  
+
+    -fixed problem with parsing command parameters containing quoted
+    strings in .perltidyrc files. (Reported by Roger Espel Llima 2001-06-07).
+
+    -added two command line flags, --want-break-after and 
+    --want-break-before, which allow changing whether perltidy
+    breaks lines before or after any operators.  Please see the revised 
+    man pages for details.
+
+    -added system-wide configuration file capability.
+    If perltidy does not find a .perltidyrc command line file in
+    the current directory, nor in the home directory, it now looks
+    for '/usr/local/etc/perltidyrc' and then for '/etc/perltidyrc'.
+    (Suggested by Roger Espel Llima 2001-05-31).
+
+    -fixed problem in which spaces were trimmed from lines of a multi-line
+    quote. (Reported by Roger Espel Llima 2001-05-30).  This is an 
+    uncommon situation, but serious, because it could conceivably change
+    the proper function of a script.
+
+    -fixed problem in which a semicolon was incorrectly added within 
+    an anonymous hash.  (Reported by A.C. Yardley, 2001-5-23).
+    (You would know if this happened, because perl would give a syntax
+    error for the resulting script).
+
+    -fixed problem in which an incorrect error message was produced
+     after a version number on a 'use' line, like this ( Reported 
+     by Andres Kroonmaa, 2001-5-14):
+
+                 use CGI 2.42 qw(fatalsToBrowser);
+
+     Other than the extraneous error message, this bug was harmless.
+
+## 2001 04 06 
+
+    -fixed serious bug in which the last line of some multi-line quotes or
+     patterns was given continuation indentation spaces.  This may make
+     a pattern incorrect unless it uses the /x modifier.  To find
+     instances of this error in scripts which have been formatted with
+     earlier versions of perltidy, run with the -chk flag, which has
+     been added for this purpose (SLH, 2001-04-05).
+
+     ** So, please check previously formatted scripts by running with -chk
+     at least once **
+
+    -continuation indentation has been reprogrammed to be hierarchical, 
+     which improves deeply nested structures.
+
+    -fixed problem with undefined value in list formatting (reported by Michael
+     Langner 2001-04-05)
+
+    -Switched to graphical display of nesting in .LOG files.  If an
+     old format string was "(1 [0 {2", the new string is "{{(".  This
+     is easier to read and also shows the order of nesting.
+
+    -added outdenting of cuddled paren structures, like  ")->pack(".
+
+    -added line break and outdenting of ')->' so that instead of
+
+           $mw->Label(
+             -text   => "perltidy",
+             -relief => 'ridge')->pack;
+    
+     the current default is:
+
+           $mw->Label(
+             -text   => "perltidy",
+             -relief => 'ridge'
+           )->pack;
+
+     (requested by Michael Langner 2001-03-31; in the future this could 
+     be controlled by a command-line parameter).
+
+    -revised list indentation logic, so that lists following an assignment
+     operator get one full indentation level, rather than just continuation 
+     indentation.  Also corrected some minor glitches in the continuation 
+     indentation logic. 
+
+    -Fixed problem with unwanted continuation indentation after a blank line 
+    (reported by Erik Thaysen 2001-03-28):
+
+    -minor update to avoid stranding a single '(' on one line
+
+## 2001 03 28:
+
+    -corrected serious error tokenizing filehandles, in which a sub call 
+    after a print or printf, like this:
+       print usage() and exit;
+    became this:
+       print usage () and exit;
+    Unfortunately, this converts 'usage' to a filehandle.  To fix this, rerun
+    perltidy; it will look for this situation and issue a warning. 
+
+    -fixed another cuddled-else formatting bug (Reported by Craig Bourne)
+
+    -added several diagnostic --dump routines
+    
+    -added token-level whitespace controls (suggested by Hans Ecke)
+
+## 2001 03 23:
+
+    -added support for special variables of the form ${^WANT_BITS}
+
+    -space added between scalar and left paren in 'for' and 'foreach' loops,
+     (suggestion by Michael Cartmell):
+
+       for $i( 1 .. 20 )   # old
+       for $i ( 1 .. 20 )   # new
+
+    -html now outputs cascading style sheets (thanks to suggestion from
+     Hans Ecke)
+
+    -flags -o and -st now work with -html
+
+    -added missing -html documentation for comments (noted by Alex Izvorski)
+
+    -support for VMS added (thanks to Michael Cartmell for code patches and 
+      testing)
+
+    -v-strings implemented (noted by Hans Ecke and Michael Cartmell; extensive
+      testing by Michael Cartmell)
+
+    -fixed problem where operand may be empty at line 3970 
+     (\b should be just b in lines 3970, 3973) (Thanks to Erik Thaysen, 
+     Keith Marshall for bug reports)
+
+    -fixed -ce bug (cuddled else), where lines like '} else {' were indented
+     (Thanks to Shawn Stepper and Rick Measham for reporting this)
+
+## 2001 03 04:
+
+    -fixed undefined value in line 153 (only worked with -I set)
+    (Thanks to Mike Stok, Phantom of the Opcodes, Ian Ehrenwald, and others)
+
+    -fixed undefined value in line 1069 (filehandle problem with perl versions <
+    5.6) (Thanks to Yuri Leikind, Mike Stok, Michael Holve, Jeff Kolber)
+
+## 2001 03 03:
+
+    -Initial announcement at freshmeat.net; started Change Log
+    (Unfortunately this version was DOA, but it was fixed the next day)
diff --git a/INSTALL b/INSTALL
deleted file mode 100644
index da8af29..0000000
--- a/INSTALL
+++ /dev/null
@@ -1,387 +0,0 @@
-PERLTIDY INSTALLATION NOTES
-Get a distribution file
-    Source Files in .tar.gz and .zip format
-        This document tells how to install perltidy from the basic source
-        distribution files in .tar.gz or .zip format. These files are
-        identical except for the line endings. The .tar.gz has Unix style
-        line endings, and the .zip file has Windows style line endings. The
-        standard perl MakeMaker method should work for these in most cases.
-
-    Source files in RPM and .deb format
-        The web site also has links to RPM and Debian .deb Linux packages,
-        which may be convenient for some users.
-
-Quick Test Drive
-    If you want to do a quick test of perltidy without doing any
-    installation, get a .tar.gz or a .zip source file and see the section
-    below "Method 2: Installation as a single binary script".
-
-Uninstall older versions
-    In certain circumstances, it is best to remove an older version of
-    perltidy before installing the latest version. These are:
-
-    Uninstall a Version older than 20020225
-        You can use perltidy -v to determine the version number. The first
-        version of perltidy to use Makefile.PL for installation was
-        20020225, so if your previous installation is older than this, it is
-        best to remove it, because the installation path may now be
-        different. There were up to 3 files these older installations: the
-        script perltidy and possibly two man pages, perltidy.1 and
-        perl2web.1. If you saved your Makefile, you can probably use "make
-        uninstall". Otherwise, you can use a locate or find command to find
-        and remove these files.
-
-    Uninstall older versions when changing installation method
-        If you switch from one installation method to another, the paths to
-        the components of perltidy may change, so it is probably best to
-        remove the older version before installing the new version. If your
-        older installation method had an uninstall option (such as with
-        RPM's and debian packages), use it. Otherwise, you can locate and
-        remove the older files by hand. There are two key files: Tidy.pm and
-        perltidy. In addition, there may be one or two man pages, something
-        like Perl::Tidy.3pm and perltidy.1p. You can use a "locate" and/or
-        "find" command to find and remove these files. After installation,
-        you can verify that the new version of perltidy is working with the
-        "perltidy -v" command.
-
-Two Installation Methods - Overview
-    These are generic instructions. Some system-specific notes and hints are
-    given in later sections.
-
-    Two separate installation methods are possible.
-
-    Method 1: Standard Installation Method
-        The standard method based on MakeMaker should work in a normal perl
-        environment. This is the recommended installation procedure for
-        systems which support it.
-
-                perl Makefile.PL
-                make
-                make test
-                make install
-
-        The "make" command is probably "nmake" under a Windows system. You
-        may need to become root (or administrator) before doing the "make
-        install" step.
-
-    Method 2: Installation as a single binary script
-        If you just want to take perltidy for a quick test drive without
-        installing it, or are having trouble installing modules, you can
-        bundle it all in one independent executable script. This might also
-        be helpful on a system for which the Makefile.PL method does not
-        work, or if you are temporarily a guest on some system, or if you
-        want to try hacking a special version of perltidy without messing up
-        your regular version.
-
-        You just need to uncompress the source distribution, cd down into
-        it, and enter the command:
-
-                perl pm2pl
-
-        which will combine the pieces of perltidy into a single script named
-        perltidy in the current directory. This script should be fully
-        functional. Try it out on a handy perl script, for example
-
-          perl perltidy Makefile.PL
-
-        This should create Makefile.PL.tdy.
-
-    After Installation
-        After installation by either method, verify that the installation
-        worked and that the correct new version is being by entering:
-
-          perltidy -v
-
-        If the version number disagrees with the version number embedded in
-        the distribution file name, search for and remove the old version.
-        For example, under a Unix system, the command "which perltidy" might
-        show where it is. Also, see the above notes on uninstalling older
-        versions.
-
-        On a Unix system running the "bash" shell, if you had a previous
-        installation of perltidy, you may have to use
-
-         hash -r
-
-        to get the shell to find the new one.
-
-        After perltidy is installed, you can find where it will look for
-        configuration files and environment variables on your system with
-        the command:
-
-          perltidy -dpro
-
-    How to Uninstall
-        Unfortunately, the standard Perl installation method does not seem
-        able to do an uninstall.
-
-        But try this:
-
-          make uninstall
-
-        On some systems, it will give you a list of files to remove by hand.
-        If not, you need to find the script perltidy and its module file
-        Tidy.pm, which will be in a subdirectory named Perl in the site
-        library.
-
-        If you installed perltidy with the alternative method, you should
-        just reverse the steps that you used.
-
-  Unix Installation Notes
-    Alternative method - Unix
-        If the alternative method is used, test the script produced by the
-        "pm2pl" perl script:
-
-          perl ./perltidy somefile.pl
-
-        where somefile.pl is any convenient test file, such as Makefile.PL
-        itself. Then,
-
-        1. If the script is not executable, use
-
-         chmod +x perltidy
-
-        2. Verify that the initial line in perltidy works for your system by
-        entering:
-
-         ./perltidy -h
-
-        which should produce the usage text and then exit. This should
-        usually work, but if it does not, you will need to change the first
-        line in perltidy to reflect the location of perl on your system. On
-        a Unix system, you might find the path to perl with the command
-        'which perl'.
-
-        3. A sample Makefile for this installation method is Makefile.npm.
-        Edit it to have the correct paths.
-
-        You will need to become root unless you change the paths to point to
-        somewhere in your home directory. Then issue the command
-
-         make -f Makefile.npm install
-
-        This installs perltidy and the man page perltidy.1.
-
-        5. Test the installation using
-
-         perltidy -h
-
-        You should see the usage screen. Then, if you installed the man
-        pages, try
-
-         man perltidy
-
-        which should bring up the manual page.
-
-        If you ever want to remove perltidy, you can remove perltidy and its
-        man pages by hand or use
-
-         make uninstall
-
-  Windows Installation Notes
-    On a Windows 9x/Me system you should CLOSE ANY OPEN APPLICATIONS to
-    avoid losing unsaved data in case of trouble.
-
-    Standard Method - Windows
-        After you unzip the distribution file, the procedure is probably
-        this:
-
-                perl Makefile.PL
-                nmake
-                nmake test
-                nmake install
-
-        You may need to download a copy of unzip to unzip the .zip
-        distribution file; you can get this at
-        http://www.info-zip.org/pub/infozip/UnZip.html
-
-        If you have ActiveState Perl, the installation method is outlined at
-        http://aspn.activestate.com//ASPN/Reference/Products/ActivePerl/faq/
-        Windows/ActivePerl-Winfaq9.html#How_can_I_use_modules_from_CPAN_
-
-        You may need to download a copy of Microsoft's nmake program from
-        ftp://ftp.microsoft.com/Softlib/MSLFILES/nmake15.exe
-
-        If you are not familiar with installing modules, or have trouble
-        doing so, and want to start testing perltidy quickly, you may want
-        to use the alternative method instead (next section).
-
-    Alternative Method - Windows
-        From the main installation directory, just enter
-
-           perl pm2pl 
-
-        Placing the resulting file perltidy and the example batch file
-        perltidy.bat, located in the examples directory, in your path should
-        work. (You can determine your path by issuing the msdos command
-        "PATH"). However, the batch file probably will not support file
-        redirection. So, for example, to pipe the long help message through
-        'more', you might have to invoke perltidy with perl directly, like
-        this:
-
-         perl \somepath\perltidy -h | more
-
-        The batch file will not work properly with wildcard filenames, but
-        you may use wildcard filenames if you place them in quotes. For
-        example
-
-         perltidy '*.pl'
-
-  VMS Installation Notes
-    Links to VMS Utilities and Documentation
-        To install perltidy you will need the following utilities Perl, of
-        course, source with VMS goodies available from
-        http://www.sidhe.org/vmsperl or binary available from the Compaq
-        OpenVMS freeware CD. To unpack the source either gunzip and vmstar
-        available from the Compaq OpenVMS freeware CD or zip available from
-        http://www.info-zip.org/
-
-        To build perltidy you can use either MMS, Compaq's VMS equivalent of
-        make, or MMK, an MMS clone available from http://www.madgoat.com.
-
-        Information on running perl under VMS can be found at:
-        http://w4.lns.cornell.edu/~pvhp/perl/VMS.html
-
-    Unpack the source:
-         $ unzip -a perl-tidy-yyyymmdd.zip  ! or
-
-         $ unzip /text=auto perl-tidy-yyyymmdd.zip ! or
-
-         $ gunzip perl-tidy-yyyymmdd.tgz
-         $ vmstar perl-tidy-yyyymmdd.tar
-
-    Build and install perltidy under VMS:
-         $ set default [.perl-tidy-yyymmdd]
-         $ perl perltidy.pl
-         $ mmk
-         $ mmk test
-         $ mmk install
-
-    Using Perltidy under VMS
-        Create a symbol. This should be put in a logon script, eg
-        sylogin.com
-
-         $ perltidy == "perl perl_root:[utils]perltidy."
-
-        Default parameters can be placed in a perltidyrc file. Perltidy
-        looks for one in the following places and uses the first found if
-        the logical "PERLTIDY" is a file and the file exists then that is
-        used if the logical "PERLTIDY" is a directory then look for a
-        .perltidyrc file in the directory look for a .perltidyrc file in the
-        user's home directory
-
-        To see where the search is done and which .perltidyrc is used type
-
-         $ perltidy -dpro
-
-        A system "PERLTIDY" logical can be defined pointing to a file with a
-        minimal configuration, and users can defined their own logical to
-        use a personal .perltidyrc file.
-
-         $ define /system perltidy perl_root:[utils]perltidy.rc
-
-    The -x Parameter
-        If you have one of the magic incantations at the start of perl
-        scripts, so that they can be invoked as a .com file, then you will
-        need to use the -x parameter which causes perltidy to skip all lines
-        until it finds a hash bang line eg "#!perl -w". Since it is such a
-        common option this is probably a good thing to put in a .perltidyrc
-        file.
-
-    VMS File Extensions
-        VMS file extensions will use an underscore character instead of a
-        dot, when necessary, to create a valid filename. So
-
-              perltidy myfile.pl
-
-        will generate the output file myfile.pl_tdy instead of
-        myfile.pl.tdy, and so on.
-
-
-Troubleshooting / Other Operating Systems
-    Is your system missing from the notes above, or are you having trouble?
-    Perltidy is quite portable. 
-
-    If there seems to be a problem locating a configuration file, you can see
-    what is going on in the config file search with:
-
-     perltidy -dpro
-
-    If you want to customize where perltidy looks for configuration files,
-    look at the routine 'find_config_file' in module 'Tidy.pm'. You should
-    be able to at least use the '-pro=filename' method under most systems.
-
-    Remember to place quotes (either single or double) around input
-    parameters which contain spaces, such as file names. For example:
-
-     perltidy "file name with spaces"
-
-    Without the quotes, perltidy would look for four files: file, name,
-    with, and spaces.
-
-    If you develop a system-dependent patch that might be of general
-    interest, please let us know.
-
-CONFIGURATION FILE
-    You do not need a configuration file, but you may eventually want to
-    create one to save typing; the tutorial and man page discuss this.
-
-SYSTEM TEMPORARY FILES
-    Perltidy needs to create a system temporary file when it invokes
-    Pod::Html to format pod text under the -html option. For Unix systems,
-    this will normally be a file in /tmp, and for other systems, it will be
-    a file in the current working directory named perltidy.TMP. This file
-    will be removed when the run finishes.
-
-DOCUMENTATION
-    Documentation is contained in .pod format, either in the docs directory
-    or appended to the scripts.
-
-    These documents can also be found at http://perltidy.sourceforge.net
-
-    Reading the brief tutorial should help you use perltidy effectively. The
-    tutorial can be read interactively with perldoc, for example
-
-      cd docs
-      perldoc tutorial.pod
-
-    or else an html version can be made with pod2html:
-
-      pod2html tutorial.pod >tutorial.html
-
-    If you use the Makefile.PL installation method on a Unix system, the
-    perltidy and Perl::Tidy man pages should automatically be installed.
-    Otherwise, you can extract the man pages with the pod2xxxx utilities, as
-    follows:
-
-      cd bin
-      pod2text perltidy >perltidy.txt
-      pod2html perltidy >perltidy.html
-  
-      cd lib/Perl
-      pod2text Tidy.pm >Tidy.txt
-      pod2html Tidy.pm >Tidy.html
-
-    After installation, the installation directory of files may be deleted.
-
-    Perltidy is still being developed, so please check sourceforge
-    occasionally for updates if you find that it is useful. New releases are
-    announced on freshmeat.net.
-
-CREDITS
-    Thanks to Michael Cartmell for supplying notes on VMS.
-
-    Thanks to Axel Rose for supplying notes on MacPerl.
-
-FEEDBACK / BUG REPORTS
-    If you see ways to improve these notes, please let us know.
-
-    Bug reports, comments and suggestions are welcome. Attach the smallest
-    piece of code which demonstrates the bug or issue. If appropriate,
-    attach a .LOG file. Your efforts are greatly appreciated!
-
-    Thank You
-
-     Steve Hancock
-     perltidy at users.sourceforge.net
-     http://perltidy.sourceforge.net
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 0000000..67ecab7
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,398 @@
+# PERLTIDY INSTALLATION NOTES
+
+# Get a distribution file
+
+- Source Files in .tar.gz and .zip format
+
+    This document tells how to install perltidy from the basic source
+    distribution files in `.tar.gz` or `.zip` format.  These files are
+    identical except for the line endings.  The `.tar.gz` has Unix style
+    line endings, and the `.zip` file has Windows style line endings.  The
+    standard perl MakeMaker method should work for these in most cases. 
+
+- Source files in RPM and .deb format
+
+    The web site also has links to RPM and Debian .deb Linux packages, which may be
+    convenient for some users.
+
+# Quick Test Drive
+
+If you want to do a quick test of perltidy without doing any installation, get
+a `.tar.gz` or a `.zip` source file and see the section below "Method 2: Installation
+as a single binary script".
+
+# Uninstall older versions
+
+In certain circumstances, it is best to remove an older version
+of perltidy before installing the latest version.  These are:
+
+- Uninstall a Version older than 20020225
+
+    You can use perltidy -v to determine the version number.  The first
+    version of perltidy to use Makefile.PL for installation was 20020225, so
+    if your previous installation is older than this, it is best to remove
+    it, because the installation path may now be different.  There were up
+    to 3 files these older installations: the script `perltidy` and
+    possibly two man pages, `perltidy.1` and `perl2web.1`.  If you saved
+    your Makefile, you can probably use `make uninstall`.  Otherwise, you
+    can use a `locate` or `find` command to find and remove these files.
+
+- Uninstall older versions when changing installation method
+
+    If you switch from one installation method to another, the paths to the
+    components of perltidy may change, so it is probably best to remove the older
+    version before installing the new version.  If your older installation method
+    had an uninstall option (such as with RPM's and debian packages), use it.
+    Otherwise, you can locate and remove the older files by hand.  There are two
+    key files: `Tidy.pm` and `perltidy`.  In addition, there may be one or two
+    man pages, something like `Perl::Tidy.3pm` and `perltidy.1p`.  You can use a
+    `locate` and/or `find` command to find and remove these files.  After
+    installation, you can verify that the new version of perltidy is working with
+    the `perltidy -v` command.
+
+# Two Installation Methods - Overview
+
+These are generic instructions.  Some system-specific notes and hints
+are given in later sections.  
+
+Two separate installation methods are possible.  
+
+- Method 1: Standard Installation Method
+
+    The standard method based on MakeMaker should work in a normal perl
+    environment.  This is the recommended installation procedure for
+    systems which support it.
+
+            perl Makefile.PL
+            make
+            make test
+            make install
+
+    The `make` command is probably `nmake` under a Windows system.  You
+    may need to become root (or administrator) before doing the `make
+    install` step.  
+
+- Method 2: Installation as a single binary script
+
+    If you just want to take perltidy for a quick test drive without installing it,
+    or are having trouble installing modules, you can bundle it all in one
+    independent executable script.  This might also be helpful on a system for
+    which the Makefile.PL method does not work, or if you are temporarily a guest
+    on some system, or if you want to try hacking a special version of perltidy
+    without messing up your regular version.  
+
+    You just need to uncompress the source distribution, cd down into it, and enter
+    the command:
+
+            perl pm2pl
+
+    which will combine the pieces of perltidy into a single script named
+    `perltidy` in the current directory.  This script should be 
+    fully functional.  Try it out on a handy perl script, for example
+
+        perl perltidy Makefile.PL
+
+    This should create `Makefile.PL.tdy`.
+
+- After Installation
+
+    After installation by either method, verify that the installation worked
+    and that the correct new version is being by entering:
+
+        perltidy -v
+
+    If the version number disagrees with the version number embedded in the
+    distribution file name, search for and remove the old version.
+    For example, under a Unix system, the command `which perltidy` might 
+    show where it is.  Also, see the above notes on uninstalling older
+    versions.
+
+    On a Unix system running the `bash` shell, if you had a previous
+    installation of perltidy, you may have to use 
+
+        hash -r
+
+    to get the shell to find the new one.
+
+    After `perltidy` is installed, you can find where it will look for
+    configuration files and environment variables on your system with
+    the command:
+
+        perltidy -dpro
+
+- How to Uninstall
+
+    Unfortunately, the standard Perl installation method does not seem able
+    to do an uninstall.
+
+    But try this:
+
+        make uninstall
+
+    On some systems, it will give you a list of files to remove by hand.  If
+    not, you need to find the script `perltidy` and its module file
+    `Tidy.pm`, which will be in a subdirectory named `Perl` in the site
+    library.
+
+    If you installed perltidy with the alternative method, you should just
+    reverse the steps that you used.
+
+## Unix Installation Notes
+
+- Alternative method - Unix
+
+    If the alternative method is used, test the script produced by the
+    `pm2pl` perl script:
+
+        perl ./perltidy somefile.pl
+
+    where `somefile.pl` is any convenient test file, such as `Makefile.PL`
+    itself.  Then,
+
+    1\. If the script is not executable, use 
+
+        chmod +x perltidy
+
+    2\. Verify that the initial line in perltidy works for your system by
+    entering:
+
+        ./perltidy -h
+
+    which should produce the usage text and then exit.  This should usually
+    work, but if it does not, you will need to change the first line in
+    `perltidy` to reflect the location of perl on your system.  On a Unix
+    system, you might find the path to perl with the command 'which perl'.
+
+    3\. A sample `Makefile` for this installation method is `Makefile.npm`.
+    Edit it to have the correct paths.
+
+    You will need to become root unless you change the paths to point to
+    somewhere in your home directory.  Then issue the command
+
+        make -f Makefile.npm install
+
+    This installs perltidy and the man page perltidy.1. 
+
+    5\. Test the installation using
+
+        perltidy -h
+
+    You should see the usage screen.  Then, if you installed the man pages, 
+    try
+
+        man perltidy
+
+    which should bring up the manual page. 
+
+    If you ever want to remove perltidy, you can remove perltidy and its man
+    pages by hand or use
+
+        make uninstall
+
+## Windows Installation Notes
+
+On a Windows 9x/Me system you should CLOSE ANY OPEN APPLICATIONS to
+avoid losing unsaved data in case of trouble.
+
+- Standard Method - Windows
+
+    After you unzip the distribution file, the procedure is probably this:
+
+            perl Makefile.PL
+            nmake
+            nmake test
+            nmake install
+
+    You may need to download a copy of `unzip` to unzip the `.zip` distribution
+    file; you can get this at
+    http://www.info-zip.org/pub/infozip/UnZip.html
+
+    If you have ActiveState
+    Perl, the installation method is outlined at
+    http://aspn.activestate.com//ASPN/Reference/Products/ActivePerl/faq/Windows/ActivePerl-Winfaq9.html#How\_can\_I\_use\_modules\_from\_CPAN\_
+
+    You may need to download a copy of Microsoft's `nmake` program from
+    ftp://ftp.microsoft.com/Softlib/MSLFILES/nmake15.exe
+
+    If you are not familiar with installing modules, or have trouble doing
+    so, and want to start testing perltidy quickly, you may want to use the
+    alternative method instead (next section).
+
+- Alternative Method - Windows
+
+    From the main installation directory, just enter
+
+        perl pm2pl 
+
+    Placing the resulting file `perltidy` and the example batch file
+    `perltidy.bat`, located in the `examples` directory, in your path should
+    work.  (You can determine your path by issuing the msdos command
+    `PATH`).  However, the batch file probably will not support file
+    redirection.  So, for example, to pipe the long help message through
+    'more', you might have to invoke perltidy with perl directly, like this:
+
+        perl \somepath\perltidy -h | more
+
+    The batch file will not work properly with wildcard filenames, but you may
+    use wildcard filenames if you place them in quotes.  For example
+
+        perltidy '*.pl'
+
+## VMS Installation Notes
+
+- Links to VMS Utilities and Documentation
+
+    To install perltidy you will need the following utilities Perl, of
+    course, source with VMS goodies available from
+    http://www.sidhe.org/vmsperl or binary available from the Compaq OpenVMS
+    freeware CD.  To unpack the source either gunzip and vmstar available
+    from the Compaq OpenVMS freeware CD or zip available from
+    http://www.info-zip.org/
+
+    To build perltidy you can use either **MMS**, Compaq's VMS equivalent of
+    make, or **MMK**, an **MMS** clone available from
+    http://www.madgoat.com.
+
+    Information on running perl under VMS can be found at:
+    http://w4.lns.cornell.edu/~pvhp/perl/VMS.html
+
+- Unpack the source:
+
+        $ unzip -a perl-tidy-yyyymmdd.zip  ! or
+
+        $ unzip /text=auto perl-tidy-yyyymmdd.zip ! or
+
+        $ gunzip perl-tidy-yyyymmdd.tgz
+        $ vmstar perl-tidy-yyyymmdd.tar
+
+- Build and install perltidy under VMS:
+
+        $ set default [.perl-tidy-yyymmdd]
+        $ perl perltidy.pl
+        $ mmk
+        $ mmk test
+        $ mmk install
+
+- Using Perltidy under VMS
+
+    Create a symbol. This should be put in a logon script, eg sylogin.com
+
+        $ perltidy == "perl perl_root:[utils]perltidy."
+
+    Default parameters can be placed in a `perltidyrc` file.  Perltidy
+    looks for one in the following places and uses the first found if the
+    logical `PERLTIDY` is a file and the file exists then that is used if the
+    logical `PERLTIDY` is a directory then look for a `.perltidyrc` file in the
+    directory look for a `.perltidyrc` file in the user's home directory
+
+    To see where the search is done and which `.perltidyrc` is used type
+
+        $ perltidy -dpro
+
+    A system `PERLTIDY` logical can be defined pointing to a file with a
+    minimal configuration,  and users can defined their own logical to use a
+    personal `.perltidyrc` file.
+
+        $ define /system perltidy perl_root:[utils]perltidy.rc
+
+- The -x Parameter
+
+    If you have one of the magic incantations at the start of perl scripts,
+    so that they can be invoked as a .com file, then you will need to use
+    the **-x** parameter which causes perltidy to skip all lines until it
+    finds a hash bang line eg `#!perl -w`.  Since it is such a common
+    option this is probably a good thing to put in a `.perltidyrc` file.
+
+- VMS File Extensions
+
+    VMS file extensions will use an underscore character instead of a dot, 
+    when necessary, to create a valid filename.  So 
+
+          perltidy myfile.pl
+
+    will generate the output file `myfile.pl_tdy` instead of
+    `myfile.pl.tdy`, and so on. 
+
+# Troubleshooting / Other Operating Systems
+
+If there seems to be a problem locating a configuration file, you can see
+what is going on in the config file search with:
+
+    perltidy -dpro
+
+If you want to customize where perltidy looks for configuration files,
+look at the routine 'find\_config\_file' in module 'Tidy.pm'.  You should
+be able to at least use the '-pro=filename' method under most systems.  
+
+Remember to place quotes (either single or double) around input
+parameters which contain spaces, such as file names.  For example:
+
+    perltidy "file name with spaces"
+
+Without the quotes, perltidy would look for four files: `file`,
+`name`, `with`, and `spaces`.
+
+If you develop a system-dependent patch that might be of general
+interest, please let us know.
+
+# CONFIGURATION FILE
+
+You do not need a configuration file, but you may eventually want to
+create one to save typing; the tutorial and man page discuss this.
+
+# SYSTEM TEMPORARY FILES
+
+Perltidy needs to create a system temporary file when it invokes
+Pod::Html to format pod text under the -html option.  For Unix systems,
+this will normally be a file in /tmp, and for other systems, it will be
+a file in the current working directory named `perltidy.TMP`.  This file
+will be removed when the run finishes.
+
+# DOCUMENTATION
+
+Documentation is contained in **.pod** format, either in the `docs` directory
+or appended to the scripts.  
+
+These documents can also be found at http://perltidy.sourceforge.net
+
+Reading the brief tutorial should help you use perltidy effectively.  
+The tutorial can be read interactively with **perldoc**, for
+example
+
+    cd docs
+    perldoc tutorial.pod
+
+or else an `html` version can be made with **pod2html**:
+
+    pod2html tutorial.pod >tutorial.html
+
+If you use the Makefile.PL installation method on a Unix system, the
+**perltidy** and **Perl::Tidy** man pages should automatically be installed.
+Otherwise, you can extract the man pages with the **pod2xxxx** utilities, as
+follows:
+
+    cd bin
+    pod2text perltidy >perltidy.txt
+    pod2html perltidy >perltidy.html
+    
+    cd lib/Perl
+    pod2text Tidy.pm >Tidy.txt
+    pod2html Tidy.pm >Tidy.html
+
+After installation, the installation directory of files may be deleted. 
+
+Perltidy is still being developed, so please check sourceforge occasionally
+for updates if you find that it is useful.  New releases are announced
+on freshmeat.net.
+
+# CREDITS
+
+Thanks to the many programmers who have documented problems, made suggestions and sent patches.  
+
+# FEEDBACK / BUG REPORTS
+
+If you see ways to improve these notes, please let us know.
+
+A list of current bugs and issues can be found at the CPAN site [https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy](https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy)
+
+To report a new bug or problem, use the link on this page .  
diff --git a/MANIFEST b/MANIFEST
index 46d4799..3dac77b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,42 +1,78 @@
-BUGS
-CHANGES
-COPYING
-INSTALL
-MANIFEST
-Makefile.PL
-Makefile.npm
-README
-TODO
 bin/perltidy
-docs/README
-docs/perltidy.1
-docs/stylekey.pod
-docs/testfile.pl
-docs/tutorial.pod
-examples/README
+BUGS.md
+CHANGES.md
+COPYING
+docs/ChangeLog.html
+docs/index.html
+docs/index.md
+docs/INSTALL.html
+docs/perltidy.html
+docs/stylekey.html
+docs/Tidy.html
+docs/tutorial.html
 examples/bbtidy.pl
 examples/break_long_quotes.pl
+examples/delete_ending_blank_lines.pl
 examples/ex_mp.pl
-examples/lextest
+examples/filter_example.in
+examples/filter_example.pl
 examples/find_naughty.pl
-examples/perltidyrc_dump.pl
-examples/perltidy_okw.pl
+examples/lextest
 examples/perlcomment.pl
 examples/perllinetype.pl
 examples/perlmask.pl
+examples/perltidy_okw.pl
+examples/perltidyrc_dump.pl
 examples/perlxmltok.pl
 examples/pt.bat
+examples/README
 examples/testfa.t
 examples/testff.t
-examples/filter_example.pl
-examples/filter_example.in
+INSTALL.md
 lib/Perl/Tidy.pm
 lib/Perl/Tidy.pod
+lib/Perl/Tidy/Debugger.pm
+lib/Perl/Tidy/DevNull.pm
+lib/Perl/Tidy/Diagnostics.pm
+lib/Perl/Tidy/FileWriter.pm
+lib/Perl/Tidy/Formatter.pm
+lib/Perl/Tidy/HtmlWriter.pm
+lib/Perl/Tidy/IndentationItem.pm
+lib/Perl/Tidy/IOScalar.pm
+lib/Perl/Tidy/IOScalarArray.pm
+lib/Perl/Tidy/LineBuffer.pm
+lib/Perl/Tidy/LineSink.pm
+lib/Perl/Tidy/LineSource.pm
+lib/Perl/Tidy/Logger.pm
+lib/Perl/Tidy/Tokenizer.pm
+lib/Perl/Tidy/VerticalAligner.pm
+lib/Perl/Tidy/VerticalAligner/Alignment.pm
+lib/Perl/Tidy/VerticalAligner/Line.pm
+Makefile.PL
+MANIFEST			This list of files
 pm2pl
+README.md
+t/filter_example.t.SKIP
+t/snippets1.t
+t/snippets10.t
+t/snippets11.t
+t/snippets12.t
+t/snippets13.t
+t/snippets14.t
+t/snippets15.t
+t/snippets2.t
+t/snippets3.t
+t/snippets4.t
+t/snippets5.t
+t/snippets6.t
+t/snippets7.t
+t/snippets8.t
+t/snippets9.t
+t/test-eol.t
 t/test.t
 t/testsa.t
 t/testss.t
-t/testwide.t
 t/testwide.pl.src
-META.yml                                 Module meta-data (added by MakeMaker)
+t/testwide.t
+META.yml                                 Module YAML meta-data (added by MakeMaker)
 META.json                                Module JSON meta-data (added by MakeMaker)
diff --git a/META.json b/META.json
index d456517..2a16d64 100644
--- a/META.json
+++ b/META.json
@@ -6,7 +6,7 @@
    "dynamic_config" : 1,
    "generated_by" : "ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001",
    "license" : [
-      "unknown"
+      "gpl_2"
    ],
    "meta-spec" : {
       "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
@@ -29,11 +29,15 @@
          "requires" : {
             "ExtUtils::MakeMaker" : "0"
          }
-      },
-      "runtime" : {
-         "requires" : {}
       }
    },
    "release_status" : "stable",
-   "version" : "20180220"
+   "resources" : {
+      "repository" : {
+         "type" : "git",
+         "url" : "https://github.com/perltidy/perltidy.git",
+         "web" : "https://github.com/perltidy/perltidy"
+      }
+   },
+   "version" : "20190601"
 }
diff --git a/META.yml b/META.yml
index 1a66d09..43ec692 100644
--- a/META.yml
+++ b/META.yml
@@ -8,7 +8,7 @@ configure_requires:
   ExtUtils::MakeMaker: '0'
 dynamic_config: 1
 generated_by: 'ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001'
-license: unknown
+license: gpl
 meta-spec:
   url: http://module-build.sourceforge.net/META-spec-v1.4.html
   version: '1.4'
@@ -17,5 +17,6 @@ no_index:
   directory:
     - t
     - inc
-requires: {}
-version: '20180220'
+resources:
+  repository: https://github.com/perltidy/perltidy.git
+version: '20190601'
diff --git a/Makefile.PL b/Makefile.PL
index 968a70a..34eb0d4 100755
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,24 +1,27 @@
-# 
-# The indentation of this file is poor because this file may be used to
-# test perltidy after installation. For example, try
-#
-#    perltidy -lp Makefile.PL
-#
-# which will create a reformatted version as Makefile.PL.tdy.
-#
 use ExtUtils::MakeMaker;
 WriteMakefile(
-NAME  => "Perl::Tidy",
-VERSION_FROM => "lib/Perl/Tidy.pm",
-(
-  $] >= 5.005
-? (
-  ABSTRACT => 'indent and reformat perl scripts',
-  LICENSE => 'GPL-2.0+',
-AUTHOR => 'Steve Hancock '
-  )
-: ()
-),
-EXE_FILES => ['bin/perltidy'],
-dist  => { COMPRESS => 'gzip', SUFFIX => 'gz' },
+    NAME         => "Perl::Tidy",
+    VERSION_FROM => "lib/Perl/Tidy.pm",
+    (
+        $] >= 5.005
+        ? (
+            ABSTRACT => 'indent and reformat perl scripts',
+            LICENSE  => 'gpl_2',
+            AUTHOR   => 'Steve Hancock '
+          )
+        : ()
+    ),
+
+    EXE_FILES  => ['bin/perltidy'],
+    dist       => { COMPRESS => 'gzip', SUFFIX => 'gz' },
+    META_MERGE => {
+        'meta-spec' => { version => 2 },
+        resources   => {
+            repository => {
+                type => 'git',
+                url  => 'https://github.com/perltidy/perltidy.git',
+                web  => 'https://github.com/perltidy/perltidy',
+            },
+        },
+    },
 );
diff --git a/Makefile.npm b/Makefile.npm
deleted file mode 100644
index dba5fc7..0000000
--- a/Makefile.npm
+++ /dev/null
@@ -1,37 +0,0 @@
-# This is a sample Makefile for the previous perltidy installation
-# method in which perltidy was a single script.  The installation using
-# Makefile.PL is preferred, but this might be helpful if you want to use
-# this old installation method to install a single perltidy script
-# instead. See the INSTALL file for more information.  
-#
-# Step 0: Build the monolithic version of 'perltidy':
-#         'perl pm2pl'
-#     and then verify that 'perltidy' works using something like:
-#         perl ./perltidy lextest.pl
-
-# Step 1: edit BINDIR to reflect where you want to put perltidy
-# This is fairly standard:
-BINDIR	= /usr/local/bin
-
-# Step 2: edit MANDIR to reflect where you want to put the man page, perltidy.1
-# or comment out MANDIR to skip installing a man page.
-# (Man pages can be found on the web at http://perltidy.sourceforge.net)
-#
-# Note: The value of MANDIR varies a lot, so be sure to check this.
-MANDIR	= /usr/local/man/man1
-
-# Step 3: Then become superuser (if necessary) and issue the command
-# make install
-
-PROGRAM	= perltidy
-
-install: $(PROGRAM)
-	cp $(PROGRAM) $(BINDIR)/$(PROGRAM)
-	chmod 755 $(BINDIR)/$(PROGRAM)
-	if test -d $(MANDIR); then cp docs/$(PROGRAM).1 $(MANDIR)/$(PROGRAM).1; \
-	chmod 644 $(MANDIR)/$(PROGRAM).1; \
-	fi
-
-uninstall: 
-	if test -d $(BINDIR); then rm -f $(BINDIR)/$(PROGRAM); fi
-	if test -d $(MANDIR); then rm -f $(MANDIR)/$(PROGRAM).1; fi
diff --git a/README b/README
deleted file mode 100644
index b7e3536..0000000
--- a/README
+++ /dev/null
@@ -1,57 +0,0 @@
-Welcome to Perltidy!
-    Perltidy is a tool to indent and reformat perl scripts. It can also
-    write scripts in html format.
-
-    Perltidy is free software released under the GNU General Public License
-    -- please see the included file "COPYING" for details.
-
-PREREQUISITES
-    "perltidy" should work with most standard Perl installations. The
-    following modules are not required, but perltidy may use them if
-    detected:
-
-      HTML::Entities  will be used to encode HTML entities if detected
-      Pod::Html will be used to format pod text
-
-    The total disk space needed after removing the installation directory
-    will about 2 Mb.
-
-DOWNLOAD
-    There are two source distribution files:
-
-    *   A .tgz "tarball", with Unix-style  line endings, and
-
-    *   A zip file, .zip, with Windows-style  line endings.
-
-    In addition, the web site has links to debian and RPM packages.
-
-INSTALLATION
-    For most standard installations, the standard Makefile.PL method should
-    work:
-
-     perl Makefile.PL
-     make
-     make test
-     make install
-
-    The INSTALL file has additional installation notes, and tells how to use
-    perltidy without doing an installation.
-
-WHAT NEXT
-    Please see the CHANGES file for notices of any recent updates.
-
-    Please see the BUGS file for a list of all known open bugs.
-
-    Documentation can be found in the docs directory, and it can also be
-    found at http://perltidy.sourceforge.net
-
-    Reading the brief tutorial should help you use perltidy effectively.
-
-FEEDBACK / BUG REPORTS
-
-    A list of current bugs and issues can be found at the CPAN site
-
-     https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy
-
-    To report a new bug or problem, use the link on this page .  
-
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e077071
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# Welcome to Perltidy
+
+Perltidy is a tool to indent and reformat scripts written in Perl5.
+
+Perltidy is free software released under the GNU General Public
+License -- please see the included file "COPYING" for details.
+
+Documentation can be found at the web site [at GitHub](https://perltidy.github.io/perltidy/) 
+or [at Sourceforge](http://perltidy.sourceforge.net)
+
+A copy of the web site is contained in the docs folder of the distribution.
diff --git a/TODO b/TODO
deleted file mode 100644
index b8359cf..0000000
--- a/TODO
+++ /dev/null
@@ -1,6 +0,0 @@
-Perltidy TODO List
-
-    For a list of bugs and feature requests see:
-
-    https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy
-
diff --git a/bin/perltidy b/bin/perltidy
index 9839ed4..032fead 100755
--- a/bin/perltidy
+++ b/bin/perltidy
@@ -175,7 +175,9 @@ Show summary of usage and exit.
 
 Name of the output file (only if a single input file is being
 processed).  If no output file is specified, and output is not
-redirected to the standard output, the output will go to F.
+redirected to the standard output (see B<-st>), the output will go to
+F. [Note: - does not redirect to standard output. Use
+B<-st> instead.]
 
 =item	B<-st>,    B<--standard-output>
 
@@ -1590,35 +1592,28 @@ The default is not to use cuddled elses, and is indicated with the flag
 B<-nce> or B<--nocuddled-else>.  Here is a comparison of the
 alternatives:
 
+  # -ce
   if ($task) {
       yyy();
-  } else {    # -ce
+  } else {    
       zzz();
   }
 
+  # -nce (default)
   if ($task) {
 	yyy();
   }
-  else {    # -nce  (default)
+  else {    
 	zzz();
   }
 
-=item B<-cb>,   B<--cuddled-blocks>
+In this example the keyword B is placed on the same line which begins with
+the preceding closing block brace and is followed by its own opening block brace
+on the same line.  Other keywords and function names which are formatted with
+this "cuddled" style are B, B, B, B.
 
-This flag enables the "cuddled else" format style on a chain of specified block
-types.  The default is to apply it to a chain consisting of try-catch-finally
-blocks, but it can apply to any desired chain of blocks by specifying their
-names on a separate parameter B<-cbl>, described in the next section.  
-
-    # perltidy -cb:
-    try {
-        throw Error::Simple( "ok 2\n", 2 );
-    } catch Error::Simple with {
-        my $err = shift;
-        print "$err";
-    } finally {
-        print "ok 3\n";
-    };
+Other block types can be formatted by specifying their names on a 
+separate parameter B<-cbl>, described in a later section.  
 
 Cuddling between a pair of code blocks requires that the closing brace of the
 first block start a new line.  If this block is entirely on one line in the
@@ -1629,12 +1624,11 @@ the chain.  If it spans multiple lines then cuddling is made and continues
 along the chain, regardless of the sizes of subsequent blocks. Otherwise, short
 lines remain intact.
 
-So for example, the B<-cb> flag would not have any effect if the above snippet
+So for example, the B<-ce> flag would not have any effect if the above snippet
 is rewritten as
 
-    try { throw Error::Simple( "ok 2\n", 2 ); }
-    catch Error::Simple with { my $err = shift; print "$err"; }
-    finally { print "ok 3\n"; };
+  if ($task) { yyy() }
+  else {    zzz() }
 
 If the first block spans multiple lines, then cuddling can be done and will
 continue for the subsequent blocks in the chain, as illustrated in the previous
@@ -1645,61 +1639,48 @@ there are comments after the closing brace where cuddling would occur then
 cuddling will be prevented.  If this occurs, cuddling will restart later in the
 chain if possible.  
 
-The default for this parameter is B<--nocuddled-blocks>
-
-=item B<-cbl>,    B<--cuddled-block-list>     
-
-The block types to which the B<-cuddled-blocks> style applies is defined by
-this parameter.  This parameter is a character string, giving a list of
-block types separated by dashes.
-
-The default value for this string is 
-
-   -cbl="try-catch-finally" 
-
-This string will cause cuddled formatting to be applied to every block in a chain
-starting with a "try" and followed by any number of "catch" and "finally"
-blocks.
-
-In general, a string describing a chain of blocks has the form
+=item B<-cb>,   B<--cuddled-blocks>
 
-   -cbl="word1-word2-word3-...-wordn"
+This flag is equivalent to B<-ce>. 
 
-In this case, a chain begins when an opening block brace preceded by word1 in 
-the list is encountered.  The chain continues if the closing block brace is
-followed immediately by any of word2, word3, etc. 
 
-If the leading word, word1, might be repeated later in a chain then it should
-also be included amoung the secondary words.
+=item B<-cbl>,    B<--cuddled-block-list>     
 
-Multiple chain types may be specified by separating the strings with commas or
-spaces.  So for example if we have two chains of code blocks, f1-f2-f3  and g1-g2-g3-g4,
-they could be specified as
+The built-in default cuddled block types are B.
 
-   -cbl="f1-f2-f3  g1-g2-g3-g4"
-or
-   -cbl=f1-f2-f3,g1-g2-g3-g4  
+Additional block types to which the B<-cuddled-blocks> style applies can be defined by
+this parameter.  This parameter is a character string, giving a list of
+block types separated by commas or spaces.  For example, to cuddle code blocks
+of type sort, map and grep, in addition to the default types, the string could
+be set to
+  
+  -cbl="sort map grep"
 
-Spaces are easier to read but commas may avoid quotation difficulties when
-entering data in a command shell.
+or equivalently
 
-To define secondary words that apply to all block types, other than those explicitly specified,
-the leading word can be omitted.  For example, the built-in cuddled-else format specified by
-the B<-ce> flag can be approximately specified by
+  -cbl=sort,map,grep 
 
-   -cbl="if-else-elsif unless-else-elsif -continue"
-or
-   -cbl=if-else-elsif,unless-else-elsif,-continue
+Note however that these particular block types are typically short so there might not be much
+opportunity for the cuddled format style.
 
-The final string -continue allows cuddling the optional continue block
-which may follow may other block types.
+Using commas avoids the need to protect spaces with quotes.
 
 As a diagnostic check, the flag B<--dump-cuddled-block-list> or B<-dcbl> can be
-used to view the hash of values this flag creates.
+used to view the hash of values that are generated by this flag. 
 
 Finally, note that the B<-cbl> flag by itself merely specifies which blocks are formatted
 with the cuddled format. It has no effect unless this formatting style is activated with
-B<-cb>.
+B<-ce>.
+
+=item B<-cblx>,    B<--cuddled-block-list-exclusive>     
+
+When cuddled else formatting is selected with B<-ce>, setting this flag causes
+perltidy to ignore its built-in defaults and rely exclusively on the block types
+specified on the B<-cbl> flag described in the previous section.  For example,
+to avoid using cuddled B and B, which among in the defaults, the
+following set of parameters could be used:
+
+  perltidy -ce -cbl='else elsif continue' -cblx
 
 
 =item B<-cbo=n>,   B<--cuddled-break-option=n>
@@ -1713,7 +1694,7 @@ options are:
 
    cbo=0  Never force a short block to break.
    cbo=1  If the first of a pair of blocks is broken in the input file, 
-          then break the second.
+          then break the second [DEFAULT].
    cbo=2  Break open all blocks for maximal cuddled formatting.
 
 The default and recommended value is B.  With this value, if the starting
@@ -1725,10 +1706,6 @@ blocks.
 
 The option B produces maximal cuddling but will not allow any short blocks.
 
-Note: at present, this option currently only applies to blocks controlled by
-the B<-cb> flag.  Cuddling under the B<-ce> flag corresponds approximately to
-B<-cbo=1> but cannot currently be changed.
-
 
 =item B<-bl>,    B<--opening-brace-on-new-line>     
 
@@ -1948,7 +1925,7 @@ Here is an example illustrating a welded container within a welded containers:
 
 This format option is quite general but there are some limitations.  
 
-One limitiation is that any line length limit still applies and can cause long
+One limitation is that any line length limit still applies and can cause long
 welded sections to be broken into multiple lines.  
 
 Another limitation is that an opening symbol which delimits quoted text cannot
@@ -2174,7 +2151,7 @@ controls can be used:
 
 The flag B<-sot> is an abbreviation for B<-sop -sohb -sosb>.
 
-The flag B<-sobb> is a abbreviation for B<-bbvt=2 -bbvtl='*'>.  This
+The flag B<-sobb> is an abbreviation for B<-bbvt=2 -bbvtl='*'>.  This
 will case a cascade of opening block braces to appear on a single line,
 although this an uncommon occurrence except in test scripts. 
 
@@ -2463,6 +2440,42 @@ or C, then the container will remain broken.  Also, breaks
 at internal keywords C and C will normally be retained.
 To prevent this, and thus form longer lines, use B<-nbol>.
 
+=item B<-bom>,  B<--break-at-old-method-breakpoints>
+
+By default, a method call arrow C<-E> is considered a candidate for
+a breakpoint, but method chains will fill to the line width before a break is
+considered.  With B<-bom>, breaks before the arrow are preserved, so if you
+have preformatted a method chain:
+
+  my $q = $rs
+    ->related_resultset('CDs')
+    ->related_resultset('Tracks')
+    ->search({
+      'track.id' => {-ident => 'none_search.id'},
+    })->as_query;
+
+It will B these breaks, rather than become this:
+
+  my $q = $rs->related_resultset('CDs')->related_resultset('Tracks')->search({
+      'track.id' => {-ident => 'none_search.id'},
+    })->as_query;
+
+This flag will also look for and keep a 'cuddled' style of calls, 
+in which lines begin with a closing paren followed by a call arrow, 
+as in this example:
+
+  my $q = $rs->related_resultset(
+      'CDs'
+  )->related_resultset(
+      'Tracks'
+  )->search( {
+      'track.id' => { -ident => 'none_search.id' },
+  } )->as_query;
+
+You may want to include the B<-weld-nested-containers> flag in this case to keep 
+nested braces and parens together, as in the last line.
+
+
 =item B<-bok>,  B<--break-at-old-keyword-breakpoints>
 
 By default, perltidy will retain a breakpoint before keywords which may
@@ -2715,6 +2728,194 @@ previous versions.
 
 =back
 
+B
+
+The parameters in this section provide some control over the placement of blank
+lines within and around groups of statements beginning with selected keywords.
+These blank lines are called here B, and all of the
+parameters begin with B<--keyword-group-blanks*>, or B<-kgb*> for short.  The
+default settings do not employ these controls but they can be enabled with the
+following parameters:
+
+B<-kgbl=s> or B<--keyword-group-blanks-list=s>; B is a quoted string of keywords
+
+B<-kgbs=s> or B<--keyword-group-blanks-size=s>; B gives the number of keywords required to form a group.  
+
+B<-kgbb=n> or B<--keyword-group-blanks-before=n>; B = (0, 1, or 2) controls a leading blank
+
+B<-kgba=n> or B<--keyword-group-blanks-after=n>; B = (0, 1, or 2) controls a trailing blank
+
+B<-kgbi> or B<--keyword-group-blanks-inside> is a switch for adding blanks between subgroups
+
+B<-kgbd> or B<--keyword-group-blanks-delete> is a switch for removing initial blank lines between keywords
+
+B<-kgbr=n> or B<--keyword-group-blanks-repeat-count=n> can limit the number of times this logic is applied
+
+In addition, the following abbreviations are available to for simplified usage:
+
+B<-kgb> or B<--keyword-group-blanks> is short for B<-kgbb=2 -kgba=2 kgbi>
+
+B<-nkgb> or B<--nokeyword-group-blanks>, is short for B<-kgbb=1 -kgba=1 nkgbi>
+
+Before describing the meaning of the parameters in detail let us look at an
+example which is formatted with default parameter settings.
+
+        print "Entering test 2\n";
+        use Test;
+        use Encode qw(from_to encode decode
+          encode_utf8 decode_utf8
+          find_encoding is_utf8);
+        use charnames qw(greek);
+        my @encodings     = grep( /iso-?8859/, Encode::encodings() );
+        my @character_set = ( '0' .. '9', 'A' .. 'Z', 'a' .. 'z' );
+        my @source        = qw(ascii iso8859-1 cp1250);
+        my @destiny       = qw(cp1047 cp37 posix-bc);
+        my @ebcdic_sets   = qw(cp1047 cp37 posix-bc);
+        my $str           = join( '', map( chr($_), 0x20 .. 0x7E ) );
+        return unless ($str);
+
+using B gives:
+
+        print "Entering test 2\n";
+                                      <----------this blank controlled by -kgbb
+        use Test;
+        use Encode qw(from_to encode decode
+          encode_utf8 decode_utf8
+          find_encoding is_utf8);
+        use charnames qw(greek);
+                                      <---------this blank controlled by -kgbi
+        my @encodings     = grep( /iso-?8859/, Encode::encodings() );
+        my @character_set = ( '0' .. '9', 'A' .. 'Z', 'a' .. 'z' );
+        my @source        = qw(ascii iso8859-1 cp1250);
+        my @destiny       = qw(cp1047 cp37 posix-bc);
+        my @ebcdic_sets   = qw(cp1047 cp37 posix-bc);
+        my $str           = join( '', map( chr($_), 0x20 .. 0x7E ) );
+                                      <----------this blank controlled by -kgba
+        return unless ($str);
+
+Blank lines have been introduced around the B and B sequences.  What
+happened is that the default keyword list includes B and B but not
+B and B.  So a continuous sequence of nine B and B
+statements was located.  This number exceeds the default threshold of five, so
+blanks were placed before and after the entire group.  Then, since there was
+also a subsequence of six B lines, a blank line was introduced to separate
+them.
+
+Finer control over blank placement can be achieved by using the individual
+parameters rather than the B<-kgb> flag.  The individual controls are as follows.
+
+B<-kgbl=s> or B<--keyword-group-blanks-list=s>, where B is a quoted string,
+defines the set of keywords which will be formed into groups.  The string is a
+space separated list of keywords.  The default set is B, but any list of keywords may be used. Comment lines may also be included in a keyword group, even though they are not keywords.  To include ordinary block comments, include the symbol B. To include static block comments (which normally begin with '##'), include the symbol B.
+
+B<-kgbs=s> or B<--keyword-group-blanks-size=s>, where B is a string
+describing the number of consecutive keyword statements forming a group.  If
+B is an integer then it is the minimum number required for a group.  A
+maximum value may also be given with the format B, where B is
+the minimum number and B is the maximum number, and the min and max values
+are separated by one or more dots.  No groups will be found if the maximum is
+less than the minimum.  The maximum is unlimited if not given.  The default is
+B.  Some examples:
+
+    s      min   max         number for group
+    3      3     unlimited   3 or more
+    1.1    1     1           1
+    1..3   1     3           1 to 3
+    1.0    1     0           (no match)
+    
+
+B<-kgbb=n> or B<--keyword-group-blanks-before=n> specifies whether
+a blank should appear before the first line of the group, as follows:
+
+   n=0 => (delete) an existing blank line will be removed
+   n=1 => (stable) no change to the input file is made  [DEFAULT]
+   n=2 => (insert) a blank line is introduced if possible
+
+B<-kgba=n> or B<--keyword-group-blanks-after=n> likewise specifies
+whether a blank should appear after the last line of the group, using the same
+scheme (0=delete, 1=stable, 2=insert).
+
+B<-kgbi> or B<--keyword-group-blanks-inside> controls
+the insertion of blank lines between the first and last statement of the entire
+group.  If there is a continuous run of a single statement type with more than
+the minimum threshold number (as specified with B<-kgbs=s>) then this
+switch causes a blank line be inserted between this
+subgroup and the others. In the example above this happened between the
+B and B statements.
+
+B<-kgbd> or B<--keyword-group-blanks-delete> controls the deletion of any
+blank lines that exist in the the group when it is first scanned.  When
+statements are initially scanned, any existing blank lines are included in the
+collection.  Any such orignial blank lines will be deleted before any other
+insertions are made when the parameter B<-kgbd> is set.  The default is not to
+do this, B<-nkgbd>.  
+
+B<-kgbr=n> or B<--keyword-group-blanks-repeat-count=n> specifies B, the
+maximum number of times this logic will be applied to any file.  The special
+value B is the same as n=infinity which means it will be applied to an
+entire script [Default].  A value B could be used to make it apply just
+one time for example.  This might be useful for adjusting just the B
+statements in the top part of a module for example.
+
+B<-kgb> or B<--keyword-group-blanks> is an abbreviation equivalent to setting
+B<-kgbb=1 -kgba=1 -kgbi>.  This turns on keyword group formatting with a set of
+default values.  
+
+B<-nkgb> or B<--nokeyword-group-blanks> is equivalent to B<-kgbb=0 -kgba
+nkgbi>.  This flag turns off keyword group blank lines and is the default
+setting. 
+
+Here are a few notes about the functioning of this technique.  
+
+=over 4
+
+=item *
+
+These parameters are probably more useful as part of a major code reformatting
+operation rather than as a routine formatting operation.
+
+In particular, note that deleting old blank lines with B<-kgbd> is an
+irreversible operation so it should be applied with care.  Existing blank lines
+may be serving an important role in controlling vertical alignment.
+
+=item *
+
+Conflicts which arise among these B parameters and other blank line
+controls are generally resolved by producing the maximum number of blank lines
+implied by any parameter.
+
+For example, if the flags B<--freeze-blank-lines>, or
+B<--keep-old-blank-lines=2>, are set, then they have priority over any blank
+line deletion implied by the B<-kgb> flags of this section, so no blank lines
+will be deleted.
+
+For another example, if a keyword group ends at a B and the flag B requests no blank line there, but we also have B<--blank-lines-before-subs=2>, then two blank lines will still be introduced before the sub.
+
+=item *
+
+The introduction of blank lines does not occur if it would conflict with other
+input controls or code validity. For example, a blank line will not be placed
+within a here-doc or within a section of code marked with format skipping
+comments.  And in general, a blank line will only be introduced at the end of a
+group if the next statement is a line of code. 
+
+=item *
+
+The count which is used to determine the group size is not the number of lines
+but rather the total number of keywords which are found.  Individual statements
+with a certain leading keyword may continue on multiple lines, but if any of
+these lines is nested more than one level deep then that group will be ended.
+
+=item *
+
+The search for groups of lines with similar leading keywords is based on the
+input source, not the final formatted source.  Consequently, if the source code
+is badly formatted, it would be best to make a first formatting pass without
+these options.
+
+=back
+
 =head2 Styles
 
 A style refers to a convenient collection of existing parameters.
@@ -2771,8 +2972,90 @@ after the -pbp parameter.  For example,
         : ' elsewhere in this document'
       );
 
+
+=item  One-line blocks 
+
+There are a few points to note regarding one-line blocks.  A one-line
+block is something like this,
+
+	if ($x > 0) { $y = 1 / $x }  
+
+where the contents within the curly braces is short enough to fit
+on a single line.
+
+With few exceptions, perltidy retains existing one-line blocks, if it
+is possible within the line-length constraint, but it does not attempt
+to form new ones.  In other words, perltidy will try to follow the
+one-line block style of the input file.
+
+If an existing one-line block is longer than the maximum line length,
+however, it will be broken into multiple lines.  When this happens, perltidy
+checks for and adds any optional terminating semicolon (unless the B<-nasc>
+option is used) if the block is a code block.  
+
+The main exception is that perltidy will attempt to form new one-line
+blocks following the keywords C, C, and C, because
+these code blocks are often small and most clearly displayed in a single
+line.
+
+One-line block rules can conflict with the cuddled-else option.  When
+the cuddled-else option is used, perltidy retains existing one-line
+blocks, even if they do not obey cuddled-else formatting.
+
+Occasionally, when one-line blocks get broken because they exceed the
+available line length, the formatting will violate the requested brace style.
+If this happens, reformatting the script a second time should correct
+the problem.
+
+Sometimes it might be desirable to convert a script to have one-line blocks
+whenever possible.  Although there is currently no flag for this, a simple
+workaround is to execute perltidy twice, once with the flag B<-noadd-newlines>
+and then once again with normal parameters, like this:  
+
+     cat infile | perltidy -nanl | perltidy >outfile
+
+When executed on this snippet
+
+    if ( $? == -1 ) {
+        die "failed to execute: $!\n";
+    }
+    if ( $? == -1 ) {
+        print "Had enough.\n";
+        die "failed to execute: $!\n";
+    }
+
+the result is
+
+    if ( $? == -1 ) { die "failed to execute: $!\n"; }
+    if ( $? == -1 ) {
+        print "Had enough.\n";
+        die "failed to execute: $!\n";
+    }
+
+This shows that blocks with a single statement become one-line blocks.
+
+
+=item B<-olbs=n>, B<--one-line-block-semicolons=n>
+
+This flag controls the placement of semicolons at the end of one-line blocks.
+Semicolons are optional before a closing block brace, and frequently they are
+omitted at the end of a one-line block containing just a single statement.
+By default, perltidy follows the input file regarding these semicolons, 
+but this behavior can be controlled by this flag.  The values of n are:
+
+  n=0 remove terminal semicolons in one-line blocks having a single statement
+  n=1 stable; keep input file placement of terminal semicolons [DEFAULT ]
+  n=2 add terminal semicolons in all one-line blocks
+
+Note that the B option has no effect if adding semicolons is prohibited
+with the B<-nasc> flag.  Also not that while B adds missing semicolons to
+all one-line blocks, regardless of complexity, the B option only removes
+ending semicolons which terminate one-line blocks containing just one
+semicolon.  So these two options are not exact inverses.
+
 =back
 
+
 =head2 Controlling Vertical Alignment
 
 Vertical alignment refers to lining up certain symbols in list of consecutive
@@ -3004,40 +3287,6 @@ to make the minimum number of one-line blocks.
 Another use for B<--mangle> is to combine it with B<-dac> to reduce
 the file size of a perl script.
 
-=item  One-line blocks 
-
-There are a few points to note regarding one-line blocks.  A one-line
-block is something like this,
-
-	if ($x > 0) { $y = 1 / $x }  
-
-where the contents within the curly braces is short enough to fit
-on a single line.
-
-With few exceptions, perltidy retains existing one-line blocks, if it
-is possible within the line-length constraint, but it does not attempt
-to form new ones.  In other words, perltidy will try to follow the
-one-line block style of the input file.
-
-If an existing one-line block is longer than the maximum line length,
-however, it will be broken into multiple lines.  When this happens, perltidy
-checks for and adds any optional terminating semicolon (unless the B<-nasc>
-option is used) if the block is a code block.  
-
-The main exception is that perltidy will attempt to form new one-line
-blocks following the keywords C, C, and C, because
-these code blocks are often small and most clearly displayed in a single
-line.
-
-One-line block rules can conflict with the cuddled-else option.  When
-the cuddled-else option is used, perltidy retains existing one-line
-blocks, even if they do not obey cuddled-else formatting.
-
-Occasionally, when one-line blocks get broken because they exceed the
-available line length, the formatting will violate the requested brace style.
-If this happens, reformatting the script a second time should correct
-the problem.
-
 =item  Debugging 
 
 The following flags are available for debugging:
@@ -3076,6 +3325,12 @@ Memoization can reduce run time when running perltidy repeatedly in a
 single process.  It is on by default but can be deactivated for
 testing with B<-nmem>.
 
+B<--no-timestamp> or B<-nts> will eliminate any time stamps in output files to prevent
+differences in dates from causing test installation scripts to fail. There are just
+a couple of places where timestamps normally occur. One is in the headers of
+html files, and another is when the B<-cscw> option is selected. The default is
+to allow timestamps (B<--timestamp> or B<-ts>).
+
 B<--file-size-order> or B<-fso> will cause files to be processed in order of
 increasing size, when multiple files are being processed.  This is useful
 during program development, when large numbers of files with varying sizes are
@@ -3480,16 +3735,16 @@ perlstyle(1), Perl::Tidy(3)
 
 =head1 VERSION
 
-This man page documents perltidy version 20180220.
+This man page documents perltidy version 20190601
 
 =head1 BUG REPORTS
 
-A list of current bugs and issues can be found at the CPAN site
-
-     https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy
+A list of current bugs and issues can be found at the CPAN site L
 
 To report a new bug or problem, use the link on this page.  
 
+The source code repository is at L.
+
 =head1 COPYRIGHT
 
 Copyright (c) 2000-2018 by Steve Hancock
diff --git a/docs/ChangeLog.html b/docs/ChangeLog.html
new file mode 100644
index 0000000..895f042
--- /dev/null
+++ b/docs/ChangeLog.html
@@ -0,0 +1,3460 @@
+

Perltidy Change Log

+ +

2019 06 01

+ +
- rt #128477: Prevent inconsistent owner/group and setuid/setgid bits. 
+  In the -b (--backup-and-modify-in-place) mode, an attempt is made to set ownership
+  of the output file equal to the input file, if they differ.
+  In all cases, if the final output file ownership differs from input file, any setuid/setgid bits are cleared.
+
+- Added option -bom  (--break-at-old-method-breakpoints) by
+  merrillymeredith which preserves breakpoints of method chains. Modified to also handle a cuddled call style.
+
+- Merged patch to fix Windows EOL translation error with UTF-8 written by
+  Ron Ivy. This update prevents automatic conversion to 'DOS' CRLF line
+  endings.  Also, Windows system testing at the appveyor site is working again.
+
+- RT #128280, added flag --one-line-block-semicolons=n (-olbs=n) 
+  to control semicolons in one-line blocks.  The values of n are:
+    n=0 means no semicolons termininating simple one-line blocks
+    n=1 means stable; do not change from input file [DEFAULT and current]
+    n=2 means always add semicolons in one-line blocks
+  The current behavior corresponds to the default n=1.
+
+- RT #128216, Minor update to prevent inserting unwanted blank line at
+  indentation level change.  This should not change existing scripts.
+
+- RT #81852: Improved indentation when quoted word (qw) lists are 
+  nested within other containers using the --weld-nested (-wn) flag.
+  The example given previously (below) is now closer to what it would
+  be with a simple list instead of qw:
+
+  # perltidy -wn
+  use_all_ok( qw{
+      PPI
+      PPI::Tokenizer
+      PPI::Lexer
+      PPI::Dumper
+      PPI::Find
+      PPI::Normal
+      PPI::Util
+      PPI::Cache
+  } );
+
+- RT#12764, introduced new feature allowing placement of blanks around
+  sequences of selected keywords. This can be activated with the -kgb* 
+  series of parameters described in the manual.
+
+- Rewrote vertical algnment module.  It is better at finding
+  patterns in complex code. For example,
+
+OLD:
+       /^-std$/ && do { $std       = 1;     next; };
+       /^--$/   && do { @link_args = @argv; last; };
+       /^-I(.*)/ && do { $path = $1 || shift @argv; next; };
+
+NEW:
+       /^-std$/  && do { $std       = 1;                 next; };
+       /^--$/    && do { @link_args = @argv;             last; };
+       /^-I(.*)/ && do { $path      = $1 || shift @argv; next; };
+
+- Add repository URLs to META files 
+
+- RT #118553, "leave only one newline at end of file". This option was not 
+  added because of undesirable side effects, but a new filter script
+  was added which can do this, "examples/delete_ending_blank_lines.pl".
+
+ +

2018 11 20

+ +
- fix RT#127736 Perl-Tidy-20181119 has the EXE_FILES entry commented out in
+  Makefile.PL so it doesn't install the perltidy script or its manpage.
+
+ +

2018 11 19

+ +
- Removed test case 'filter_example.t' which was causing a failure on a
+  Windows installation for unknown reasons, possibly due to an unexpected
+  perltidyrc being read by the test script.  Added VERSION numbers to all
+  new modules.
+
+ +

2018 11 17

+ +
- Fixed RT #126965, in which a ternary operator was misparsed if immediately
+  following a function call without arguments, such as: 
+    my $restrict_customer = shift ? 1 : 0;
+
+- Fixed RT #125012: bug in -mangle --delete-all-comments
+  A needed blank space before bareword tokens was being removed when comments 
+  were deleted
+
+- Fixed RT #81852: Stacked containers and quoting operators. Quoted words
+  (qw) delimited by container tokens ('{', '[', '(', '<') are now included in
+  the --weld-nested (-wn) flag:
+
+      # perltidy -wn
+      use_all_ok( qw{
+            PPI
+            PPI::Tokenizer
+            PPI::Lexer
+            PPI::Dumper
+            PPI::Find
+            PPI::Normal
+            PPI::Util
+            PPI::Cache
+            } );
+
+- The cuddled-else (-ce) coding was merged with the new cuddled-block (-cb)
+  coding.  The change is backward compatible and simplifies input.  
+  The --cuddled-block-option=n (-cbo=n) flag now applies to both -ce and -cb 
+  formatting.  In fact the -cb flag is just an alias for -ce now.
+
+- Fixed RT #124594, license text desc. changed from 'GPL-2.0+' to 'gpl_2'
+
+- Fixed bug in which a warning about a possible code bug was issued in a
+  script with brace errors. 
+
+- added option --notimestamp or -nts to eliminate any time stamps in output 
+  files.  This is used to prevent differences in test scripts from causing
+  failure at installation. For example, the -cscw option will put a date
+  stamp on certain closing side comments. We need to avoid this in order
+  to test this feature in an installation test.
+
+- Fixed bug with the entab option, -et=8, in which the leading space of
+  some lines was was not entabbed.  This happened in code which was adjusted
+  for vertical alignment and in hanging side comments. Thanks to Glenn.
+
+- Fixed RT #127633, undesirable line break after return when -baao flag is set
+
+- Fixed RT #127035, vertical alignment. Vertical alignment has been improved 
+  in several ways.  Thanks especially to Michael Wardman and Glenn for sending 
+  helpful snippets. 
+
+  - Alignment of the =~ operators has been reactivated.  
+
+      OLD:
+      $service_profile =~ s/^\s+|\s+$//g;
+      $host_profile =~ s/^\s+|\s+$//g;
+
+      NEW:
+      $service_profile =~ s/^\s+|\s+$//g;
+      $host_profile    =~ s/^\s+|\s+$//g;
+
+  - Alignment of the // operator has been reactivated.  
+
+      OLD:
+      is( pop // 7,       7, 'pop // ... works' );
+      is( pop() // 7,     0, 'pop() // ... works' );
+      is( pop @ARGV // 7, 3, 'pop @array // ... works' );
+
+      NEW:
+      is( pop       // 7, 7, 'pop // ... works' );
+      is( pop()     // 7, 0, 'pop() // ... works' );
+      is( pop @ARGV // 7, 3, 'pop @array // ... works' );
+
+  - The rules for alignment of just two lines have been adjusted,
+    hopefully to be a little better overall.  In some cases, two 
+    lines which were previously unaligned are now aligned, and vice-versa.
+
+      OLD:
+      $expect = "1$expect" if $expect =~ /^e/i;
+      $p = "1$p" if defined $p and $p =~ /^e/i;
+
+      NEW:
+      $expect = "1$expect" if $expect =~ /^e/i;
+      $p      = "1$p"      if defined $p and $p =~ /^e/i;
+
+
+- RT #106493; source code repository location has been added to docs; it is 
+     https://github.com/perltidy/perltidy
+
+- The packaging for this version has changed. The Tidy.pm module is much 
+  smaller.  Supporting modules have been split out from it and placed below 
+  it in the path Perl/Tidy/*.
+
+- A number of new installation test cases have been added. Updates are now
+  continuously tested at Travis CI against versions back to Perl 5.08.
+
+ +

2018 02 20

+ +
- RT #124469, #124494, perltidy often making empty files.  The previous had
+  an index error causing it to fail, particularly in version 5.18 of Perl.
+
+  Please avoid version 20180219.
+
+ +

2018 02 19

+ +
- RT #79947, cuddled-else generalization. A new flag -cb provides
+  'cuddled-else' type formatting for an arbitrary type of block chain. The
+  default is try-catch-finally, but this can be modified with the 
+  parameter -cbl. 
+
+- Fixed RT #124298: add space after ! operator without breaking !! secret 
+  operator
+
+- RT #123749: numerous minor improvements to the -wn flag were made.  
+
+- Fixed a problem with convergence tests in which iterations were stopping 
+  prematurely. 
+
+- Here doc targets for <<~ type here-docs may now have leading whitespace.
+
+- Fixed RT #124354. The '-indent-only' flag was not working correctly in the 
+  previous release. A bug in version 20180101 caused extra blank lines 
+  to be output.
+
+- Issue RT #124114. Some improvements were made in vertical alignment
+  involving 'fat commas'.
+
+ +

2018 01 01

+ +
- Added new flag -wn (--weld-nested-containers) which addresses these issues:
+  RT #123749: Problem with promises; 
+  RT #119970: opening token stacking strange behavior;
+  RT #81853: Can't stack block braces
+
+  This option causes closely nested pairs of opening and closing containers
+  to be "welded" together and essentially be formatted as a single unit,
+  with just one level of indentation.
+
+  Since this is a new flag it is set to be "off" by default but it has given 
+  excellent results in testing. 
+
+  EXAMPLE 1, multiple blocks, default formatting:
+      do {
+          {
+              next if $x == $y;    # do something here
+          }
+      } until $x++ > $z;
+
+  perltidy -wn
+      do { {
+          next if $x == $y;
+      } } until $x++ > $z;
+
+   EXAMPLE 2, three levels of wrapped function calls, default formatting:
+          p(
+              em(
+                  conjug(
+                      translate( param('verb') ), param('tense'),
+                      param('person')
+                  )
+              )
+          );
+
+      # perltidy -wn
+          p( em( conjug(
+              translate( param('verb') ),
+              param('tense'), param('person')
+          ) ) );
+
+      # EXAMPLE 3, chained method calls, default formatting:
+      get('http://mojolicious.org')->then(
+          sub {
+              my $mojo = shift;
+              say $mojo->res->code;
+              return get('http://metacpan.org');
+          }
+      )->then(
+          sub {
+              my $cpan = shift;
+              say $cpan->res->code;
+          }
+      )->catch(
+          sub {
+              my $err = shift;
+              warn "Something went wrong: $err";
+          }
+      )->wait;
+
+      # perltidy -wn
+      get('http://mojolicious.org')->then( sub {
+          my $mojo = shift;
+          say $mojo->res->code;
+          return get('http://metacpan.org');
+      } )->then( sub {
+          my $cpan = shift;
+          say $cpan->res->code;
+      } )->catch( sub {
+          my $err = shift;
+          warn "Something went wrong: $err";
+      } )->wait;
+
+
+- Fixed RT #114359: Missparsing of "print $x ** 0.5;
+
+- Deactivated the --check-syntax flag for better security.  It will be
+  ignored if set.  
+
+- Corrected minimum perl version from 5.004 to 5.008 based on perlver
+  report.  The change is required for coding involving wide characters.
+
+- For certain severe errors, the source file will be copied directly to the
+  output without formatting. These include ending in a quote, ending in a
+  here doc, and encountering an unidentified character.
+
+ +

2017 12 14

+ +
- RT #123749, partial fix.  "Continuation indentation" is removed from lines 
+  with leading closing parens which are part of a call chain. 
+  For example, the call to pack() is is now outdented to the starting 
+  indentation in the following experession:  
+
+      # OLD
+      $mw->Button(
+          -text    => "New Document",
+          -command => \&new_document
+        )->pack(
+          -side   => 'bottom',
+          -anchor => 'e'
+        );
+
+      # NEW
+      $mw->Button(
+          -text    => "New Document",
+          -command => \&new_document
+      )->pack(
+          -side   => 'bottom',
+          -anchor => 'e'
+      );
+
+  This modification improves readability of complex expressions, especially
+  when the user uses the same value for continuation indentation (-ci=n) and 
+  normal indentation (-i=n).  Perltidy was already programmed to
+  do this but a minor bug was preventing it.
+
+- RT #123774, added flag to control space between a backslash and a single or
+  double quote, requested by Robert Rothenberg.  The issue is that lines like
+
+     $str1=\"string1";
+     $str2=\'string2';
+
+  confuse syntax highlighters unless a space is left between the backslash and
+  the quote.
+
+  The new flag to control this is -sbq=n (--space-backslash-quote=n), 
+  where n=0 means no space, n=1 means follow existing code, n=2 means always
+  space.  The default is n=1, meaning that a space will be retained if there
+  is one in the source code.
+
+- Fixed RT #123492, support added for indented here doc operator <<~ added 
+  in v5.26.  Thanks to Chris Weyl for the report.
+
+- Fixed docs; --closing-side-comment-list-string should have been just
+  --closing-side-comment-list.  Thanks to F.Li.
+
+- Added patch RT #122030] Perl::Tidy sometimes does not call binmode.
+  Thanks to Irilis Aelae.
+
+- Fixed RT #121959, PERLTIDY doesn't honor the 'three dot' notation for 
+  locating a config file using environment variables.  Thanks to John 
+  Wittkowski.
+
+- Minor improvements to formatting, in which some additional vertical
+  aligmnemt is done. Thanks to Keith Neargarder.
+
+- RT #119588.  Vertical alignment is no longer done for // operator.
+
+ +

2017 05 21

+ +
- Fixed debian #862667: failure to check for perltidy.ERR deletion can lead 
+  to overwriting abritrary files by symlink attack. Perltidy was continuing 
+  to write files after an unlink failure.  Thanks to Don Armstrong 
+  for a patch.
+
+- Fixed RT #116344, perltidy fails on certain anonymous hash references:
+  in the following code snippet the '?' was misparsed as a pattern 
+  delimiter rather than a ternary operator.
+      return ref {} ? 1 : 0;
+
+- Fixed RT #113792: misparsing of a fat comma (=>) right after 
+  the __END__ or __DATA__ tokens.  These keywords were getting
+  incorrectly quoted by the following => operator.
+
+- Fixed RT #118558. Custom Getopt::Long configuration breaks parsing 
+  of perltidyrc.  Perltidy was resetting the users configuration too soon.
+
+- Fixed RT #119140, failure to parse double diamond operator.  Code to
+  handle this new operator has been added.
+
+- Fixed RT #120968.  Fixed problem where -enc=utf8 didn't work 
+  with --backup-and-modify-in-place. Thanks to Heinz Knutzen for this patch.
+
+- Fixed minor formatting issue where one-line blocks for subs with signatures 
+  were unnecesarily broken
+
+- RT #32905, patch to fix utf-8 error when output was STDOUT. 
+
+- RT #79947, improved spacing of try/catch/finally blocks. Thanks to qsimpleq
+  for a patch.
+
+- Fixed #114909, Anonymous subs with signatures and prototypes misparsed as
+  broken ternaries, in which a statement such as this was not being parsed
+  correctly:
+      return sub ( $fh, $out ) : prototype(*$) { ... }
+
+- Implemented RT #113689, option to introduces spaces after an opening block
+  brace and before a closing block brace. Four new optional controls are
+  added. The first two define the minimum number of blank lines to be
+  inserted 
+
+   -blao=i or --blank-lines-after-opening-block=i
+   -blbc=i or --blank-lines-before-closing-block=i
+
+  where i is an integer, the number of lines (the default is 0).  
+
+  The second two define the types of blocks to which the first two apply 
+
+   -blaol=s or --blank-lines-after-opening-block-list=s
+   -blbcl=s or --blank-lines-before-closing-block-list=s
+
+  where s is a string of possible block keywords (default is just 'sub',
+  meaning a named subroutine).
+
+  For more information please see the documentation.
+
+- The method for specifying block types for certain input parameters has
+  been generalized to distinguish between normal named subroutines and
+  anonymous subs.  The keyword for normal subroutines remains 'sub', and
+  the new keyword for anonymous subs is 'asub'. 
+
+- Minor documentation changes. The BUGS sections now have a link
+  to CPAN where most open bugs and issues can be reviewed and bug reports
+  can be submitted.  The information in the AUTHOR and CREDITS sections of
+  the man pages have been removed from the man pages to streamline the
+  documentation. This information is still in the source code.
+
+ +

2016 03 02

+ +
- RT #112534. Corrected a minor problem in which an unwanted newline
+  was placed before the closing brace of an anonymous sub with 
+  a signature, if it was in a list.  Thanks to Dmytro Zagashev.
+
+- Corrected a minor problem in which occasional extra indentation was
+  given to the closing brace of an anonymous sub in a list when the -lp 
+  parameter was set.
+
+ +

2016 03 01

+ +
 - RT #104427. Added support for signatures.
+
+ - RT #111512.  Changed global warning flag $^W = 1 to use warnings;
+   Thanks to Dmytro Zagashev.
+
+ - RT #110297, added support for new regexp modifier /n
+   Thanks to Dmytro Zagashev.
+
+ - RT #111519.  The -io (--indent-only) and -dac (--delete-all-comments)
+   can now both be used in one pass. Thanks to Dmitry Veltishev.
+
+ - Patch to avoid error message with 'catch' used by TryCatch, as in
+      catch($err){
+         # do something
+      }
+   Thanks to Nick Tonkin.
+
+ - RT #32905, UTF-8 coding is now more robust. Thanks to qsimpleq
+   and Dmytro for patches.
+
+ - RT #106885. Added string bitwise operators ^. &. |. ~. ^.= &.= |.=
+
+ - Fixed RT #107832 and #106492, lack of vertical alignment of two lines
+   when -boc flag (break at old commas) is set.  This bug was 
+   inadvertantly introduced in previous bug fix RT #98902. 
+
+ - Some common extensions to Perl syntax are handled better.
+   In particular, the following snippet is now foratted cleanly:
+
+     method deposit( Num $amount) {
+         $self->balance( $self->balance + $amount );
+     }
+
+   A new flag -xs (--extended-syntax) was added to enable this, and the default
+   is to use -xs. 
+
+   In previous versions, and now only when -nxs is set, this snippet of code
+   generates the following error message:
+
+   "syntax error at ') {', didn't see one of: case elsif for foreach given if switch unless until when while"
+
+ +

2015 08 15

+ +
 - Fixed RT# 105484, Invalid warning about 'else' in 'switch' statement.  The
+   warning happened if a 'case' statement did not use parens.
+
+ - Fixed RT# 101547, misparse of // caused error message.  Also..
+
+ - Fixed RT# 102371, misparse of // caused unwated space in //=
+
+ - Fixed RT# 100871, "silent failure of HTML Output on Windows". 
+   Changed calls to tempfile() from:
+     my ( $fh_tmp, $tmpfile ) = tempfile();
+   to have the full path name:
+     my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile()
+   because of problems in the Windows version reported by Dean Pearce.
+
+ - Fixed RT# 99514, calling the perltidy module multiple times with 
+   a .perltidyrc file containing the parameter --output-line-ending 
+   caused a crash.  This was a glitch in the memoization logic. 
+
+ - Fixed RT#99961, multiple lines inside a cast block caused unwanted
+   continuation indentation.  
+
+ - RT# 32905, broken handling of UTF-8 strings. 
+   A new flag -utf8 causes perltidy assume UTF-8 encoding for input and 
+   output of an io stream.  Thanks to Sebastian Podjasek for a patch.  
+   This feature may not work correctly in older versions of Perl. 
+   It worked in a linux version 5.10.1 but not in a Windows version 5.8.3 (but
+   otherwise perltidy ran correctly).
+
+ - Warning files now report perltidy VERSION. Suggested by John Karr.
+
+ - Fixed long flag --nostack-closing-tokens (-nsct has always worked though). 
+   This was due to a typo.  This also fixed --nostack-opening-tokens to 
+   behave correctly.  Thanks to Rob Dixon.
+
+ +

2014 07 11

+ +
- Fixed RT #94902: abbreviation parsing in .perltidyrc files was not
+  working for multi-line abbreviations.  Thanks to Eric Fung for 
+  supplying a patch. 
+
+- Fixed RT #95708, misparsing of a hash when the first key was a perl
+  keyword, causing a semicolon to be incorrectly added.
+
+- Fixed RT #94338 for-loop in a parenthesized block-map.  A code block within
+  parentheses of a map, sort, or grep function was being mistokenized.  In 
+  rare cases this could produce in an incorrect error message.  The fix will
+  produce some minor formatting changes.  Thanks to Daniel Trizen 
+  discovering and documenting this.
+
+- Fixed RT #94354, excess indentation for stacked tokens.  Thanks to 
+  Colin Williams for supplying a patch.
+
+- Added support for experimental postfix dereferencing notation introduced in
+  perl 5.20. RT #96021.
+
+- Updated documentation to clarify the behavior of the -io flag
+  in response to RT #95709.  You can add -noll or -l=0 to prevent 
+  long comments from being outdented when -io is used.
+
+- Added a check to prevent a problem reported in RT #81866, where large
+  scripts which had been compressed to a single line could not be formatted
+  because of a check for VERSION for MakeMaker. The workaround was to 
+  use -nvpl, but this shouldn't be necessary now.
+
+- Fixed RT #96101; Closing brace of anonymous sub in a list was being
+  indented.  For example, the closing brace of the anonymous sub below 
+  will now be lined up with the word 'callback'.  This problem 
+  occured if there was no comma after the closing brace of the anonymous sub.  
+  This update may cause minor changes to formatting of code with lists 
+  of anonymous subs, especially TK code.
+
+  # OLD
+  my @menu_items = (
+
+      #...
+      {
+          path     => '/_Operate/Transcode and split',
+          callback => sub {
+              return 1 if not $self->project_opened;
+              $self->comp('project')->transcode( split => 1 );
+            }
+      }
+  );
+
+  # NEW
+  my @menu_items = (
+
+      #...
+      {
+          path     => '/_Operate/Transcode and split',
+          callback => sub {
+              return 1 if not $self->project_opened;
+              $self->comp('project')->transcode( split => 1 );
+          }
+      }
+  );
+
+ +

2014 03 28

+ +
- Fixed RT #94190 and debian Bug #742004: perltidy.LOG file left behind.
+  Thanks to George Hartzell for debugging this.  The problem was
+  caused by the memoization speedup patch in version 20121207.  An
+  unwanted flag was being set which caused a LOG to be written if 
+  perltidy was called multiple times.
+
+- New default behavior for LOG files: If the source is from an array or 
+  string (through a call to the perltidy module) then a LOG output is only
+  possible if a logfile stream is specified.  This is to prevent 
+  unexpected perltidy.LOG files. 
+
+- Fixed debian Bug #740670, insecure temporary file usage.  File::Temp is now
+  used to get a temporary file.  Thanks to Don Anderson for a patch.
+
+- Any -b (--backup-and-modify-in-place) flag is silently ignored when a 
+  source stream, destination stream, or standard output is used.  
+  This is because the -b flag may have been in a .perltidyrc file and 
+  warnings break Test::NoWarnings.  Thanks to Marijn Brand.
+
+ +

2013 09 22

+ +
- Fixed RT #88020. --converge was not working with wide characters.
+
+- Fixed RT #78156. package NAMESPACE VERSION syntax not accepted.
+
+- First attempt to fix RT #88588.  INDEX END tag change in pod2html breaks 
+  perltidy -html. I put in a patch which should work but I don't yet have
+  a way of testing it.
+
+ +

2013 08 06

+ +
- Fixed RT #87107, spelling
+
+ +

2013 08 05

+ +
- Fixed RT #87502, incorrect of parsing of smartmatch before hash brace
+
+- Added feature request RT #87330, trim whitespace after POD.
+  The flag -trp (--trim-pod) will trim trailing whitespace from lines of POD
+
+ +

2013 07 17

+ +
- Fixed RT #86929, #86930, missing lhs of assignment.
+
+- Fixed RT #84922, moved pod from Tidy.pm into Tidy.pod
+
+ +

2012 12 07

+ +
- The flag -cab=n or --comma-arrow-breakpoints=n has been generalized
+  to give better control over breaking open short containers.  The
+  possible values are now:
+
+    n=0 break at all commas after =>  
+    n=1 stable: break at all commas after => if container is open,
+        EXCEPT FOR one-line containers
+    n=2 break at all commas after =>, BUT try to form the maximum
+        maximum one-line container lengths
+    n=3 do not treat commas after => specially at all 
+    n=4 break everything: like n=0 but also break a short container with
+        a => not followed by a comma
+    n=5 stable: like n=1 but ALSO break at open one-line containers (default)
+
+  New values n=4 and n=5 have been added to allow short blocks to be
+  broken open.  The new default is n=5, stable.  It should more closely
+  follow the breaks in the input file, and previously formatted code
+  should remain unchanged.  If this causes problems use -cab=1 to recover 
+  the former behavior.  Thanks to Tony Maszeroski for the suggestion.
+
+  To illustrate the need for the new options, if perltidy is given
+  the following code, then the old default (-cab=1) was to close up 
+  the 'index' container even if it was open in the source.  The new 
+  default (-cab=5) will keep it open if it was open in the source.
+
+   our $fancypkg = {
+       'ALL' => {
+           'index' => {
+               'key' => 'value',
+           },
+           'alpine' => {
+               'one'   => '+',
+               'two'   => '+',
+               'three' => '+',
+           },
+       }
+   };
+
+- New debug flag --memoize (-mem).  This version contains a 
+  patch supplied by Jonathan Swartz which can significantly speed up
+  repeated calls to Perl::Tidy::perltidy in a single process by caching
+  the result of parsing the formatting parameters.  A factor of up to 10
+  speedup was achieved for masontidy (https://metacpan.org/module/masontidy).
+  The memoization patch is on by default but can be deactivated for 
+  testing with -nmem (or --no-memoize).
+
+- New flag -tso (--tight-secret-operators) causes certain perl operator
+  sequences (secret operators) to be formatted "tightly" (without spaces).  
+  The most common of these are 0 +  and + 0 which become 0+ and +0.  The
+  operators currently modified by this flag are: 
+       =( )=  0+  +0  ()x!! ~~<>  ,=>
+  Suggested by by Philippe Bruhat. See https://metacpan.org/module/perlsecret
+  This flag is off by default.
+
+- New flag -vmll (--variable-maximum-line-length) makes the maximum
+  line length increase with the nesting depth of a line of code.  
+  Basically, it causes the length of leading whitespace to be ignored when
+  setting line breaks, so the formatting of a block of code is independent
+  of its nesting depth.  Try this option if you have deeply nested 
+  code or data structures, perhaps in conjunction with the -wc flag
+  described next.  The default is not todo this.
+
+- New flag -wc=n (--whitespace-cycle=n) also addresses problems with
+  very deeply nested code and data structures.  When this parameter is
+  used and the nesting depth exceeds the value n, the leading whitespace 
+  will be reduced and start at 1 again.  The result is that deeply
+  nested blocks of code will shift back to the left. This occurs cyclically 
+  to any nesting depth.  This flag may be used either with or without -vmll.
+  The default is not to use this (-wc=0).
+
+- Fixed RT #78764, error parsing smartmatch operator followed by anonymous
+  hash or array and then a ternary operator; two examples:
+
+   qr/3/ ~~ ['1234'] ? 1 : 0;
+   map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
+
+- Fixed problem with specifying spaces around arrows using -wls='->'
+  and -wrs='->'.  Thanks to Alain Valleton for documenting this problem. 
+
+- Implemented RT #53183, wishlist, lines of code with the same indentation
+  level which are contained with multiple stacked opening and closing tokens
+  (requested with flags -sot -sct) now have reduced indentation.  
+
+   # Default
+   $sender->MailMsg(
+       {
+           to      => $addr,
+           subject => $subject,
+           msg     => $body
+       }
+   );
+
+   # OLD: perltidy -sot -sct 
+   $sender->MailMsg( {
+           to      => $addr,
+           subject => $subject,
+           msg     => $body
+   } );
+
+   # NEW: perltidy -sot -sct 
+   $sender->MailMsg( {
+       to      => $addr,
+       subject => $subject,
+       msg     => $body
+   } );
+
+- New flag -act=n (--all-containers-tightness=n) is an abbreviation for
+  -pt=n -sbt=n -bt=n -bbt=n, where n=0,1, or 2.  It simplifies input when all
+  containers have the same tightness. Using the same example:
+
+   # NEW: perltidy -sot -sct -act=2
+   $sender->MailMsg({
+       to      => $addr,
+       subject => $subject,
+       msg     => $body
+   });
+
+- New flag -sac (--stack-all-containers) is an abbreviation for -sot -sct
+  This is part of wishlist item RT #53183. Using the same example again:
+
+   # NEW: perltidy -sac -act=2
+   $sender->MailMsg({
+       to      => $addr,
+       subject => $subject,
+       msg     => $body
+   });
+
+ - new flag -scbb (--stack-closing-block-brace) causes isolated closing 
+   block braces to stack as in the following example. (Wishlist item RT#73788)
+
+   DEFAULT:
+   for $w1 (@w1) {
+       for $w2 (@w2) {
+           for $w3 (@w3) {
+               for $w4 (@w4) {
+                   push( @lines, "$w1 $w2 $w3 $w4\n" );
+               }
+           }
+       }
+   }
+
+   perltidy -scbb:
+   for $w1 (@w1) {
+       for $w2 (@w2) {
+           for $w3 (@w3) {
+               for $w4 (@w4) {
+                   push( @lines, "$w1 $w2 $w3 $w4\n" );
+               } } } }
+
+  There is, at present, no flag to place these closing braces at the end
+  of the previous line. It seems difficult to develop good rules for 
+  doing this for a wide variety of code and data structures.
+
+- Parameters defining block types may use a wildcard '*' to indicate
+  all block types.  Previously it was not possible to include bare blocks.
+
+- A flag -sobb (--stack-opening-block-brace) has been introduced as an
+  alias for -bbvt=2 -bbvtl='*'.  So for example the following test code:
+
+  {{{{{{{ $testing }}}}}}}
+
+  cannot be formatted as above but can at least be kept vertically compact 
+  using perltidy -sobb -scbb
+
+  {   {   {   {   {   {   {   $testing
+                          } } } } } } }
+
+  Or even, perltidy -sobb -scbb -i=1 -bbt=2
+  {{{{{{{$testing
+        }}}}}}}
+
+
+- Error message improved for conflicts due to -pbp; thanks to Djun Kim.
+
+- Fixed RT #80645, error parsing special array name '@$' when used as 
+  @{$} or $#{$}
+
+- Eliminated the -chk debug flag which was included in version 20010406 to
+  do a one-time check for a bug with multi-line quotes.  It has not been
+  needed since then.
+
+- Numerous other minor formatting improvements.
+
+ +

2012 07 14

+ +
- Added flag -iscl (--ignore-side-comment-lengths) which causes perltidy 
+  to ignore the length of side comments when setting line breaks, 
+  RT #71848.  The default is to include the length of side comments when
+  breaking lines to stay within the length prescribed by the -l=n
+  maximum line length parameter.  For example,
+
+    Default behavior on a single line with long side comment:
+       $vmsfile =~ s/;[\d\-]*$//
+         ;    # Clip off version number; we can use a newer version as well
+
+    perltidy -iscl leaves the line intact:
+
+       $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well
+
+- Fixed RT #78182, side effects with STDERR.  Error handling has been
+  revised and the documentation has been updated.  STDERR can now be 
+  redirected to a string reference, and perltidy now returns an 
+  error flag instead of calling die when input errors are detected. 
+  If the error flag is set then no tidied output was produced.
+  See man Perl::Tidy for an example.
+
+- Fixed RT #78156, erroneous warning message for package VERSION syntax.
+
+- Added abbreviations -conv (--converge) to simplify iteration control.
+  -conv is equivalent to -it=4 and will insure that the tidied code is
+  converged to its final state with the minimum number of iterations.
+
+- Minor formatting modifications have been made to insure convergence.
+
+- Simplified and hopefully improved the method for guessing the starting 
+  indentation level of entabbed code.  Added flag -dt=n (--default_tabsize=n) 
+  which might be helpful if the guessing method does not work well for
+  some editors.
+
+- Added support for stacked labels, upper case X/B in hex and binary, and
+  CORE:: namespace.
+
+- Eliminated warning messages for using keyword names as constants.
+
+ +

2012 07 01

+ +
- Corrected problem introduced by using a chomp on scalar references, RT #77978
+
+- Added support for Perl 5.14 package block syntax, RT #78114.
+
+- A convergence test is made if three or more iterations are requested with
+  the -it=n parameter to avoid wasting computer time.  Several hundred Mb of
+  code gleaned from the internet were searched with the results that: 
+   - It is unusual for two iterations to be required unless a major 
+     style change is being made. 
+   - Only one case has been found where three iterations were required.  
+   - No cases requiring four iterations have been found with this version.
+  For the previous version several cases where found the results could
+  oscillate between two semi-stable states. This version corrects this.
+
+  So if it is important that the code be converged it is okay to set -it=4
+  with this version and it will probably stop after the second iteration.
+
+- Improved ability to identify and retain good line break points in the
+  input stream, such as at commas and equals. You can always tell 
+  perltidy to ignore old breakpoints with -iob.  
+
+- Fixed glitch in which a terminal closing hash brace followed by semicolon
+  was not outdented back to the leading line depth like other closing
+  tokens.  Thanks to Keith Neargarder for noting this.
+
+    OLD:
+       my ( $pre, $post ) = @{
+           {
+               "pp_anonlist" => [ "[", "]" ],
+               "pp_anonhash" => [ "{", "}" ]
+           }->{ $kid->ppaddr }
+         };   # terminal brace
+
+    NEW:
+       my ( $pre, $post ) = @{
+           {
+               "pp_anonlist" => [ "[", "]" ],
+               "pp_anonhash" => [ "{", "}" ]
+           }->{ $kid->ppaddr }
+       };    # terminal brace
+
+- Removed extra indentation given to trailing 'if' and 'unless' clauses 
+  without parentheses because this occasionally produced undesirable 
+  results.  This only applies where parens are not used after the if or
+  unless.
+
+   OLD:
+       return undef
+         unless my ( $who, $actions ) =
+             $clause =~ /^($who_re)((?:$action_re)+)$/o; 
+
+   NEW:
+       return undef
+         unless my ( $who, $actions ) =
+         $clause =~ /^($who_re)((?:$action_re)+)$/o;
+
+ +

2012 06 19

+ +
- Updated perltidy to handle all quote modifiers defined for perl 5 version 16.
+
+- Side comment text in perltidyrc configuration files must now begin with
+  at least one space before the #.  Thus:
+
+  OK:
+    -l=78 # Max line width is 78 cols
+  BAD: 
+    -l=78# Max line width is 78 cols
+
+  This is probably true of almost all existing perltidyrc files, 
+  but if you get an error message about bad parameters
+  involving a '#' the first time you run this version, please check the side
+  comments in your perltidyrc file, and add a space before the # if necessary.
+  You can quickly see the contents your perltidyrc file, if any, with the
+  command:
+
+    perltidy -dpro
+
+  The reason for this change is that some parameters naturally involve
+  the # symbol, and this can get interpreted as a side comment unless the
+  parameter is quoted.  For example, to define -sphb=# it used to be necessary
+  to write
+    -sbcp='#'
+  to keep the # from becoming part of a comment.  This was causing 
+  trouble for new users.  Now it can also be written without quotes: 
+    -sbcp=#
+
+- Fixed bug in processing some .perltidyrc files containing parameters with
+  an opening brace character, '{'.  For example the following was
+  incorrectly processed:
+     --static-block-comment-prefix="^#{2,}[^\s#]"
+  Thanks to pdagosto.
+
+- Added flag -boa (--break-at-old-attribute-breakpoints) which retains
+  any existing line breaks at attribute separation ':'. This is now the
+  default, use -nboa to deactivate.  Thanks to Daphne Phister for the patch.  
+  For example, given the following code, the line breaks at the ':'s will be
+  retained:
+
+                   my @field
+                     : field
+                     : Default(1)
+                     : Get('Name' => 'foo') : Set('Name');
+
+  whereas the previous version would have output a single line.  If
+  the attributes are on a single line then they will remain on a single line.
+
+- Added new flags --blank-lines-before-subs=n (-blbs=n) and
+  --blank-lines-before-packages=n (-blbp=n) to put n blank lines before
+  subs and packages.  The old flag -bbs is now equivalent to -blbs=1 -blbp=1.
+  and -nbbs is equivalent to -blbs=0 -blbp=0. Requested by M. Schwern and
+  several others.
+
+- Added feature -nsak='*' meaning no space between any keyword and opening 
+  paren.  This avoids listing entering a long list of keywords.  Requested
+  by M. Schwern.
+
+- Added option to delete a backup of original file with in-place-modify (-b)
+  if there were no errors.  This can be requested with the flag -bext='/'.  
+  See documentation for details.  Requested by M. Schwern and others.
+
+- Fixed bug where the module postfilter parameter was not applied when -b 
+  flag was used.  This was discovered during testing.
+
+- Fixed in-place-modify (-b) to work with symbolic links to source files.
+  Thanks to Ted Johnson.
+
+- Fixed bug where the Perl::Tidy module did not allow -b to be used 
+  in some cases.
+
+- No extra blank line is added before a comment which follows
+  a short line ending in an opening token, for example like this:
+   OLD:
+           if (
+
+               # unless we follow a blank or comment line
+               $last_line_leading_type !~ /^[#b]$/
+               ...
+
+   NEW:
+           if (
+               # unless we follow a blank or comment line
+               $last_line_leading_type !~ /^[#b]$/
+               ...
+
+   The blank is not needed for readability in these cases because there
+   already is already space above the comment.  If a blank already 
+   exists there it will not be removed, so this change should not 
+   change code which has previously been formatted with perltidy. 
+   Thanks to R.W.Stauner.
+
+- Likewise, no extra blank line is added above a comment consisting of a
+  single #, since nothing is gained in readability.
+
+- Fixed error in which a blank line was removed after a #>>> directive. 
+  Thanks to Ricky Morse.
+
+- Unnecessary semicolons after given/when/default blocks are now removed.
+
+- Fixed bug where an unwanted blank line could be added before
+  pod text in __DATA__ or __END__ section.  Thanks to jidani.
+
+- Changed exit flags from 1 to 0 to indicate success for -help, -version, 
+  and all -dump commands.  Also added -? as another way to dump the help.
+  Requested by Keith Neargarder.
+
+- Fixed bug where .ERR and .LOG files were not written except for -it=2 or more
+
+- Fixed bug where trailing blank lines at the end of a file were dropped when
+  -it>1.
+
+- Fixed bug where a line occasionally ended with an extra space. This reduces
+  rhe number of instances where a second iteration gives a result different
+  from the first. 
+
+- Updated documentation to note that the Tidy.pm module <stderr> parameter may
+  not be a reference to SCALAR or ARRAY; it must be a file.
+
+- Syntax check with perl now work when the Tidy.pm module is processing
+  references to arrays and strings.  Thanks to Charles Alderman.
+
+- Zero-length files are no longer processed due to concerns for data loss
+  due to side effects in some scenarios.
+
+- block labels, if any, are now included in closing side comment text
+  when the -csc flag is used.  Suggested by Aaron.  For example, 
+  the label L102 in the following block is now included in the -csc text:
+
+     L102: for my $i ( 1 .. 10 ) {
+       ...
+     } ## end L102: for my $i ( 1 .. 10 )
+
+ +

2010 12 17

+ +
- added new flag -it=n or --iterations=n
+  This flag causes perltidy to do n complete iterations.  
+  For most purposes the default of n=1 should be satisfactory.  However n=2
+  can be useful when a major style change is being made, or when code is being
+  beautified on check-in to a source code control system.  The run time will be
+  approximately proportional to n, and it should seldom be necessary to use a
+  value greater than n=2.  Thanks to Jonathan Swartz
+
+- A configuration file pathname begins with three dots, e.g.
+  ".../.perltidyrc", indicates that the file should be searched for starting
+  in the current directory and working upwards. This makes it easier to have
+  multiple projects each with their own .perltidyrc in their root directories.
+  Thanks to Jonathan Swartz for this patch.
+
+- Added flag --notidy which disables all formatting and causes the input to be
+  copied unchanged.  This can be useful in conjunction with hierarchical
+  F<.perltidyrc> files to prevent unwanted tidying.
+  Thanks to Jonathan Swartz for this patch.
+
+- Added prefilters and postfilters in the call to the Tidy.pm module.
+  Prefilters and postfilters. The prefilter is a code reference that 
+  will be applied to the source before tidying, and the postfilter 
+  is a code reference to the result before outputting.  
+
+  Thanks to Jonathan Swartz for this patch.  He writes:
+  This is useful for all manner of customizations. For example, I use
+  it to convert the 'method' keyword to 'sub' so that perltidy will work for
+  Method::Signature::Simple code:
+
+  Perl::Tidy::perltidy(
+     prefilter => sub { $_ = $_[0]; s/^method (.*)/sub $1 \#__METHOD/gm; return $_ },
+     postfilter => sub { $_ = $_[0]; s/^sub (.*?)\s* \#__METHOD/method $1/gm; return $_ }
+  );
+
+- The starting indentation level of sections of code entabbed with -et=n
+  is correctly guessed if it was also produced with the same -et=n flag.  This
+  keeps the indentation stable on repeated formatting passes within an editor.
+  Thanks to Sam Kington and Glenn.
+
+- Functions with prototype '&' had a space between the function and opening
+  peren.  This space now only occurs if the flag --space-function-paren (-sfp)
+  is set.  Thanks to Zrajm Akfohg.
+
+- Patch to never put spaces around a bare word in braces beginning with ^ as in:
+    my $before = ${^PREMATCH};
+  even if requested with the -bt=0 flag because any spaces cause a syntax error in perl.
+  Thanks to Fabrice Dulanoy.
+
+ +

2009 06 16

+ +
- Allow configuration file to be 'perltidy.ini' for Windows systems.
+  i.e. C:\Documents and Settings\User\perltidy.ini
+  and added documentation for setting configuation file under Windows in man
+  page.  Thanks to Stuart Clark.
+
+- Corrected problem of unwanted semicolons in hash ref within given/when code.
+ Thanks to Nelo Onyiah.
+
+- added new flag -cscb or --closing-side-comments-balanced
+ When using closing-side-comments, and the closing-side-comment-maximum-text
+ limit is exceeded, then the comment text must be truncated.  Previous
+ versions of perltidy terminate with three dots, and this can still be
+ achieved with -ncscb:
+
+  perltidy -csc -ncscb
+
+  } ## end foreach my $foo (sort { $b cmp $a ...
+
+ However this causes a problem with older editors which cannot recognize
+ comments or are not configured to doso because they cannot "bounce" around in
+ the text correctly.  The B<-cscb> flag tries to help them by 
+ appending appropriate terminal balancing structure:
+
+  perltidy -csc -cscb
+
+  } ## end foreach my $foo (sort { $b cmp $a ... })
+
+ Since there is much to be gained and little to be lost by doing this,
+ the default is B<-cscb>.  Use B<-ncscb> if you do not want this.
+
+ Thanks to Daniel Becker for suggesting this option.
+
+- After an isolated closing eval block the continuation indentation will be
+  removed so that the braces line up more like other blocks.  Thanks to Yves Orton.
+
+OLD:
+   eval {
+       #STUFF;
+       1;    # return true
+     }  
+     or do {
+       #handle error
+     };
+
+NEW:
+   eval {
+       #STUFF;
+       1;    # return true
+   } or do {
+       #handle error
+   };
+
+-A new flag -asbl (or --opening-anonymous-sub-brace-on-new-line) has
+ been added to put the opening brace of anonymous sub's on a new line,
+ as in the following snippet:
+
+   my $code = sub
+   {
+       my $arg = shift;
+       return $arg->(@_);
+   };
+
+ This was not possible before because the -sbl flag only applies to named
+ subs. Thanks to Benjamin Krupp.
+
+-Fix tokenization bug with the following snippet
+  print 'hi' if { x => 1, }->{x};
+ which resulted in a semicolon being added after the comma.  The workaround
+ was to use -nasc, but this is no longer necessary.  Thanks to Brian Duggan. 
+
+-Fixed problem in which an incorrect error message could be triggered
+by the (unusual) combination of parameters  -lp -i=0 -l=2 -ci=0 for
+example.  Thanks to Richard Jelinek.
+
+-A new flag --keep-old-blank-lines=n has been added to
+give more control over the treatment of old blank lines in
+a script.  The manual has been revised to discuss the new
+flag and clarify the treatment of old blank lines.  Thanks
+to Oliver Schaefer.
+
+ +

2007 12 05

+ +
-Improved support for perl 5.10: New quote modifier 'p', new block type UNITCHECK, 
+new keyword break, improved formatting of given/when.
+
+-Corrected tokenization bug of something like $var{-q}.
+
+-Numerous minor formatting improvements.
+
+-Corrected list of operators controlled by -baao -bbao to include
+  . : ? && || and or err xor
+
+-Corrected very minor error in log file involving incorrect comment
+regarding need for upper case of labels.  
+
+-Fixed problem where perltidy could run for a very long time
+when given certain non-perl text files.
+
+-Line breaks in un-parenthesized lists now try to follow
+line breaks in the input file rather than trying to fill
+lines.  This usually works better, but if this causes
+trouble you can use -iob to ignore any old line breaks.
+Example for the following input snippet:
+
+   print
+   "conformability (Not the same dimension)\n",
+   "\t", $have, " is ", text_unit($hu), "\n",
+   "\t", $want, " is ", text_unit($wu), "\n",
+   ;
+
+ OLD:
+   print "conformability (Not the same dimension)\n", "\t", $have, " is ",
+     text_unit($hu), "\n", "\t", $want, " is ", text_unit($wu), "\n",;
+
+ NEW:
+   print "conformability (Not the same dimension)\n",
+     "\t", $have, " is ", text_unit($hu), "\n",
+     "\t", $want, " is ", text_unit($wu), "\n",
+     ;
+
+ +

2007 08 01

+ +
-Added -fpsc option (--fixed-position-side-comment). Thanks to Ueli Hugenschmidt. 
+For example -fpsc=40 tells perltidy to put side comments in column 40
+if possible.  
+
+-Added -bbao and -baao options (--break-before-all-operators and
+--break-after-all-operators) to simplify command lines and configuration
+files.  These define an initial preference for breaking at operators which can
+be modified with -wba and -wbb flags.  For example to break before all operators
+except an = one could use --bbao -wba='=' rather than listing every
+single perl operator (except =) on a -wbb flag.
+
+-Added -kis option (--keep-interior-semicolons).  Use the B<-kis> flag
+to prevent breaking at a semicolon if there was no break there in the
+input file.  To illustrate, consider the following input lines:
+
+   dbmclose(%verb_delim); undef %verb_delim;
+   dbmclose(%expanded); undef %expanded;
+   dbmclose(%global); undef %global;
+
+Normally these would be broken into six lines, but 
+perltidy -kis gives:
+
+   dbmclose(%verb_delim); undef %verb_delim;
+   dbmclose(%expanded);   undef %expanded;
+   dbmclose(%global);     undef %global;
+
+-Improved formatting of complex ternary statements, with indentation
+of nested statements.  
+ OLD:
+   return defined( $cw->{Selected} )
+     ? (wantarray)
+     ? @{ $cw->{Selected} }
+     : $cw->{Selected}[0]
+     : undef;
+
+ NEW:
+   return defined( $cw->{Selected} )
+     ? (wantarray)
+         ? @{ $cw->{Selected} }
+         : $cw->{Selected}[0]
+     : undef;
+
+-Text following un-parenthesized if/unless/while/until statements get a
+full level of indentation.  Suggested by Jeff Armstorng and others. 
+OLD:
+   return $ship->chargeWeapons("phaser-canon")
+     if $encounter->description eq 'klingon'
+     and $ship->firepower >= $encounter->firepower
+     and $location->status ne 'neutral';
+NEW:
+   return $ship->chargeWeapons("phaser-canon")
+     if $encounter->description eq 'klingon'
+         and $ship->firepower >= $encounter->firepower
+         and $location->status ne 'neutral';
+
+ +

2007 05 08

+ +
-Fixed bug where #line directives were being indented.  Thanks to
+Philippe Bruhat.
+
+ +

2007 05 04

+ +
-Fixed problem where an extra blank line was added after an =cut when either
+(a) the =cut started (not stopped) a POD section, or (b) -mbl > 1. 
+Thanks to J. Robert Ray and Bill Moseley.
+
+ +

2007 04 24

+ +
-ole (--output-line-ending) and -ple (--preserve-line-endings) should
+now work on all systems rather than just unix systems. Thanks to Dan
+Tyrell.
+
+-Fixed problem of a warning issued for multiple subs for BEGIN subs
+and other control subs. Thanks to Heiko Eissfeldt.
+
+-Fixed problem where no space was introduced between a keyword or
+bareword and a colon, such as:
+
+( ref($result) eq 'HASH' && !%$result ) ? undef: $result;
+
+Thanks to Niek.
+
+-Added a utility program 'break_long_quotes.pl' to the examples directory of
+the distribution.  It breaks long quoted strings into a chain of concatenated
+sub strings no longer than a selected length.  Suggested by Michael Renner as
+a perltidy feature but was judged to be best done in a separate program.
+
+-Updated docs to remove extra < and >= from list of tokens 
+after which breaks are made by default.  Thanks to Bob Kleemann.
+
+-Removed improper uses of $_ to avoid conflicts with external calls, giving
+error message similar to:
+   Modification of a read-only value attempted at 
+   /usr/share/perl5/Perl/Tidy.pm line 6907.
+Thanks to Michael Renner.
+
+-Fixed problem when errorfile was not a plain filename or filehandle
+in a call to Tidy.pm.  The call
+perltidy(source => \$input, destination => \$output, errorfile => \$err);
+gave the following error message:
+ Not a GLOB reference at /usr/share/perl5/Perl/Tidy.pm line 3827.
+Thanks to Michael Renner and Phillipe Bruhat.
+
+-Fixed problem where -sot would not stack an opening token followed by
+a side comment.  Thanks to Jens Schicke.
+
+-improved breakpoints in complex math and other long statements. Example:
+OLD:
+   return
+     log($n) + 0.577215664901532 + ( 1 / ( 2 * $n ) ) -
+     ( 1 / ( 12 * ( $n**2 ) ) ) + ( 1 / ( 120 * ( $n**4 ) ) );
+NEW:
+   return
+     log($n) + 0.577215664901532 +
+     ( 1 / ( 2 * $n ) ) -
+     ( 1 / ( 12 * ( $n**2 ) ) ) +
+     ( 1 / ( 120 * ( $n**4 ) ) );
+
+-more robust vertical alignment of complex terminal else blocks and ternary
+statements.
+
+ +

2006 07 19

+ +
-Eliminated bug where a here-doc invoked through an 'e' modifier on a pattern
+replacement text was not recognized.  The tokenizer now recursively scans
+replacement text (but does not reformat it).
+
+-improved vertical alignment of terminal else blocks and ternary statements.
+ Thanks to Chris for the suggestion. 
+
+ OLD:
+   if    ( IsBitmap() ) { return GetBitmap(); }
+   elsif ( IsFiles() )  { return GetFiles(); }
+   else { return GetText(); }
+
+ NEW:
+   if    ( IsBitmap() ) { return GetBitmap(); }
+   elsif ( IsFiles() )  { return GetFiles(); }
+   else                 { return GetText(); }
+
+ OLD:
+   $which_search =
+       $opts{"t"} ? 'title'
+     : $opts{"s"} ? 'subject'
+     : $opts{"a"} ? 'author'
+     : 'title';
+
+ NEW:
+   $which_search =
+       $opts{"t"} ? 'title'
+     : $opts{"s"} ? 'subject'
+     : $opts{"a"} ? 'author'
+     :              'title';
+
+-improved indentation of try/catch blocks and other externally defined
+functions accepting a block argument.  Thanks to jae.
+
+-Added support for Perl 5.10 features say and smartmatch.
+
+-Added flag -pbp (--perl-best-practices) as an abbreviation for parameters
+suggested in Damian Conway's "Perl Best Practices".  -pbp is the same as:
+
+   -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+   -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = 
+         **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
+
+ Please note that the -st here restricts input to standard input; use
+ -nst if necessary to override.
+
+-Eliminated some needless breaks at equals signs in -lp indentation.
+
+   OLD:
+       $c =
+         Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                             TOP + $y * (BOTTOM - TOP) / SIZE);
+   NEW:
+       $c = Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                                TOP + $y * (BOTTOM - TOP) / SIZE);
+
+A break at an equals is sometimes useful for preventing complex statements 
+from hitting the line length limit.  The decision to do this was 
+over-eager in some cases and has been improved.  Thanks to Royce Reece.
+
+-qw quotes contained in braces, square brackets, and parens are being
+treated more like those containers as far as stacking of tokens.  Also
+stack of closing tokens ending ');' will outdent to where the ');' would
+have outdented if the closing stack is matched with a similar opening stack.
+
+ OLD: perltidy -soc -sct
+   __PACKAGE__->load_components(
+       qw(
+         PK::Auto
+         Core
+         )
+   );
+ NEW: perltidy -soc -sct
+   __PACKAGE__->load_components( qw(
+         PK::Auto
+         Core
+   ) );
+ Thanks to Aran Deltac
+
+-Eliminated some undesirable or marginally desirable vertical alignments.
+These include terminal colons, opening braces, and equals, and particularly
+when just two lines would be aligned.
+
+OLD:
+   my $accurate_timestamps = $Stamps{lnk};
+   my $has_link            = 
+       ...
+NEW:
+   my $accurate_timestamps = $Stamps{lnk};
+   my $has_link =
+
+-Corrected a problem with -mangle in which a space would be removed
+between a keyword and variable beginning with ::.
+
+ +

2006 06 14

+ +
-Attribute argument lists are now correctly treated as quoted strings
+and not formatted.  This is the most important update in this version.
+Thanks to Borris Zentner, Greg Ferguson, Steve Kirkup.
+
+-Updated to recognize the defined or operator, //, to be released in Perl 10.
+Thanks to Sebastien Aperghis-Tramoni.
+
+-A useful utility perltidyrc_dump.pl is included in the examples section.  It
+will read any perltidyrc file and write it back out in a standard format
+(though comments are lost).
+
+-Added option to have perltidy read and return a hash with the contents of a
+perltidyrc file.  This may be used by Leif Eriksen's tidyview code.  This
+feature is used by the demonstration program 'perltidyrc_dump.pl' in the
+examples directory.
+
+-Improved error checking in perltidyrc files.  Unknown bare words were not
+being caught.
+
+-The --dump-options parameter now dumps parameters in the format required by a
+perltidyrc file.
+
+-V-Strings with underscores are now recognized.
+For example: $v = v1.2_3; 
+
+-cti=3 option added which gives one extra indentation level to closing 
+tokens always.  This provides more predictable closing token placement
+than cti=2.  If you are using cti=2 you might want to try cti=3.
+
+-To identify all left-adjusted comments as static block comments, use C<-sbcp='^#'>.
+
+-New parameters -fs, -fsb, -fse added to allow sections of code between #<<<
+and #>>> to be passed through verbatim. This is enabled by default and turned
+off by -nfs.  Flags -fsb and -fse allow other beginning and ending markers.
+Thanks to Wolfgang Werner and Marion Berryman for suggesting this.  
+
+-added flag -skp to put a space between all Perl keywords and following paren.
+The default is to only do this for certain keywords.  Suggested by
+H.Merijn Brand.
+
+-added flag -sfp to put a space between a function name and following paren.
+The default is not to do this.  Suggested by H.Merijn Brand.
+
+-Added patch to avoid breaking GetOpt::Long::Configure set by calling program. 
+Thanks to Philippe Bruhat.
+
+-An error was fixed in which certain parameters in a .perltidyrc file given
+without the equals sign were not recognized.  That is,
+'--brace-tightness 0' gave an error but '--brace-tightness=0' worked
+ok.  Thanks to Zac Hansen.
+
+-An error preventing the -nwrs flag from working was corrected. Thanks to
+ Greg Ferguson.
+
+-Corrected some alignment problems with entab option.
+
+-A bug with the combination of -lp and -extrude was fixed (though this
+combination doesn't really make sense).  The bug was that a line with
+a single zero would be dropped.  Thanks to Cameron Hayne.
+
+-Updated Windows detection code to avoid an undefined variable.
+Thanks to Joe Yates and Russ Jones.
+
+-Improved formatting for short trailing statements following a closing paren.
+Thanks to Joe Matarazzo.
+
+-The handling of the -icb (indent closing block braces) flag has been changed
+slightly to provide more consistent and predictable formatting of complex
+structures.  Instead of giving a closing block brace the indentation of the
+previous line, it is now given one extra indentation level.  The two methods
+give the same result if the previous line was a complete statement, as in this
+example:
+
+       if ($task) {
+           yyy();
+           }    # -icb
+       else {
+           zzz();
+           }
+The change also fixes a problem with empty blocks such as:
+
+   OLD, -icb:
+   elsif ($debug) {
+   }
+
+   NEW, -icb:
+   elsif ($debug) {
+       }
+
+-A problem with -icb was fixed in which a closing brace was misplaced when
+it followed a quote which spanned multiple lines.
+
+-Some improved breakpoints for -wba='&& || and or'
+
+-Fixed problem with misaligned cuddled else in complex statements
+when the -bar flag was also used.  Thanks to Alex and Royce Reese.
+
+-Corrected documentation to show that --outdent-long-comments is the default.
+Thanks to Mario Lia.
+
+-New flag -otr (opening-token-right) is similar to -bar (braces-always-right)
+but applies to non-structural opening tokens.
+
+-new flags -sot (stack-opening-token), -sct (stack-closing-token).
+Suggested by Tony.
+
+ +

2003 10 21

+ +
-The default has been changed to not do syntax checking with perl.  
+  Use -syn if you want it.  Perltidy is very robust now, and the -syn
+  flag now causes more problems than it's worth because of BEGIN blocks
+  (which get executed with perl -c).  For example, perltidy will never
+  return when trying to beautify this code if -syn is used:
+
+       BEGIN { 1 while { }; }
+
+ Although this is an obvious error, perltidy is often run on untested
+ code which is more likely to have this sort of problem.  A more subtle
+ example is:
+
+       BEGIN { use FindBin; }
+
+ which may hang on some systems using -syn if a shared file system is
+ unavailable.
+
+-Changed style -gnu to use -cti=1 instead of -cti=2 (see next item).
+ In most cases it looks better.  To recover the previous format, use
+ '-gnu -cti=2'
+
+-Added flags -cti=n for finer control of closing token indentation.
+  -cti = 0 no extra indentation (default; same as -nicp)
+  -cti = 1 enough indentation so that the closing token
+       aligns with its opening token.
+  -cti = 2 one extra indentation level if the line has the form 
+         );   ];   or   };     (same as -icp).
+
+  The new option -cti=1 works well with -lp:
+
+  EXAMPLES:
+
+   # perltidy -lp -cti=1
+   @month_of_year = (
+                      'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                      'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                    );
+
+   # perltidy -lp -cti=2
+   @month_of_year = (
+                      'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                      'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                      );
+ This is backwards compatible with -icp. See revised manual for
+ details.  Suggested by Mike Pennington.
+
+-Added flag '--preserve-line-endings' or '-ple' to cause the output
+ line ending to be the same as in the input file, for unix, dos, 
+ or mac line endings.  Only works under unix. Suggested by 
+ Rainer Hochschild.
+
+-Added flag '--output-line-ending=s' or '-ole=s' where s=dos or win,
+ unix, or mac.  Only works under unix.
+
+-Files with Mac line endings should now be handled properly under unix
+ and dos without being passed through a converter.
+
+-You may now include 'and', 'or', and 'xor' in the list following
+ '--want-break-after' to get line breaks after those keywords rather than
+ before them.  Suggested by Rainer Hochschild.
+
+-Corrected problem with command line option for -vtc=n and -vt=n. The
+ equals sign was being eaten up by the Windows shell so perltidy didn't
+ see it.
+
+ +

2003 07 26

+ +
-Corrected cause of warning message with recent versions of Perl:
+   "Possible precedence problem on bitwise & operator at ..."
+ Thanks to Jim Files.
+
+-fixed bug with -html with '=for pod2html' sections, in which code/pod
+output order was incorrect.  Thanks to Tassilo von Parseval.
+
+-fixed bug when the -html flag is used, in which the following error
+message, plus others, appear:
+    did not see <body> in pod2html output
+This was caused by a change in the format of html output by pod2html
+VERSION 1.04 (included with perl 5.8).  Thanks to Tassilo von Parseval.
+
+-Fixed bug where an __END__ statement would be mistaken for a label
+if it is immediately followed by a line with a leading colon. Thanks
+to John Bayes.
+
+-Implemented guessing logic for brace types when it is ambiguous.  This
+has been on the TODO list a long time.  Thanks to Boris Zentner for
+an example.
+
+-Long options may now be negated either as '--nolong-option' 
+or '--no-long-option'.  Thanks to Philip Newton for the suggestion.
+
+-added flag --html-entities or -hent which controls the use of
+Html::Entities for html formatting.  Use --nohtml-entities or -nhent to
+prevent the use of Html::Entities to encode special symbols.  The
+default is -hent.  Html::Entities when formatting perl text to escape
+special symbols.  This may or may not be the right thing to do,
+depending on browser/language combinations.  Thanks to Burak Gursoy for
+this suggestion.
+
+-Bareword strings with leading '-', like, '-foo' now count as 1 token
+for horizontal tightness.  This way $a{'-foo'}, $a{foo}, and $a{-foo}
+are now all treated similarly.  Thus, by default, OLD: $a{ -foo } will
+now be NEW: $a{-foo}.  Suggested by Mark Olesen.
+
+-added 2 new flags to control spaces between keywords and opening parens:
+  -sak=s  or --space-after-keyword=s,  and
+  -nsak=s or --nospace-after-keyword=s, where 's' is a list of keywords.
+
+The new default list of keywords which get a space is:
+
+  "my local our and or eq ne if else elsif until unless while for foreach
+    return switch case given when"
+
+Use -sak=s and -nsak=s to add and remove keywords from this list,
+   respectively.
+
+Explanation: Stephen Hildrey noted that perltidy was being inconsistent
+in placing spaces between keywords and opening parens, and sent a patch
+to give user control over this.  The above list was selected as being
+a reasonable default keyword list.  Previously, perltidy
+had a hardwired list which also included these keywords:
+
+       push pop shift unshift join split die
+
+but did not have 'our'.  Example: if you prefer to make perltidy behave
+exactly as before, you can include the following two lines in your
+.perltidyrc file: 
+
+  -sak="push pop local shift unshift join split die"
+  -nsak="our"
+
+-Corrected html error in .toc file when -frm -html is used (extra ");
+ browsers were tolerant of it.
+
+-Improved alignment of chains of binary and ?/: operators. Example:
+ OLD:
+   $leapyear =
+     $year % 4     ? 0
+     : $year % 100 ? 1
+     : $year % 400 ? 0
+     : 1;
+ NEW:
+   $leapyear =
+       $year % 4   ? 0
+     : $year % 100 ? 1
+     : $year % 400 ? 0
+     : 1;
+
+-improved breakpoint choices involving '->'
+
+-Corrected tokenization of things like ${#}. For example,
+ ${#} is valid, but ${# } is a syntax error.
+
+-Corrected minor tokenization errors with indirect object notation.
+ For example, 'new A::()' works now.
+
+-Minor tokenization improvements; all perl code distributed with perl 5.8 
+ seems to be parsed correctly except for one instance (lextest.t) 
+ of the known bug.
+
+ +

2002 11 30

+ +
-Implemented scalar attributes.  Thanks to Sean Tobin for noting this.
+
+-Fixed glitch introduced in previous release where -pre option
+was not outputting a leading html <pre> tag.
+
+-Numerous minor improvements in vertical alignment, including the following:
+
+-Improved alignment of opening braces in many cases.  Needed for improved
+switch/case formatting, and also suggested by Mark Olesen for sort/map/grep
+formatting.  For example:
+
+ OLD:
+   @modified =
+     map { $_->[0] }
+     sort { $a->[1] <=> $b->[1] }
+     map { [ $_, -M ] } @filenames;
+
+ NEW:
+   @modified =
+     map  { $_->[0] }
+     sort { $a->[1] <=> $b->[1] }
+     map  { [ $_, -M ] } @filenames;
+
+-Eliminated alignments across unrelated statements. Example:
+ OLD:
+   $borrowerinfo->configure( -state => 'disabled' );
+   $borrowerinfo->grid( -col        => 1, -row => 0, -sticky => 'w' );
+
+ NEW:  
+   $borrowerinfo->configure( -state => 'disabled' );
+   $borrowerinfo->grid( -col => 1, -row => 0, -sticky => 'w' );
+
+ Thanks to Mark Olesen for suggesting this.
+
+-Improved alignement of '='s in certain cases.
+ Thanks to Norbert Gruener for sending an example.
+
+-Outdent-long-comments (-olc) has been re-instated as a default, since
+ it works much better now.  Use -nolc if you want to prevent it.
+
+-Added check for 'perltidy file.pl -o file.pl', which causes file.pl
+to be lost. (The -b option should be used instead). Thanks to mreister
+for reporting this problem.
+
+ +

2002 11 06

+ +
-Switch/case or given/when syntax is now recognized.  Its vertical alignment
+is not great yet, but it parses ok.  The words 'switch', 'case', 'given',
+and 'when' are now treated as keywords.  If this causes trouble with older
+code, we could introduce a switch to deactivate it.  Thanks to Stan Brown
+and Jochen Schneider for recommending this.
+
+-Corrected error parsing sub attributes with call parameters.
+Thanks to Marc Kerr for catching this.
+
+-Sub prototypes no longer need to be on the same line as sub names.  
+
+-a new flag -frm or --frames will cause html output to be in a
+frame, with table of contents in the left panel and formatted source
+in the right panel.  Try 'perltidy -html -frm somemodule.pm' for example.
+
+-The new default for -html formatting is to pass the pod through Pod::Html.
+The result is syntax colored code within your pod documents. This can be
+deactivated with -npod.  Thanks to those who have written to discuss this,
+particularly Mark Olesen and Hugh Myers.
+
+-the -olc (--outdent-long-comments) option works much better.  It now outdents
+groups of consecutive comments together, and by just the amount needed to
+avoid having any one line exceeding the maximum line length.
+
+-block comments are now trimmed of trailing whitespace.
+
+-if a directory specified with -opath does not exist, it will be created.
+
+-a table of contents to packages and subs is output when -html is used.
+Use -ntoc to prevent this. 
+
+-fixed an unusual bug in which a 'for' statement following a 'format'
+statement was not correctly tokenized.  Thanks to Boris Zentner for
+catching this.
+
+-Tidy.pm is no longer dependent on modules IO::Scalar and IO::ScalarArray.  
+There were some speed issues.  Suggested by Joerg Walter.
+
+-The treatment of quoted wildcards (file globs) is now system-independent. 
+For example
+
+   perltidy 'b*x.p[lm]'
+
+would match box.pl, box.pm, brinx.pm under any operating system.  Of
+course, anything unquoted will be subject to expansion by any shell.
+
+-default color for keywords under -html changed from 
+SaddleBrown (#8B4513) to magenta4 (#8B008B).
+
+-fixed an arg parsing glitch in which something like:
+  perltidy quick-help
+would trigger the help message and exit, rather than operate on the
+file 'quick-help'.
+
+ +

2002 09 22

+ +
-New option '-b' or '--backup-and-modify-in-place' will cause perltidy to
+overwrite the original file with the tidied output file.  The original
+file will be saved with a '.bak' extension (which can be changed with
+-bext=s).  Thanks to Rudi Farkas for the suggestion.
+
+-An index to all subs is included at the top of -html output, unless
+only the <pre> section is written.
+
+-Anchor lines of the form <a name="mysub"></a> are now inserted at key points
+in html output, such as before sub definitions, for the convenience of
+postprocessing scripts.  Suggested by Howard Owen.
+
+-The cuddled-else (-ce) flag now also makes cuddled continues, like
+this:
+
+   while ( ( $pack, $file, $line ) = caller( $i++ ) ) {
+      # bla bla
+   } continue {
+       $prevpack = $pack;
+   }
+
+Suggested by Simon Perreault.  
+
+-Fixed bug in which an extra blank line was added before an =head or 
+similar pod line after an __END__ or __DATA__ line each time 
+perltidy was run.  Also, an extra blank was being added after
+a terminal =cut.  Thanks to Mike Birdsall for reporting this.
+
+ +

2002 08 26

+ +
-Fixed bug in which space was inserted in a hyphenated hash key:
+   my $val = $myhash{USER-NAME};
+ was converted to:
+   my $val = $myhash{USER -NAME}; 
+ Thanks to an anonymous bug reporter at sourceforge.
+
+-Fixed problem with the '-io' ('--indent-only') where all lines 
+ were double spaced.  Thanks to Nick Andrew for reporting this bug.
+
+-Fixed tokenization error in which something like '-e1' was 
+ parsed as a number. 
+
+-Corrected a rare problem involving older perl versions, in which 
+ a line break before a bareword caused problems with 'use strict'.
+ Thanks to Wolfgang Weisselberg for noting this.
+
+-More syntax error checking added.
+
+-Outdenting labels (-ola) has been made the default, in order to follow the
+ perlstyle guidelines better.  It's probably a good idea in general, but
+ if you do not want this, use -nola in your .perltidyrc file.
+
+-Updated rules for padding logical expressions to include more cases.
+ Thanks to Wolfgang Weisselberg for helpful discussions.
+
+-Added new flag -osbc (--outdent-static-block-comments) which will
+ outdent static block comments by 2 spaces (or whatever -ci equals).
+ Requested by Jon Robison.
+
+ +

2002 04 25

+ +
-Corrected a bug, introduced in the previous release, in which some
+ closing side comments (-csc) could have incorrect text.  This is
+ annoying but will be correct the next time perltidy is run with -csc.
+
+-Fixed bug where whitespace was being removed between 'Bar' and '()' 
+ in a use statement like:
+
+      use Foo::Bar ();
+
+-Whenever possible, if a logical expression is broken with leading
+ '&&', '||', 'and', or 'or', then the leading line will be padded
+ with additional space to produce alignment.  This has been on the
+ todo list for a long time; thanks to Frank Steinhauer for reminding
+ me to do it.  Notice the first line after the open parens here:
+
+       OLD: perltidy -lp
+       if (
+            !param("rules.to.$linecount")
+            && !param("rules.from.$linecount")
+            && !param("rules.subject.$linecount")
+            && !(
+                  param("rules.fieldname.$linecount")
+                  && param("rules.fieldval.$linecount")
+            )
+            && !param("rules.size.$linecount")
+            && !param("rules.custom.$linecount")
+         )
+
+       NEW: perltidy -lp
+       if (
+               !param("rules.to.$linecount")
+            && !param("rules.from.$linecount")
+            && !param("rules.subject.$linecount")
+            && !(
+                     param("rules.fieldname.$linecount")
+                  && param("rules.fieldval.$linecount")
+            )
+            && !param("rules.size.$linecount")
+            && !param("rules.custom.$linecount")
+         )
+
+ +

2002 04 16

+ +
-Corrected a mistokenization of variables for a package with a name
+ equal to a perl keyword.  For example: 
+
+    my::qx();
+    package my;
+    sub qx{print "Hello from my::qx\n";}
+
+ In this case, the leading 'my' was mistokenized as a keyword, and a
+ space was being place between 'my' and '::'.  This has been
+ corrected.  Thanks to Martin Sluka for discovering this. 
+
+-A new flag -bol (--break-at-old-logic-breakpoints)
+ has been added to control whether containers with logical expressions
+ should be broken open.  This is the default.
+
+-A new flag -bok (--break-at-old-keyword-breakpoints)
+ has been added to follow breaks at old keywords which return lists,
+ such as sort and map.  This is the default.
+
+-A new flag -bot (--break-at-old-trinary-breakpoints) has been added to
+ follow breaks at trinary (conditional) operators.  This is the default.
+
+-A new flag -cab=n has been added to control breaks at commas after
+ '=>' tokens.  The default is n=1, meaning break unless this breaks
+ open an existing on-line container.
+
+-A new flag -boc has been added to allow existing list formatting
+ to be retained.  (--break-at-old-comma-breakpoints).  See updated manual.
+
+-A new flag -iob (--ignore-old-breakpoints) has been added to
+ prevent the locations of old breakpoints from influencing the output
+ format.
+
+-Corrected problem where nested parentheses were not getting full
+ indentation.  This has been on the todo list for some time; thanks 
+ to Axel Rose for a snippet demonstrating this issue.
+
+           OLD: inner list is not indented
+           $this->sendnumeric(
+               $this->server,
+               (
+                 $ret->name,        $user->username, $user->host,
+               $user->server->name, $user->nick,     "H"
+               ),
+           );
+
+           NEW:
+           $this->sendnumeric(
+               $this->server,
+               (
+                   $ret->name,          $user->username, $user->host,
+                   $user->server->name, $user->nick,     "H"
+               ),
+           );
+
+-Code cleaned up by removing the following unused, undocumented flags.
+ They should not be in any .perltidyrc files because they were just
+ experimental flags which were never documented.  Most of them placed
+ artificial limits on spaces, and Wolfgang Weisselberg convinced me that
+ most of them they do more harm than good by causing unexpected results.
+
+ --maximum-continuation-indentation (-mci)
+ --maximum-whitespace-columns
+ --maximum-space-to-comment (-xsc)
+ --big-space-jump (-bsj)
+
+-Pod file 'perltidy.pod' has been appended to the script 'perltidy', and
+ Tidy.pod has been append to the module 'Tidy.pm'.  Older MakeMaker's
+ were having trouble.
+
+-A new flag -isbc has been added for more control on comments. This flag
+ has the effect that if there is no leading space on the line, then the
+ comment will not be indented, and otherwise it may be.  If both -ibc and
+ -isbc are set, then -isbc takes priority.  Thanks to Frank Steinhauer
+ for suggesting this.
+
+-A new document 'stylekey.pod' has been created to quickly guide new users
+ through the maze of perltidy style parameters.  An html version is 
+ on the perltidy web page.  Take a look! It should be very helpful.
+
+-Parameters for controlling 'vertical tightness' have been added:
+ -vt and -vtc are the main controls, but finer control is provided
+ with -pvt, -pcvt, -bvt, -bcvt, -sbvt, -sbcvt.  Block brace vertical
+ tightness controls have also been added.
+ See updated manual and also see 'stylekey.pod'. Simple examples:
+
+   # perltidy -lp -vt=1 -vtc=1
+   @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                      'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
+
+   # perltidy -lp -vt=1 -vtc=0
+   @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                      'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+   );
+
+-Lists which do not format well in uniform columns are now better
+ identified and formated.
+
+   OLD:
+   return $c->create( 'polygon', $x, $y, $x + $ruler_info{'size'},
+       $y + $ruler_info{'size'}, $x - $ruler_info{'size'},
+       $y + $ruler_info{'size'} );
+
+   NEW:
+   return $c->create(
+       'polygon', $x, $y,
+       $x + $ruler_info{'size'},
+       $y + $ruler_info{'size'},
+       $x - $ruler_info{'size'},
+       $y + $ruler_info{'size'}
+   );
+
+   OLD:
+     radlablist($f1, pad('Initial', $p), $b->{Init}->get_panel_ref, 'None ',
+                'None', 'Default', 'Default', 'Simple', 'Simple');
+   NEW:
+     radlablist($f1,
+                pad('Initial', $p),
+                $b->{Init}->get_panel_ref,
+                'None ', 'None', 'Default', 'Default', 'Simple', 'Simple');
+
+-Corrected problem where an incorrect html filename was generated for 
+ external calls to Tidy.pm module.  Fixed incorrect html title when
+ Tidy.pm is called with IO::Scalar or IO::Array source.
+
+-Output file permissons are now set as follows.  An output script file
+ gets the same permission as the input file, except that owner
+ read/write permission is added (otherwise, perltidy could not be
+ rerun).  Html output files use system defaults.  Previously chmod 0755
+ was used in all cases.  Thanks to Mark Olesen for bringing this up.
+
+-Missing semicolons will not be added in multi-line blocks of type
+ sort, map, or grep.  This brings perltidy into closer agreement
+ with common practice.  Of course, you can still put semicolons 
+ there if you like.  Thanks to Simon Perreault for a discussion of this.
+
+-Most instances of extra semicolons are now deleted.  This is
+ particularly important if the -csc option is used.  Thanks to Wolfgang
+ Weisselberg for noting this.  For example, the following line
+ (produced by 'h2xs' :) has an extra semicolon which will now be
+ removed:
+
+    BEGIN { plan tests => 1 };
+
+-New parameter -csce (--closing-side-comment-else-flag) can be used
+ to control what text is appended to 'else' and 'elsif' blocks.
+ Default is to just add leading 'if' text to an 'else'.  See manual.
+
+-The -csc option now labels 'else' blocks with additinal information
+ from the opening if statement and elsif statements, if space.
+ Thanks to Wolfgang Weisselberg for suggesting this.
+
+-The -csc option will now remove any old closing side comments
+ below the line interval threshold. Thanks to Wolfgang Weisselberg for
+ suggesting this.
+
+-The abbreviation feature, which was broken in the previous version,
+ is now fixed.  Thanks to Michael Cartmell for noting this.
+
+-Vertical alignment is now done for '||='  .. somehow this was 
+ overlooked.
+
+ +

2002 02 25

+ +
-This version uses modules for the first time, and a standard perl
+ Makefile.PL has been supplied.  However, perltidy may still be
+ installed as a single script, without modules.  See INSTALL for
+ details.
+
+-The man page 'perl2web' has been merged back into the main 'perltidy'
+ man page to simplify installation.  So you may remove that man page
+ if you have an older installation.
+
+-Added patch from Axel Rose for MacPerl.  The patch prompts the user
+ for command line arguments before calling the module 
+ Perl::Tidy::perltidy.
+
+-Corrected bug with '-bar' which was introduced in the previous
+ version.  A closing block brace was being indented.  Thanks to
+ Alexandros M Manoussakis for reporting this.
+
+-New parameter '--entab-leading-whitespace=n', or '-et=n', has been
+ added for those who prefer tabs.  This behaves different from the
+ existing '-t' parameter; see updated man page.  Suggested by Mark
+ Olesen.
+
+-New parameter '--perl-syntax-check-flags=s'  or '-pcsf=s' can be
+ used to change the flags passed to perltidy in a syntax check.
+ See updated man page.  Suggested by Mark Olesen. 
+
+-New parameter '--output-path=s'  or '-opath=s' will cause output
+ files to be placed in directory s.  See updated man page.  Thanks for
+ Mark Olesen for suggesting this.
+
+-New parameter --dump-profile (or -dpro) will dump to
+ standard output information about the search for a
+ configuration file, the name of whatever configuration file
+ is selected, and its contents.  This should help debugging
+ config files, especially on different Windows systems.
+
+-The -w parameter now notes possible errors of the form:
+
+       $comment = s/^\s*(\S+)\..*/$1/;   # trim whitespace
+
+-Corrections added for a leading ':' and for leaving a leading 'tcsh'
+ line untouched.  Mark Olesen reported that lines of this form were
+ accepted by perl but not by perltidy:
+
+       : # use -*- perl -*-
+       eval 'exec perl -wS $0 "$@"'  # shell should exec 'perl'
+       unless 1;                     # but Perl should skip this one
+
+ Perl will silently swallow a leading colon on line 1 of a
+ script, and now perltidy will do likewise.  For example,
+ this is a valid script, provided that it is the first line,
+ but not otherwise:
+
+       : print "Hello World\n";
+
+ Also, perltidy will now mark a first line with leading ':' followed by
+ '#' as type SYSTEM (just as a #!  line), not to be formatted.
+
+-List formatting improved for certain lists with special
+ initial terms, such as occur with 'printf', 'sprintf',
+ 'push', 'pack', 'join', 'chmod'.  The special initial term is
+ now placed on a line by itself.  For example, perltidy -gnu
+
+    OLD:
+       $Addr = pack(
+                    "C4",                hex($SourceAddr[0]),
+                    hex($SourceAddr[1]), hex($SourceAddr[2]),
+                    hex($SourceAddr[3])
+                    );
+
+    NEW:
+       $Addr = pack("C4",
+                    hex($SourceAddr[0]), hex($SourceAddr[1]),
+                    hex($SourceAddr[2]), hex($SourceAddr[3]));
+
+     OLD:
+           push (
+                 @{$$self{states}}, '64', '66', '68',
+                 '70',              '72', '74', '76',
+                 '78',              '80', '82', '84',
+                 '86',              '88', '90', '92',
+                 '94',              '96', '98', '100',
+                 '102',             '104'
+                 );
+
+     NEW:
+           push (
+                 @{$$self{states}},
+                 '64', '66', '68', '70', '72',  '74',  '76',
+                 '78', '80', '82', '84', '86',  '88',  '90',
+                 '92', '94', '96', '98', '100', '102', '104'
+                 );
+
+-Lists of complex items, such as matricies, are now detected
+ and displayed with just one item per row:
+
+   OLD:
+   $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+       [ 1, tan( deg2rad($a) ), 0 ], [ tan( deg2rad($b) ), 1, 0 ],
+       [ 0, 0, 1 ]
+   );
+
+   NEW:
+   $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+       [ 1,                  tan( deg2rad($a) ), 0 ],
+       [ tan( deg2rad($b) ), 1,                  0 ],
+       [ 0,                  0,                  1 ]
+   );
+
+-The perl syntax check will be turned off for now when input is from
+ standard input or standard output.  The reason is that this requires
+ temporary files, which has produced far too many problems during
+ Windows testing.  For example, the POSIX module under Windows XP/2000
+ creates temporary names in the root directory, to which only the
+ administrator should have permission to write.
+
+-Merged patch sent by Yves Orton to handle appropriate
+ configuration file locations for different Windows varieties
+ (2000, NT, Me, XP, 95, 98).
+
+-Added patch to properly handle a for/foreach loop without
+ parens around a list represented as a qw.  I didn't know this
+ was possible until Wolfgang Weisselberg pointed it out:
+
+       foreach my $key qw\Uno Due Tres Quadro\ {
+           print "Set $key\n";
+       }
+
+ But Perl will give a syntax error without the $ variable; ie this will
+ not work:
+
+       foreach qw\Uno Due Tres Quadro\ {
+           print "Set $_\n";
+       }
+
+-Merged Windows version detection code sent by Yves Orton.  Perltidy
+ now automatically turns off syntax checking for Win 9x/ME versions,
+ and this has solved a lot of robustness problems.  These systems 
+ cannot reliably handle backtick operators.  See man page for
+ details.
+
+-Merged VMS filename handling patch sent by Michael Cartmell.  (Invalid
+ output filenames were being created in some cases). 
+
+-Numerous minor improvements have been made for -lp style indentation.
+
+-Long C-style 'for' expressions will be broken after each ';'.   
+
+ 'perltidy -gnu' gives:
+
+   OLD:
+   for ($status = $db->seq($key, $value, R_CURSOR()) ; $status == 0
+        and $key eq $origkey ; $status = $db->seq($key, $value, R_NEXT())) 
+
+   NEW:
+   for ($status = $db->seq($key, $value, R_CURSOR()) ;
+        $status == 0 and $key eq $origkey ;
+        $status = $db->seq($key, $value, R_NEXT()))
+
+-For the -lp option, a single long term within parens
+ (without commas) now has better alignment.  For example,
+ perltidy -gnu
+
+           OLD:
+           $self->throw("Must specify a known host, not $location,"
+                 . " possible values ("
+                 . join (",", sort keys %hosts) . ")");
+
+           NEW:
+           $self->throw("Must specify a known host, not $location,"
+                        . " possible values ("
+                        . join (",", sort keys %hosts) . ")");
+
+ +

2001 12 31

+ +
-This version is about 20 percent faster than the previous
+ version as a result of optimization work.  The largest gain
+ came from switching to a dispatch hash table in the
+ tokenizer.
+
+-perltidy -html will check to see if HTML::Entities is
+ installed, and if so, it will use it to encode unsafe
+ characters.
+
+-Added flag -oext=ext to change the output file extension to
+ be different from the default ('tdy' or 'html').  For
+ example:
+
+   perltidy -html -oext=htm filename
+
+will produce filename.htm
+
+-Added flag -cscw to issue warnings if a closing side comment would replace
+an existing, different side comments.  See the man page for details.
+Thanks to Peter Masiar for helpful discussions.
+
+-Corrected tokenization error of signed hex/octal/binary numbers. For
+example, the first hex number below would have been parsed correctly
+but the second one was not:
+   if ( ( $tmp >= 0x80_00_00 ) || ( $tmp < -0x80_00_00 ) ) { }
+
+-'**=' was incorrectly tokenized as '**' and '='.  This only
+    caused a problem with the -extrude opton.
+
+-Corrected a divide by zero when -extrude option is used
+
+-The flag -w will now contain all errors reported by 'perl -c' on the
+input file, but otherwise they are not reported.  The reason is that
+perl will report lots of problems and syntax errors which are not of
+interest when only a small snippet is being formatted (such as missing
+modules and unknown bare words).  Perltidy will always report all
+significant syntax errors that it finds, such as unbalanced braces,
+unless the -q (quiet) flag is set.
+
+-Merged modifications created by Hugh Myers into perltidy.
+ These include a 'streamhandle' routine which allows perltidy
+ as a module to operate on input and output arrays and strings
+ in addition to files.  Documentation and new packaging as a
+ module should be ready early next year; This is an elegant,
+ powerful update; many thanks to Hugh for contributing it.
+
+ +

2001 11 28

+ +
-added a tentative patch which tries to keep any existing breakpoints
+at lines with leading keywords map,sort,eval,grep. The idea is to
+improve formatting of sequences of list operations, as in a schwartzian
+transform.  Example:
+
+   INPUT:
+   my @sorted = map { $_->[0] }
+                sort { $a->[1] <=> $b->[1] }
+                map { [ $_, rand ] } @list;
+
+   OLD:
+   my @sorted =
+     map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, rand ] } @list;
+
+   NEW:
+   my @sorted = map { $_->[0] }
+     sort { $a->[1] <=> $b->[1] }
+     map { [ $_, rand ] } @list;
+
+ The new alignment is not as nice as the input, but this is an improvement.
+ Thanks to Yves Orton for this suggestion.
+
+-modified indentation logic so that a line with leading opening paren,
+brace, or square bracket will never have less indentation than the
+line with the corresponding opening token.  Here's a simple example:
+
+   OLD:
+       $mw->Button(
+           -text    => "New Document",
+           -command => \&new_document
+         )->pack(
+           -side   => 'bottom',
+           -anchor => 'e'
+       );
+
+   Note how the closing ');' is lined up with the first line, even
+   though it closes a paren in the 'pack' line.  That seems wrong.
+
+   NEW:
+       $mw->Button(
+           -text    => "New Document",
+           -command => \&new_document
+         )->pack(
+           -side   => 'bottom',
+           -anchor => 'e'
+         );
+
+  This seems nicer: you can up-arrow with an editor and arrive at the
+  opening 'pack' line.
+
+-corrected minor glitch in which cuddled else (-ce) did not get applied
+to an 'unless' block, which should look like this:
+
+       unless ($test) {
+
+       } else {
+
+       }
+
+ Thanks to Jeremy Mates for reporting this.
+
+-The man page has been reorganized to parameters easier to find.
+
+-Added check for multiple definitions of same subroutine.  It is easy
+ to introduce this problem when cutting and pasting. Perl does not
+ complain about it, but it can lead to disaster.
+
+-The command -pro=filename  or -profile=filename may be used to specify a
+ configuration file which will override the default name of .perltidyrc.
+ There must not be a space on either side of the '=' sign.  I needed
+ this to be able to easily test perltidy with a variety of different
+ configuration files.
+
+-Side comment alignment has been improved somewhat across frequent level
+ changes, as in short if/else blocks.  Thanks to Wolfgang Weisselberg 
+ for pointing out this problem.  For example:
+
+   OLD:
+   if ( ref $self ) {    # Called as a method
+       $format = shift;
+   }
+   else {    # Regular procedure call
+       $format = $self;
+       undef $self;
+   }
+
+   NEW:
+   if ( ref $self ) {    # Called as a method
+       $format = shift;
+   }
+   else {                # Regular procedure call
+       $format = $self;
+       undef $self;
+   }
+
+-New command -ssc (--static-side-comment) and related command allows
+ side comments to be spaced close to preceding character.  This is
+ useful for displaying commented code as side comments.
+
+-New command -csc (--closing-side-comment) and several related
+ commands allow comments to be added to (and deleted from) any or all
+ closing block braces.  This can be useful if you have to maintain large
+ programs, especially those that you didn't write.  See updated man page.
+ Thanks to Peter Masiar for this suggestion.  For a simple example:
+
+       perltidy -csc
+
+       sub foo {
+           if ( !defined( $_[0] ) ) {
+               print("Hello, World\n");
+           }
+           else {
+               print( $_[0], "\n" );
+           }
+       } ## end sub foo
+
+ This added '## end sub foo' to the closing brace.  
+ To remove it, perltidy -ncsc.
+
+-New commands -ola, for outdenting labels, and -okw, for outdenting
+ selected control keywords, were implemented.  See the perltidy man
+ page for details.  Thanks to Peter Masiar for this suggestion.
+
+-Hanging side comment change: a comment will not be considered to be a
+ hanging side comment if there is no leading whitespace on the line.
+ This should improve the reliability of identifying hanging side comments.
+ Thanks to Peter Masiar for this suggestion.
+
+-Two new commands for outdenting, -olq (outdent-long-quotes) and -olc
+ (outdent-long-comments), have been added.  The original -oll
+ (outdent-long-lines) remains, and now is an abbreviation for -olq and -olc.
+ The new default is just -olq.  This was necessary to avoid inconsistency with
+ the new static block comment option.
+
+-Static block comments:  to provide a way to display commented code
+ better, the convention is used that comments with a leading '##' should
+ not be formatted as usual.  Please see '-sbc' (or '--static-block-comment')
+ for documentation.  It can be deactivated with with -nsbc, but
+ should not normally be necessary. Thanks to Peter Masiar for this 
+ suggestion.
+
+-Two changes were made to help show structure of complex lists:
+ (1) breakpoints are forced after every ',' in a list where any of
+ the list items spans multiple lines, and
+ (2) List items which span multiple lines now get continuation indentation.
+
+ The following example illustrates both of these points.  Many thanks to
+ Wolfgang Weisselberg for this snippet and a discussion of it; this is a
+ significant formatting improvement. Note how it is easier to see the call
+ parameters in the NEW version:
+
+   OLD:
+   assert( __LINE__, ( not defined $check )
+       or ref $check
+       or $check eq "new"
+       or $check eq "old", "Error in parameters",
+       defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+       defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+       defined $old_db ? ( ref $old_db ? ref $old_db : $old_db ) : "undef" );
+
+   NEW: 
+   assert(
+       __LINE__,
+       ( not defined $check )
+         or ref $check
+         or $check eq "new"
+         or $check eq "old",
+       "Error in parameters",
+       defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+       defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+       defined $old_db  ? ( ref $old_db  ? ref $old_db  : $old_db )  : "undef"
+   );
+
+   Another example shows how this helps displaying lists:
+
+   OLD:
+   %{ $self->{COMPONENTS} } = (
+       fname =>
+       { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+       street =>
+       { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+   );
+
+   The structure is clearer with the added indentation:
+
+   NEW:
+   %{ $self->{COMPONENTS} } = (
+       fname =>
+         { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+       street =>
+         { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+   );
+
+   -The structure of nested logical expressions is now displayed better.
+   Thanks to Wolfgang Weisselberg for helpful discussions.  For example,
+   note how the status of the final 'or' is displayed in the following:
+
+   OLD:
+   return ( !null($op)
+         and null( $op->sibling )
+         and $op->ppaddr eq "pp_null"
+         and class($op) eq "UNOP"
+         and ( ( $op->first->ppaddr =~ /^pp_(and|or)$/
+           and $op->first->first->sibling->ppaddr eq "pp_lineseq" )
+           or ( $op->first->ppaddr eq "pp_lineseq"
+               and not null $op->first->first->sibling
+               and $op->first->first->sibling->ppaddr eq "pp_unstack" ) ) );
+
+   NEW:
+   return (
+       !null($op)
+         and null( $op->sibling )
+         and $op->ppaddr eq "pp_null"
+         and class($op) eq "UNOP"
+         and (
+           (
+               $op->first->ppaddr =~ /^pp_(and|or)$/
+               and $op->first->first->sibling->ppaddr eq "pp_lineseq"
+           )
+           or ( $op->first->ppaddr eq "pp_lineseq"
+               and not null $op->first->first->sibling
+               and $op->first->first->sibling->ppaddr eq "pp_unstack" )
+         )
+   );
+
+  -A break will always be put before a list item containing a comma-arrow.
+  This will improve formatting of mixed lists of this form:
+
+       OLD:
+       $c->create(
+           'text', 225, 20, -text => 'A Simple Plot',
+           -font => $font,
+           -fill => 'brown'
+       );
+
+       NEW:
+       $c->create(
+           'text', 225, 20,
+           -text => 'A Simple Plot',
+           -font => $font,
+           -fill => 'brown'
+       );
+
+ -For convenience, the command -dac (--delete-all-comments) now also
+ deletes pod.  Likewise, -tac (--tee-all-comments) now also sends pod
+ to a '.TEE' file.  Complete control over the treatment of pod and
+ comments is still possible, as described in the updated help message 
+ and man page.
+
+ -The logic which breaks open 'containers' has been rewritten to be completely
+ symmetric in the following sense: if a line break is placed after an opening
+ {, [, or (, then a break will be placed before the corresponding closing
+ token.  Thus, a container either remains closed or is completely cracked
+ open.
+
+ -Improved indentation of parenthesized lists.  For example, 
+
+           OLD:
+           $GPSCompCourse =
+             int(
+             atan2( $GPSTempCompLong - $GPSLongitude,
+             $GPSLatitude - $GPSTempCompLat ) * 180 / 3.14159265 );
+
+           NEW:
+           $GPSCompCourse = int(
+               atan2(
+                   $GPSTempCompLong - $GPSLongitude,
+                   $GPSLatitude - $GPSTempCompLat
+                 ) * 180 / 3.14159265
+           );
+
+  Further improvements will be made in future releases.
+
+ -Some improvements were made in formatting small lists.
+
+ -Correspondence between Input and Output line numbers reported in a 
+  .LOG file should now be exact.  They were sometimes off due to the size
+  of intermediate buffers.
+
+ -Corrected minor tokenization error in which a ';' in a foreach loop
+  control was tokenized as a statement termination, which forced a 
+  line break:
+
+       OLD:
+       foreach ( $i = 0;
+           $i <= 10;
+           $i += 2
+         )
+       {
+           print "$i ";
+       }
+
+       NEW:
+       foreach ( $i = 0 ; $i <= 10 ; $i += 2 ) {
+           print "$i ";
+       }
+
+ -Corrected a problem with reading config files, in which quote marks were not
+  stripped.  As a result, something like -wba="&& . || " would have the leading
+  quote attached to the && and not work correctly.  A workaround for older
+  versions is to place a space around all tokens within the quotes, like this:
+  -wba=" && . || "
+
+ -Removed any existing space between a label and its ':'
+   OLD    : { }
+   NEW: { }
+  This was necessary because the label and its colon are a single token.
+
+ -Corrected tokenization error for the following (highly non-recommended) 
+  construct:
+   $user = @vars[1] / 100;
+
+ -Resolved cause of a difference between perltidy under perl v5.6.1 and
+ 5.005_03; the problem was different behavior of \G regex position
+ marker(!)
+
+ +

2001 10 20

+ +
-Corrected a bug in which a break was not being made after a full-line
+comment within a short eval/sort/map/grep block.  A flag was not being
+zeroed.  The syntax error check catches this.  Here is a snippet which
+illustrates the bug:
+
+       eval {
+           #open Socket to Dispatcher
+           $sock = &OpenSocket;
+       };
+
+The formatter mistakenly thought that it had found the following 
+one-line block:
+
+       eval {#open Socket to Dispatcher$sock = &OpenSocket; };
+
+The patch fixes this. Many thanks to Henry Story for reporting this bug.
+
+-Changes were made to help diagnose and resolve problems in a
+.perltidyrc file: 
+  (1) processing of command parameters has been into two separate
+  batches so that any errors in a .perltidyrc file can be localized.  
+  (2) commands --help, --version, and as many of the --dump-xxx
+  commands are handled immediately, without any command line processing
+  at all.  
+  (3) Perltidy will ignore any commands in the .perltidyrc file which
+  cause immediate exit.  These are:  -h -v -ddf -dln -dop -dsn -dtt
+  -dwls -dwrs -ss.  Thanks to Wolfgang Weisselberg for helpful
+  suggestions regarding these updates.
+
+-Syntax check has been reinstated as default for MSWin32 systems.  This
+way Windows 2000 users will get syntax check by default, which seems
+like a better idea, since the number of Win 95/98 systems will be
+decreasing over time.  Documentation revised to warn Windows 95/98
+users about the problem with empty '&1'.  Too bad these systems
+all report themselves as MSWin32.
+
+ +

2001 10 16

+ +
-Fixed tokenization error in which a method call of the form
+
+   Module::->new();
+
+ got a space before the '::' like this:
+
+   Module ::->new();
+
+ Thanks to David Holden for reporting this.
+
+-Added -html control over pod text, using a new abbreviation 'pd'.  See
+updated perl2web man page. The default is to use the color of a comment,
+but italicized.  Old .css style sheets will need a new line for
+.pd to use this.  The old color was the color of a string, and there
+was no control.  
+
+-.css lines are now printed in sorted order.
+
+-Fixed interpolation problem where html files had '$input_file' as title
+instead of actual input file name.  Thanks to Simon Perreault for finding
+this and sending a patch, and also to Tobias Weber.
+
+-Breaks will now have the ':' placed at the start of a line, 
+one per line by default because this shows logical structure
+more clearly. This coding has been completely redone. Some 
+examples of new ?/: formatting:
+
+      OLD:
+           wantarray ? map( $dir::cwd->lookup($_)->path, @_ ) :
+             $dir::cwd->lookup( $_[0] )->path;
+
+      NEW:
+           wantarray 
+             ? map( $dir::cwd->lookup($_)->path, @_ )
+             : $dir::cwd->lookup( $_[0] )->path;
+
+      OLD:
+               $a = ( $b > 0 ) ? {
+                   a => 1,
+                   b => 2
+               } : { a => 6, b => 8 };
+
+      NEW:
+               $a = ( $b > 0 )
+                 ? {
+                   a => 1,
+                   b => 2
+                 }
+                 : { a => 6, b => 8 };
+
+   OLD: (-gnu):
+   $self->note($self->{skip} ? "Hunk #$self->{hunk} ignored at 1.\n" :
+               "Hunk #$self->{hunk} failed--$@");
+
+   NEW: (-gnu):
+   $self->note($self->{skip} 
+               ? "Hunk #$self->{hunk} ignored at 1.\n"
+               : "Hunk #$self->{hunk} failed--$@");
+
+   OLD:
+       $which_search =
+         $opts{"t"} ? 'title'   :
+         $opts{"s"} ? 'subject' : $opts{"a"} ? 'author' : 'title';
+
+   NEW:
+       $which_search =
+         $opts{"t"} ? 'title'
+         : $opts{"s"} ? 'subject'
+         : $opts{"a"} ? 'author'
+         : 'title';
+
+You can use -wba=':' to recover the previous default which placed ':'
+at the end of a line.  Thanks to Michael Cartmell for helpful
+discussions and examples.  
+
+-Tokenizer updated to do syntax checking for matched ?/: pairs.  Also,
+the tokenizer now outputs a unique serial number for every balanced
+pair of brace types and ?/: pairs.  This greatly simplifies the
+formatter.
+
+-Long lines with repeated 'and', 'or', '&&', '||'  will now have
+one such item per line.  For example:
+
+   OLD:
+       if ( $opt_d || $opt_m || $opt_p || $opt_t || $opt_x
+           || ( -e $archive && $opt_r ) )
+       {
+           ( $pAr, $pNames ) = readAr($archive);
+       }
+
+   NEW:
+       if ( $opt_d
+           || $opt_m
+           || $opt_p
+           || $opt_t
+           || $opt_x
+           || ( -e $archive && $opt_r ) )
+       {
+           ( $pAr, $pNames ) = readAr($archive);
+       }
+
+  OLD:
+       if ( $vp->{X0} + 4 <= $x && $vp->{X0} + $vp->{W} - 4 >= $x
+           && $vp->{Y0} + 4 <= $y && $vp->{Y0} + $vp->{H} - 4 >= $y ) 
+
+  NEW:
+       if ( $vp->{X0} + 4 <= $x
+           && $vp->{X0} + $vp->{W} - 4 >= $x
+           && $vp->{Y0} + 4 <= $y
+           && $vp->{Y0} + $vp->{H} - 4 >= $y )
+
+-Long lines with multiple concatenated tokens will have concatenated
+terms (see below) placed one per line, except for short items.  For
+example:
+
+  OLD:
+       $report .=
+         "Device type:" . $ib->family . "  ID:" . $ib->serial . "  CRC:"
+         . $ib->crc . ": " . $ib->model() . "\n";
+
+  NEW:
+       $report .= "Device type:"
+         . $ib->family . "  ID:"
+         . $ib->serial . "  CRC:"
+         . $ib->model()
+         . $ib->crc . ": " . "\n";
+
+NOTE: at present 'short' means 8 characters or less.  There is a
+tentative flag to change this (-scl), but it is undocumented and
+is likely to be changed or removed later, so only use it for testing.  
+In the above example, the tokens "  ID:", "  CRC:", and "\n" are below
+this limit.  
+
+-If a line which is short enough to fit on a single line was
+nevertheless broken in the input file at a 'good' location (see below), 
+perltidy will try to retain a break.  For example, the following line
+will be formatted as:
+
+   open SUM, "<$file"
+     or die "Cannot open $file ($!)";
+
+if it was broken in the input file, and like this if not:
+
+   open SUM, "<$file" or die "Cannot open $file ($!)";
+
+GOOD: 'good' location means before 'and','or','if','unless','&&','||'
+
+The reason perltidy does not just always break at these points is that if
+there are multiple, similar statements, this would preclude alignment.  So
+rather than check for this, perltidy just tries to follow the input style,
+in the hopes that the author made a good choice. Here is an example where 
+we might not want to break before each 'if':
+
+   ($Locale, @Locale) = ($English, @English) if (@English > @Locale);
+   ($Locale, @Locale) = ($German,  @German)  if (@German > @Locale);
+   ($Locale, @Locale) = ($French,  @French)  if (@French > @Locale);
+   ($Locale, @Locale) = ($Spanish, @Spanish) if (@Spanish > @Locale);
+
+-Added wildcard file expansion for systems with shells which lack this.
+Now 'perltidy *.pl' should work under MSDOS/Windows.  Thanks to Hugh Myers 
+for suggesting this.  This uses builtin glob() for now; I may change that.
+
+-Added new flag -sbl which, if specified, overrides the value of -bl
+for opening sub braces.  This allows formatting of this type:
+
+perltidy -sbl 
+
+sub foo
+{
+   if (!defined($_[0])) {
+       print("Hello, World\n");
+   }
+   else {
+       print($_[0], "\n");
+   }
+}
+Requested by Don Alexander.
+
+-Fixed minor parsing error which prevented a space after a $$ variable
+(pid) in some cases.  Thanks to Michael Cartmell for noting this.
+For example, 
+  old: $$< 700 
+  new: $$ < 700
+
+-Improved line break choices 'and' and 'or' to display logic better.
+For example:
+
+   OLD:
+       exists $self->{'build_dir'} and push @e,
+         "Unwrapped into directory $self->{'build_dir'}";
+
+   NEW:
+       exists $self->{'build_dir'}
+         and push @e, "Unwrapped into directory $self->{'build_dir'}";
+
+-Fixed error of multiple use of abbreviatioin '-dsc'.  -dsc remains 
+abbreviation for delete-side-comments; -dsm is new abbreviation for 
+delete-semicolons.
+
+-Corrected and updated 'usage' help routine.  Thanks to Slaven Rezic for 
+noting an error.
+
+-The default for Windows is, for now, not to do a 'perl -c' syntax
+check (but -syn will activate it).  This is because of problems with
+command.com.  James Freeman sent me a patch which tries to get around
+the problems, and it works in many cases, but testing revealed several
+issues that still need to be resolved.  So for now, the default is no
+syntax check for Windows.
+
+-I added a -T flag when doing perl -c syntax check.
+This is because I test it on a large number of scripts from sources
+unknown, and who knows what might be hidden in initialization blocks?
+Also, deactivated the syntax check if perltidy is run as root.  As a
+benign example, running the previous version of perltidy on the
+following file would cause it to disappear:
+
+       BEGIN{
+               print "Bye, bye baby!\n";
+               unlink $0;
+       }
+
+The new version will not let that happen.
+
+-I am contemplating (but have not yet implemented) making '-lp' the
+default indentation, because it is stable now and may be closer to how
+perl is commonly formatted.  This could be in the next release.  The
+reason that '-lp' was not the original default is that the coding for
+it was complex and not ready for the initial release of perltidy.  If
+anyone has any strong feelings about this, I'd like to hear.  The
+current default could always be recovered with the '-nlp' flag.
+
+ +

2001 09 03

+ +
-html updates:
+    - sub definition names are now specially colored, red by default.  
+      The letter 'm' is used to identify them.
+    - keyword 'sub' now has color of other keywords.
+    - restored html keyword color to __END__ and __DATA__, which was 
+      accidentally removed in the previous version.
+
+-A new -se (--standard-error-output) flag has been implemented and
+documented which causes all errors to be written to standard output
+instead of a .ERR file.
+
+-A new -w (--warning-output) flag has been implemented and documented
+ which causes perltidy to output certain non-critical messages to the
+ error output file, .ERR.  These include complaints about pod usage,
+ for example.  The default is to not include these.
+
+ NOTE: This replaces an undocumented -w=0 or --warning-level flag
+ which was tentatively introduced in the previous version to avoid some
+ unwanted messages.  The new default is the same as the old -w=0, so
+ that is no longer needed. 
+
+ -Improved syntax checking and corrected tokenization of functions such
+ as rand, srand, sqrt, ...  These can accept either an operator or a term
+ to their right.  This has been corrected.
+
+-Corrected tokenization of semicolon: testing of the previous update showed 
+that the semicolon in the following statement was being mis-tokenized.  That
+did no harm, other than adding an extra blank space, but has been corrected.
+
+         for (sort {strcoll($a,$b);} keys %investments) {
+            ...
+         }
+
+-New syntax check: after wasting 5 minutes trying to resolve a syntax
+ error in which I had an extra terminal ';' in a complex for (;;) statement, 
+ I spent a few more minutes adding a check for this in perltidy so it won't
+ happen again.
+
+-The behavior of --break-before-subs (-bbs) and --break-before-blocks
+(-bbb) has been modified.  Also, a new control parameter,
+--long-block-line-count=n (-lbl=n) has been introduced to give more
+control on -bbb.  This was previously a hardwired value.  The reason
+for the change is to reduce the number of unwanted blank lines that
+perltidy introduces, and make it less erratic.  It's annoying to remove
+an unwanted blank line and have perltidy put it back.  The goal is to
+be able to sprinkle a few blank lines in that dense script you
+inherited from Bubba.  I did a lot of experimenting with different
+schemes for introducing blank lines before and after code blocks, and
+decided that there is no really good way to do it.  But I think the new
+scheme is an improvement.  You can always deactivate this with -nbbb.
+I've been meaning to work on this; thanks to Erik Thaysen for bringing
+it to my attention.
+
+-The .LOG file is seldom needed, and I get tired of deleting them, so
+ they will now only be automatically saved if perltidy thinks that it
+ made an error, which is almost never.  You can still force the logfile
+ to be saved with -log or -g.
+
+-Improved method for computing number of columns in a table.  The old
+method always tried for an even number.  The new method allows odd
+numbers when it is obvious that a list is not a hash initialization
+list.
+
+  old: my (
+            $name,       $xsargs, $parobjs, $optypes,
+            $hasp2child, $pmcode, $hdrcode, $inplacecode,
+            $globalnew,  $callcopy
+         )
+         = @_;
+
+  new: my (
+            $name,   $xsargs,  $parobjs,     $optypes,   $hasp2child,
+            $pmcode, $hdrcode, $inplacecode, $globalnew, $callcopy
+         )
+         = @_;
+
+-I fiddled with the list threshold adjustment, and some small lists
+look better now.  Here is the change for one of the lists in test file
+'sparse.t':
+old:
+  %units =
+    ("in", "in", "pt", "pt", "pc", "pi", "mm", "mm", "cm", "cm", "\\hsize", "%",
+      "\\vsize", "%", "\\textwidth", "%", "\\textheight", "%");
+
+new:
+  %units = (
+             "in",      "in", "pt",          "pt", "pc",           "pi",
+             "mm",      "mm", "cm",          "cm", "\\hsize",      "%",
+             "\\vsize", "%",  "\\textwidth", "%",  "\\textheight", "%"
+             );
+
+-Improved -lp formatting at '=' sign.  A break was always being added after
+the '=' sign in a statement such as this, (to be sure there was enough room
+for the parameters):
+
+old: my $fee =
+       CalcReserveFee(
+                       $env,          $borrnum,
+                       $biblionumber, $constraint,
+                       $bibitems
+                       );
+
+The updated version doesn't do this unless the space is really needed:
+
+new: my $fee = CalcReserveFee(
+                              $env,          $borrnum,
+                              $biblionumber, $constraint,
+                              $bibitems
+                              );
+
+-I updated the tokenizer to allow $#+ and $#-, which seem to be new to
+Perl 5.6.  Some experimenting with a recent version of Perl indicated
+that it allows these non-alphanumeric '$#' array maximum index
+varaibles: $#: $#- $#+ so I updated the parser accordingly.  Only $#:
+seems to be valid in older versions of Perl.
+
+-Fixed a rare formatting problem with -lp (and -gnu) which caused
+excessive indentation.
+
+-Many additional syntax checks have been added.
+
+-Revised method for testing here-doc target strings; the following
+was causing trouble with a regex test because of the '*' characters:
+ print <<"*EOF*";
+ bla bla
+ *EOF*
+Perl seems to allow almost anything to be a here doc target, so an
+exact string comparison is now used.
+
+-Made update to allow underscores in binary numbers, like '0b1100_0000'.
+
+-Corrected problem with scanning certain module names; a blank space was 
+being inserted after 'warnings' in the following:
+   use warnings::register;
+The problem was that warnings (and a couple of other key modules) were 
+being tokenized as keywords.  They should have just been identifiers.
+
+-Corrected tokenization of indirect objects after sort, system, and exec,
+after testing produced an incorrect error message for the following
+line of code:
+   print sort $sortsubref @list;
+
+-Corrected minor problem where a line after a format had unwanted
+extra continuation indentation.  
+
+-Delete-block-comments (and -dac) now retain any leading hash-bang line
+
+-Update for -lp (and -gnu) to not align the leading '=' of a list
+with a previous '=', since this interferes with alignment of parameters.
+
+ old:  my $hireDay = new Date;
+       my $self    = {
+                    firstName => undef,
+                    lastName  => undef,
+                    hireDay   => $hireDay
+                    };
+
+ new:  my $hireDay = new Date;
+       my $self = {
+                    firstName => undef,
+                    lastName  => undef,
+                    hireDay   => $hireDay
+                    };
+
+-Modifications made to display tables more compactly when possible,
+ without adding lines. For example,
+ old:
+               '1', "I", '2', "II", '3', "III", '4', "IV",
+               '5', "V", '6', "VI", '7', "VII", '8', "VIII",
+               '9', "IX"
+ new:
+               '1', "I",   '2', "II",   '3', "III",
+               '4', "IV",  '5', "V",    '6', "VI",
+               '7', "VII", '8', "VIII", '9', "IX"
+
+-Corrected minor bug in which -pt=2 did not keep the right paren tight
+around a '++' or '--' token, like this:
+
+           for ($i = 0 ; $i < length $key ; $i++ )
+
+The formatting for this should be, and now is: 
+
+           for ($i = 0 ; $i < length $key ; $i++)
+
+Thanks to Erik Thaysen for noting this.
+
+-Discovered a new bug involving here-docs during testing!  See BUGS.html.  
+
+-Finally fixed parsing of subroutine attributes (A Perl 5.6 feature).
+However, the attributes and prototypes must still be on the same line
+as the sub name.
+
+ +

2001 07 31

+ +
-Corrected minor, uncommon bug found during routine testing, in which a
+blank got inserted between a function name and its opening paren after
+a file test operator, but only in the case that the function had not
+been previously seen.  Perl uses the existence (or lack thereof) of 
+the blank to guess if it is a function call.  That is,
+   if (-l pid_filename()) {
+became
+   if (-l pid_filename ()) {
+which is a syntax error if pid_filename has not been seen by perl.
+
+-If the AutoLoader module is used, perltidy will continue formatting
+code after seeing an __END__ line.  Use -nlal to deactivate this feature.  
+Likewise, if the SelfLoader module is used, perltidy will continue 
+formatting code after seeing a __DATA__ line.  Use -nlsl to
+deactivate this feature.  Thanks to Slaven Rezic for this suggestion.
+
+-pod text after __END__ and __DATA__ is now identified by perltidy
+so that -dp works correctly.  Thanks to Slaven Rezic for this suggestion.
+
+-The first $VERSION line which might be eval'd by MakeMaker
+is now passed through unchanged.  Use -npvl to deactivate this feature.
+Thanks to Manfred Winter for this suggestion.
+
+-Improved indentation of nested parenthesized expressions.  Tests have
+given favorable results.  Thanks to Wolfgang Weisselberg for helpful
+examples.
+
+ +

2001 07 23

+ +
-Fixed a very rare problem in which an unwanted semicolon was inserted
+due to misidentification of anonymous hash reference curly as a code
+block curly.  (No instances of this have been reported; I discovered it
+during testing).  A workaround for older versions of perltidy is to use
+-nasc.
+
+-Added -icb (-indent-closing-brace) parameter to indent a brace which
+terminates a code block to the same level as the previous line.
+Suggested by Andrew Cutler.  For example, 
+
+       if ($task) {
+           yyy();
+           }    # -icb
+       else {
+           zzz();
+           }
+
+-Rewrote error message triggered by an unknown bareword in a print or
+printf filehandle position, and added flag -w=0 to prevent issuing this
+error message.  Suggested by Byron Jones.
+
+-Added modification to align a one-line 'if' block with similar
+following 'elsif' one-line blocks, like this:
+     if    ( $something eq "simple" )  { &handle_simple }
+     elsif ( $something eq "hard" )    { &handle_hard }
+(Suggested by  Wolfgang Weisselberg).
+
+ +

2001 07 02

+ +
-Eliminated all constants with leading underscores because perl 5.005_03
+does not support that.  For example, _SPACES changed to XX_SPACES.
+Thanks to kromJx for this update.
+
+ +

2001 07 01

+ +
-the directory of test files has been moved to a separate distribution
+file because it is getting large but is of little interest to most users.
+For the current distribution:
+  perltidy-20010701.tgz        contains the source and docs for perltidy
+  perltidy-20010701-test.tgz   contains the test files
+
+-fixed bug where temporary file perltidy.TMPI was not being deleted 
+when input was from stdin.
+
+-adjusted line break logic to not break after closing brace of an
+eval block (suggested by Boris Zentner).
+
+-added flag -gnu (--gnu-style) to give an approximation to the GNU
+style as sometimes applied to perl.  The programming style in GNU
+'automake' was used as a guide in setting the parameters; these
+parameters will probably be adjusted over time.
+
+-an empty code block now has one space for emphasis:
+  if ( $cmd eq "bg_untested" ) {}    # old
+  if ( $cmd eq "bg_untested" ) { }   # new
+If this bothers anyone, we could create a parameter.
+
+-the -bt (--brace-tightness) parameter has been split into two
+parameters to give more control. -bt now applies only to non-BLOCK
+braces, while a new parameter -bbt (block-brace-tightness) applies to
+curly braces which contain code BLOCKS. The default value is -bbt=0.
+
+-added flag -icp (--indent-closing-paren) which leaves a statement
+termination of the form );, };, or ]; indented with the same
+indentation as the previous line.  For example,
+
+   @month_of_year = (          # default, or -nicp
+       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+       'Nov', 'Dec'
+   );
+
+   @month_of_year = (          # -icp
+       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+       'Nov', 'Dec'
+       );
+
+-Vertical alignment updated to synchronize with tokens &&, ||,
+and, or, if, unless.  Allowable space before forcing
+resynchronization has been increased.  (Suggested by  Wolfgang
+Weisselberg).
+
+-html corrected to use -nohtml-bold-xxxxxxx or -nhbx to negate bold,
+and likewise -nohtml-italic-xxxxxxx or -nhbi to negate italic.  There
+was no way to negate these previously.  html documentation updated and
+corrected.  (Suggested by  Wolfgang Weisselberg).
+
+-Some modifications have been made which improve the -lp formatting in
+a few cases.
+
+-Perltidy now retains or creates a blank line after an =cut to keep
+podchecker happy (Suggested by Manfred H. Winter).  This appears to be
+a glitch in podchecker, but it was annoying.
+
+ +

2001 06 17

+ +
-Added -bli flag to give continuation indentation to braces, like this
+
+       if ($bli_flag)
+         {
+           extra_indentation();
+         }
+
+-Corrected an error with the tab (-t) option which caused the last line
+of a multi-line quote to receive a leading tab.  This error was in
+version 2001 06 08  but not 2001 04 06.  If you formatted a script
+with -t with this version, please check it by running once with the
+-chk flag and perltidy will scan for this possible error.
+
+-Corrected an invalid pattern (\R should have been just R), changed
+$^W =1 to BEGIN {$^W=1} to use warnings in compile phase, and corrected
+several unnecessary 'my' declarations. Many thanks to Wolfgang Weisselberg,
+2001-06-12, for catching these errors.
+
+-A '-bar' flag has been added to require braces to always be on the
+right, even for multi-line if and foreach statements.  For example,
+the default formatting of a long if statement would be:
+
+       if ($bigwasteofspace1 && $bigwasteofspace2
+         || $bigwasteofspace3 && $bigwasteofspace4)
+       {
+           bigwastoftime();
+       }
+
+With -bar, the formatting is:
+
+       if ($bigwasteofspace1 && $bigwasteofspace2
+         || $bigwasteofspace3 && $bigwasteofspace4) {
+           bigwastoftime();
+       }
+Suggested by Eli Fidler 2001-06-11.
+
+-Uploaded perltidy to sourceforge cvs 2001-06-10.
+
+-An '-lp' flag (--line-up-parentheses) has been added which causes lists
+to be indented with extra indentation in the manner sometimes
+associated with emacs or the GNU suggestions.  Thanks to Ian Stuart for
+this suggestion and for extensive help in testing it. 
+
+-Subroutine call parameter lists are now formatted as other lists.
+This should improve formatting of tables being passed via subroutine
+calls.  This will also cause full indentation ('-i=n, default n= 4) of
+continued parameter list lines rather than just the number of spaces
+given with -ci=n, default n=2.
+
+-Added support for hanging side comments.  Perltidy identifies a hanging
+side comment as a comment immediately following a line with a side
+comment or another hanging side comment.  This should work in most
+cases.  It can be deactivated with --no-hanging-side-comments (-nhsc).
+The manual has been updated to discuss this.  Suggested by Brad
+Eisenberg some time ago, and finally implemented.
+
+ +

2001 06 08

+ +
-fixed problem with parsing command parameters containing quoted
+strings in .perltidyrc files. (Reported by Roger Espel Llima 2001-06-07).
+
+-added two command line flags, --want-break-after and 
+--want-break-before, which allow changing whether perltidy
+breaks lines before or after any operators.  Please see the revised 
+man pages for details.
+
+-added system-wide configuration file capability.
+If perltidy does not find a .perltidyrc command line file in
+the current directory, nor in the home directory, it now looks
+for '/usr/local/etc/perltidyrc' and then for '/etc/perltidyrc'.
+(Suggested by Roger Espel Llima 2001-05-31).
+
+-fixed problem in which spaces were trimmed from lines of a multi-line
+quote. (Reported by Roger Espel Llima 2001-05-30).  This is an 
+uncommon situation, but serious, because it could conceivably change
+the proper function of a script.
+
+-fixed problem in which a semicolon was incorrectly added within 
+an anonymous hash.  (Reported by A.C. Yardley, 2001-5-23).
+(You would know if this happened, because perl would give a syntax
+error for the resulting script).
+
+-fixed problem in which an incorrect error message was produced
+ after a version number on a 'use' line, like this ( Reported 
+ by Andres Kroonmaa, 2001-5-14):
+
+             use CGI 2.42 qw(fatalsToBrowser);
+
+ Other than the extraneous error message, this bug was harmless.
+
+ +

2001 04 06

+ +
-fixed serious bug in which the last line of some multi-line quotes or
+ patterns was given continuation indentation spaces.  This may make
+ a pattern incorrect unless it uses the /x modifier.  To find
+ instances of this error in scripts which have been formatted with
+ earlier versions of perltidy, run with the -chk flag, which has
+ been added for this purpose (SLH, 2001-04-05).
+
+ ** So, please check previously formatted scripts by running with -chk
+ at least once **
+
+-continuation indentation has been reprogrammed to be hierarchical, 
+ which improves deeply nested structures.
+
+-fixed problem with undefined value in list formatting (reported by Michael
+ Langner 2001-04-05)
+
+-Switched to graphical display of nesting in .LOG files.  If an
+ old format string was "(1 [0 {2", the new string is "{{(".  This
+ is easier to read and also shows the order of nesting.
+
+-added outdenting of cuddled paren structures, like  ")->pack(".
+
+-added line break and outdenting of ')->' so that instead of
+
+       $mw->Label(
+         -text   => "perltidy",
+         -relief => 'ridge')->pack;
+
+ the current default is:
+
+       $mw->Label(
+         -text   => "perltidy",
+         -relief => 'ridge'
+       )->pack;
+
+ (requested by Michael Langner 2001-03-31; in the future this could 
+ be controlled by a command-line parameter).
+
+-revised list indentation logic, so that lists following an assignment
+ operator get one full indentation level, rather than just continuation 
+ indentation.  Also corrected some minor glitches in the continuation 
+ indentation logic. 
+
+-Fixed problem with unwanted continuation indentation after a blank line 
+(reported by Erik Thaysen 2001-03-28):
+
+-minor update to avoid stranding a single '(' on one line
+
+ +

2001 03 28:

+ +
-corrected serious error tokenizing filehandles, in which a sub call 
+after a print or printf, like this:
+   print usage() and exit;
+became this:
+   print usage () and exit;
+Unfortunately, this converts 'usage' to a filehandle.  To fix this, rerun
+perltidy; it will look for this situation and issue a warning. 
+
+-fixed another cuddled-else formatting bug (Reported by Craig Bourne)
+
+-added several diagnostic --dump routines
+
+-added token-level whitespace controls (suggested by Hans Ecke)
+
+ +

2001 03 23:

+ +
-added support for special variables of the form ${^WANT_BITS}
+
+-space added between scalar and left paren in 'for' and 'foreach' loops,
+ (suggestion by Michael Cartmell):
+
+   for $i( 1 .. 20 )   # old
+   for $i ( 1 .. 20 )   # new
+
+-html now outputs cascading style sheets (thanks to suggestion from
+ Hans Ecke)
+
+-flags -o and -st now work with -html
+
+-added missing -html documentation for comments (noted by Alex Izvorski)
+
+-support for VMS added (thanks to Michael Cartmell for code patches and 
+  testing)
+
+-v-strings implemented (noted by Hans Ecke and Michael Cartmell; extensive
+  testing by Michael Cartmell)
+
+-fixed problem where operand may be empty at line 3970 
+ (\b should be just b in lines 3970, 3973) (Thanks to Erik Thaysen, 
+ Keith Marshall for bug reports)
+
+-fixed -ce bug (cuddled else), where lines like '} else {' were indented
+ (Thanks to Shawn Stepper and Rick Measham for reporting this)
+
+ +

2001 03 04:

+ +
-fixed undefined value in line 153 (only worked with -I set)
+(Thanks to Mike Stok, Phantom of the Opcodes, Ian Ehrenwald, and others)
+
+-fixed undefined value in line 1069 (filehandle problem with perl versions <
+5.6) (Thanks to Yuri Leikind, Mike Stok, Michael Holve, Jeff Kolber)
+
+ +

2001 03 03:

+ +
-Initial announcement at freshmeat.net; started Change Log
+(Unfortunately this version was DOA, but it was fixed the next day)
+
diff --git a/docs/INSTALL.html b/docs/INSTALL.html new file mode 100644 index 0000000..9ebd7b5 --- /dev/null +++ b/docs/INSTALL.html @@ -0,0 +1,430 @@ +

PERLTIDY INSTALLATION NOTES

+ +

Get a distribution file

+ +
    +
  • Source Files in .tar.gz and .zip format

    + +

    This document tells how to install perltidy from the basic source +distribution files in .tar.gz or .zip format. These files are +identical except for the line endings. The .tar.gz has Unix style +line endings, and the .zip file has Windows style line endings. The +standard perl MakeMaker method should work for these in most cases.

  • +
  • Source files in RPM and .deb format

    + +

    The web site also has links to RPM and Debian .deb Linux packages, which may be +convenient for some users.

  • +
+ +

Quick Test Drive

+ +

If you want to do a quick test of perltidy without doing any installation, get +a .tar.gz or a .zip source file and see the section below "Method 2: Installation +as a single binary script".

+ +

Uninstall older versions

+ +

In certain circumstances, it is best to remove an older version +of perltidy before installing the latest version. These are:

+ +
    +
  • Uninstall a Version older than 20020225

    + +

    You can use perltidy -v to determine the version number. The first +version of perltidy to use Makefile.PL for installation was 20020225, so +if your previous installation is older than this, it is best to remove +it, because the installation path may now be different. There were up +to 3 files these older installations: the script perltidy and +possibly two man pages, perltidy.1 and perl2web.1. If you saved +your Makefile, you can probably use make uninstall. Otherwise, you +can use a locate or find command to find and remove these files.

  • +
  • Uninstall older versions when changing installation method

    + +

    If you switch from one installation method to another, the paths to the +components of perltidy may change, so it is probably best to remove the older +version before installing the new version. If your older installation method +had an uninstall option (such as with RPM's and debian packages), use it. +Otherwise, you can locate and remove the older files by hand. There are two +key files: Tidy.pm and perltidy. In addition, there may be one or two +man pages, something like Perl::Tidy.3pm and perltidy.1p. You can use a +locate and/or find command to find and remove these files. After +installation, you can verify that the new version of perltidy is working with +the perltidy -v command.

  • +
+ +

Two Installation Methods - Overview

+ +

These are generic instructions. Some system-specific notes and hints +are given in later sections.

+ +

Two separate installation methods are possible.

+ +
    +
  • Method 1: Standard Installation Method

    + +

    The standard method based on MakeMaker should work in a normal perl +environment. This is the recommended installation procedure for +systems which support it.

    + +
        perl Makefile.PL
    +    make
    +    make test
    +    make install
    +
    + +

    The make command is probably nmake under a Windows system. You +may need to become root (or administrator) before doing the make +install step.

  • +
  • Method 2: Installation as a single binary script

    + +

    If you just want to take perltidy for a quick test drive without installing it, +or are having trouble installing modules, you can bundle it all in one +independent executable script. This might also be helpful on a system for +which the Makefile.PL method does not work, or if you are temporarily a guest +on some system, or if you want to try hacking a special version of perltidy +without messing up your regular version.

    + +

    You just need to uncompress the source distribution, cd down into it, and enter +the command:

    + +
        perl pm2pl
    +
    + +

    which will combine the pieces of perltidy into a single script named +perltidy in the current directory. This script should be +fully functional. Try it out on a handy perl script, for example

    + +
    perl perltidy Makefile.PL
    +
    + +

    This should create Makefile.PL.tdy.

  • +
  • After Installation

    + +

    After installation by either method, verify that the installation worked +and that the correct new version is being by entering:

    + +
    perltidy -v
    +
    + +

    If the version number disagrees with the version number embedded in the +distribution file name, search for and remove the old version. +For example, under a Unix system, the command which perltidy might +show where it is. Also, see the above notes on uninstalling older +versions.

    + +

    On a Unix system running the bash shell, if you had a previous +installation of perltidy, you may have to use

    + +
    hash -r
    +
    + +

    to get the shell to find the new one.

    + +

    After perltidy is installed, you can find where it will look for +configuration files and environment variables on your system with +the command:

    + +
    perltidy -dpro
    +
  • +
  • How to Uninstall

    + +

    Unfortunately, the standard Perl installation method does not seem able +to do an uninstall.

    + +

    But try this:

    + +
    make uninstall
    +
    + +

    On some systems, it will give you a list of files to remove by hand. If +not, you need to find the script perltidy and its module file +Tidy.pm, which will be in a subdirectory named Perl in the site +library.

    + +

    If you installed perltidy with the alternative method, you should just +reverse the steps that you used.

  • +
+ +

Unix Installation Notes

+ +
    +
  • Alternative method - Unix

    + +

    If the alternative method is used, test the script produced by the +pm2pl perl script:

    + +
    perl ./perltidy somefile.pl
    +
    + +

    where somefile.pl is any convenient test file, such as Makefile.PL +itself. Then,

    + +

    1. If the script is not executable, use

    + +
    chmod +x perltidy
    +
    + +

    2. Verify that the initial line in perltidy works for your system by +entering:

    + +
    ./perltidy -h
    +
    + +

    which should produce the usage text and then exit. This should usually +work, but if it does not, you will need to change the first line in +perltidy to reflect the location of perl on your system. On a Unix +system, you might find the path to perl with the command 'which perl'.

    + +

    3. A sample Makefile for this installation method is Makefile.npm. +Edit it to have the correct paths.

    + +

    You will need to become root unless you change the paths to point to +somewhere in your home directory. Then issue the command

    + +
    make -f Makefile.npm install
    +
    + +

    This installs perltidy and the man page perltidy.1.

    + +

    5. Test the installation using

    + +
    perltidy -h
    +
    + +

    You should see the usage screen. Then, if you installed the man pages, +try

    + +
    man perltidy
    +
    + +

    which should bring up the manual page.

    + +

    If you ever want to remove perltidy, you can remove perltidy and its man +pages by hand or use

    + +
    make uninstall
    +
  • +
+ +

Windows Installation Notes

+ +

On a Windows 9x/Me system you should CLOSE ANY OPEN APPLICATIONS to +avoid losing unsaved data in case of trouble.

+ +
    +
  • Standard Method - Windows

    + +

    After you unzip the distribution file, the procedure is probably this:

    + +
        perl Makefile.PL
    +    nmake
    +    nmake test
    +    nmake install
    +
    + +

    You may need to download a copy of unzip to unzip the .zip distribution +file; you can get this at +http://www.info-zip.org/pub/infozip/UnZip.html

    + +

    If you have ActiveState +Perl, the installation method is outlined at +http://aspn.activestate.com//ASPN/Reference/Products/ActivePerl/faq/Windows/ActivePerl-Winfaq9.html#How_can_I_use_modules_from_CPAN_

    + +

    You may need to download a copy of Microsoft's nmake program from +ftp://ftp.microsoft.com/Softlib/MSLFILES/nmake15.exe

    + +

    If you are not familiar with installing modules, or have trouble doing +so, and want to start testing perltidy quickly, you may want to use the +alternative method instead (next section).

  • +
  • Alternative Method - Windows

    + +

    From the main installation directory, just enter

    + +
    perl pm2pl
    +
    + +

    Placing the resulting file perltidy and the example batch file +perltidy.bat, located in the examples directory, in your path should +work. (You can determine your path by issuing the msdos command +PATH). However, the batch file probably will not support file +redirection. So, for example, to pipe the long help message through +'more', you might have to invoke perltidy with perl directly, like this:

    + +
    perl \somepath\perltidy -h | more
    +
    + +

    The batch file will not work properly with wildcard filenames, but you may +use wildcard filenames if you place them in quotes. For example

    + +
    perltidy '*.pl'
    +
  • +
+ +

VMS Installation Notes

+ +
    +
  • Links to VMS Utilities and Documentation

    + +

    To install perltidy you will need the following utilities Perl, of +course, source with VMS goodies available from +http://www.sidhe.org/vmsperl or binary available from the Compaq OpenVMS +freeware CD. To unpack the source either gunzip and vmstar available +from the Compaq OpenVMS freeware CD or zip available from +http://www.info-zip.org/

    + +

    To build perltidy you can use either MMS, Compaq's VMS equivalent of +make, or MMK, an MMS clone available from +http://www.madgoat.com.

    + +

    Information on running perl under VMS can be found at: +http://w4.lns.cornell.edu/~pvhp/perl/VMS.html

  • +
  • Unpack the source:

    + +
    $ unzip -a perl-tidy-yyyymmdd.zip  ! or
    +
    +
    +$ unzip /text=auto perl-tidy-yyyymmdd.zip ! or
    +
    +
    +$ gunzip perl-tidy-yyyymmdd.tgz
    +$ vmstar perl-tidy-yyyymmdd.tar
    +
  • +
  • Build and install perltidy under VMS:

    + +
    $ set default [.perl-tidy-yyymmdd]
    +$ perl perltidy.pl
    +$ mmk
    +$ mmk test
    +$ mmk install
    +
  • +
  • Using Perltidy under VMS

    + +

    Create a symbol. This should be put in a logon script, eg sylogin.com

    + +
    $ perltidy == "perl perl_root:[utils]perltidy."
    +
    + +

    Default parameters can be placed in a perltidyrc file. Perltidy +looks for one in the following places and uses the first found if the +logical PERLTIDY is a file and the file exists then that is used if the +logical PERLTIDY is a directory then look for a .perltidyrc file in the +directory look for a .perltidyrc file in the user's home directory

    + +

    To see where the search is done and which .perltidyrc is used type

    + +
    $ perltidy -dpro
    +
    + +

    A system PERLTIDY logical can be defined pointing to a file with a +minimal configuration, and users can defined their own logical to use a +personal .perltidyrc file.

    + +
    $ define /system perltidy perl_root:[utils]perltidy.rc
    +
  • +
  • The -x Parameter

    + +

    If you have one of the magic incantations at the start of perl scripts, +so that they can be invoked as a .com file, then you will need to use +the -x parameter which causes perltidy to skip all lines until it +finds a hash bang line eg #!perl -w. Since it is such a common +option this is probably a good thing to put in a .perltidyrc file.

  • +
  • VMS File Extensions

    + +

    VMS file extensions will use an underscore character instead of a dot, +when necessary, to create a valid filename. So

    + +
      perltidy myfile.pl
    +
    + +

    will generate the output file myfile.pl_tdy instead of +myfile.pl.tdy, and so on.

  • +
+ +

Troubleshooting / Other Operating Systems

+ +

If there seems to be a problem locating a configuration file, you can see +what is going on in the config file search with:

+ +
perltidy -dpro
+
+ +

If you want to customize where perltidy looks for configuration files, +look at the routine 'find_config_file' in module 'Tidy.pm'. You should +be able to at least use the '-pro=filename' method under most systems.

+ +

Remember to place quotes (either single or double) around input +parameters which contain spaces, such as file names. For example:

+ +
perltidy "file name with spaces"
+
+ +

Without the quotes, perltidy would look for four files: file, +name, with, and spaces.

+ +

If you develop a system-dependent patch that might be of general +interest, please let us know.

+ +

CONFIGURATION FILE

+ +

You do not need a configuration file, but you may eventually want to +create one to save typing; the tutorial and man page discuss this.

+ +

SYSTEM TEMPORARY FILES

+ +

Perltidy needs to create a system temporary file when it invokes +Pod::Html to format pod text under the -html option. For Unix systems, +this will normally be a file in /tmp, and for other systems, it will be +a file in the current working directory named perltidy.TMP. This file +will be removed when the run finishes.

+ +

DOCUMENTATION

+ +

Documentation is contained in .pod format, either in the docs directory +or appended to the scripts.

+ +

These documents can also be found at http://perltidy.sourceforge.net

+ +

Reading the brief tutorial should help you use perltidy effectively.
+The tutorial can be read interactively with perldoc, for +example

+ +
cd docs
+perldoc tutorial.pod
+
+ +

or else an html version can be made with pod2html:

+ +
pod2html tutorial.pod >tutorial.html
+
+ +

If you use the Makefile.PL installation method on a Unix system, the +perltidy and Perl::Tidy man pages should automatically be installed. +Otherwise, you can extract the man pages with the pod2xxxx utilities, as +follows:

+ +
cd bin
+pod2text perltidy >perltidy.txt
+pod2html perltidy >perltidy.html
+
+cd lib/Perl
+pod2text Tidy.pm >Tidy.txt
+pod2html Tidy.pm >Tidy.html
+
+ +

After installation, the installation directory of files may be deleted.

+ +

Perltidy is still being developed, so please check sourceforge occasionally +for updates if you find that it is useful. New releases are announced +on freshmeat.net.

+ +

CREDITS

+ +

Thanks to the many programmers who have documented problems, made suggestions and sent patches.

+ +

FEEDBACK / BUG REPORTS

+ +

If you see ways to improve these notes, please let us know.

+ +

A list of current bugs and issues can be found at the CPAN site https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy

+ +

To report a new bug or problem, use the link on this page .

diff --git a/docs/README b/docs/README deleted file mode 100644 index 9a5bcff..0000000 --- a/docs/README +++ /dev/null @@ -1,13 +0,0 @@ -All of the documentation for perltidy can be found at -http://perltidy.sourceforge.net - -The man page is in pod format appended to the script bin/perltidy. - -The man page for use of the module Perl::Tidy.pm is appended to that file. - -tutorial.pod - is a short tutorial -testfile.pl - the test file referenced by tutorial.pod - -stylekey.pod - is a document to help find parameters for a given style - -perltidy.1 - is a man page for installations which do not use Makefile.PL diff --git a/docs/Tidy.html b/docs/Tidy.html new file mode 100644 index 0000000..2d3a162 --- /dev/null +++ b/docs/Tidy.html @@ -0,0 +1,377 @@ + + + + + + + + + + + + + + + +

NAME

+ +

Perl::Tidy - Parses and beautifies perl source

+ +

SYNOPSIS

+ +
    use Perl::Tidy;
+
+    my $error_flag = Perl::Tidy::perltidy(
+        source            => $source,
+        destination       => $destination,
+        stderr            => $stderr,
+        argv              => $argv,
+        perltidyrc        => $perltidyrc,
+        logfile           => $logfile,
+        errorfile         => $errorfile,
+        formatter         => $formatter,           # callback object (see below)
+        dump_options      => $dump_options,
+        dump_options_type => $dump_options_type,
+        prefilter         => $prefilter_coderef,
+        postfilter        => $postfilter_coderef,
+    );
+ +

DESCRIPTION

+ +

This module makes the functionality of the perltidy utility available to perl scripts. Any or all of the input parameters may be omitted, in which case the @ARGV array will be used to provide input parameters as described in the perltidy(1) man page.

+ +

For example, the perltidy script is basically just this:

+ +
    use Perl::Tidy;
+    Perl::Tidy::perltidy();
+ +

The call to perltidy returns a scalar $error_flag which is TRUE if an error caused premature termination, and FALSE if the process ran to normal completion. Additional discuss of errors is contained below in the "ERROR HANDLING" section.

+ +

The module accepts input and output streams by a variety of methods. The following list of parameters may be any of the following: a filename, an ARRAY reference, a SCALAR reference, or an object with either a getline or print method, as appropriate.

+ +
        source            - the source of the script to be formatted
+        destination       - the destination of the formatted output
+        stderr            - standard error output
+        perltidyrc        - the .perltidyrc file
+        logfile           - the .LOG file stream, if any 
+        errorfile         - the .ERR file stream, if any
+        dump_options      - ref to a hash to receive parameters (see below), 
+        dump_options_type - controls contents of dump_options
+        dump_getopt_flags - ref to a hash to receive Getopt flags
+        dump_options_category - ref to a hash giving category of options
+        dump_abbreviations    - ref to a hash giving all abbreviations
+ +

The following chart illustrates the logic used to decide how to treat a parameter.

+ +
   ref($param)  $param is assumed to be:
+   -----------  ---------------------
+   undef        a filename
+   SCALAR       ref to string
+   ARRAY        ref to array
+   (other)      object with getline (if source) or print method
+ +

If the parameter is an object, and the object has a close method, that close method will be called at the end of the stream.

+ +
+ +
source
+
+ +

If the source parameter is given, it defines the source of the input stream. If an input stream is defined with the source parameter then no other source filenames may be specified in the @ARGV array or argv parameter.

+ +
+
destination
+
+ +

If the destination parameter is given, it will be used to define the file or memory location to receive output of perltidy.

+ +
+
stderr
+
+ +

The stderr parameter allows the calling program to redirect the stream that would otherwise go to the standard error output device to any of the stream types listed above. This stream contains important warnings and errors related to the parameters passed to perltidy.

+ +
+
perltidyrc
+
+ +

If the perltidyrc file is given, it will be used instead of any .perltidyrc configuration file that would otherwise be used.

+ +
+
errorfile
+
+ +

The errorfile parameter allows the calling program to capture the stream that would otherwise go to either a .ERR file. This stream contains warnings or errors related to the contents of one source file or stream.

+ +

The reason that this is different from the stderr stream is that when perltidy is called to process multiple files there will be up to one .ERR file created for each file and it would be very confusing if they were combined.

+ +

However if perltidy is called to process just a single perl script then it may be more convenient to combine the errorfile stream with the stderr stream. This can be done by setting the -se parameter, in which case this parameter is ignored.

+ +
+
logfile
+
+ +

The logfile parameter allows the calling program to capture the log stream. This stream is only created if requested with a -g parameter. It contains detailed diagnostic information about a script which may be useful for debugging.

+ +
+
argv
+
+ +

If the argv parameter is given, it will be used instead of the @ARGV array. The argv parameter may be a string, a reference to a string, or a reference to an array. If it is a string or reference to a string, it will be parsed into an array of items just as if it were a command line string.

+ +
+
dump_options
+
+ +

If the dump_options parameter is given, it must be the reference to a hash. In this case, the parameters contained in any perltidyrc configuration file will be placed in this hash and perltidy will return immediately. This is equivalent to running perltidy with --dump-options, except that the parameters are returned in a hash rather than dumped to standard output. Also, by default only the parameters in the perltidyrc file are returned, but this can be changed (see the next parameter). This parameter provides a convenient method for external programs to read a perltidyrc file. An example program using this feature, perltidyrc_dump.pl, is included in the distribution.

+ +

Any combination of the dump_ parameters may be used together.

+ +
+
dump_options_type
+
+ +

This parameter is a string which can be used to control the parameters placed in the hash reference supplied by dump_options. The possible values are 'perltidyrc' (default) and 'full'. The 'full' parameter causes both the default options plus any options found in a perltidyrc file to be returned.

+ +
+
dump_getopt_flags
+
+ +

If the dump_getopt_flags parameter is given, it must be the reference to a hash. This hash will receive all of the parameters that perltidy understands and flags that are passed to Getopt::Long. This parameter may be used alone or with the dump_options flag. Perltidy will exit immediately after filling this hash. See the demo program perltidyrc_dump.pl for example usage.

+ +
+
dump_options_category
+
+ +

If the dump_options_category parameter is given, it must be the reference to a hash. This hash will receive a hash with keys equal to all long parameter names and values equal to the title of the corresponding section of the perltidy manual. See the demo program perltidyrc_dump.pl for example usage.

+ +
+
dump_abbreviations
+
+ +

If the dump_abbreviations parameter is given, it must be the reference to a hash. This hash will receive all abbreviations used by Perl::Tidy. See the demo program perltidyrc_dump.pl for example usage.

+ +
+
prefilter
+
+ +

A code reference that will be applied to the source before tidying. It is expected to take the full content as a string in its input, and output the transformed content.

+ +
+
postfilter
+
+ +

A code reference that will be applied to the tidied result before outputting. It is expected to take the full content as a string in its input, and output the transformed content.

+ +

Note: A convenient way to check the function of your custom prefilter and postfilter code is to use the --notidy option, first with just the prefilter and then with both the prefilter and postfilter. See also the file filter_example.pl in the perltidy distribution.

+ +
+
+ +

ERROR HANDLING

+ +

Perltidy will return with an error flag indicating if the process had to be terminated early due to errors in the input parameters. This can happen for example if a parameter is misspelled or given an invalid value. The calling program should check this flag because if it is set the destination stream will be empty or incomplete and should be ignored. Error messages in the stderr stream will indicate the cause of any problem.

+ +

If the error flag is not set then perltidy ran to completion. However there may still be warning messages in the stderr stream related to control parameters, and there may be warning messages in the errorfile stream relating to possible syntax errors in the source code being tidied.

+ +

In the event of a catastrophic error for which recovery is not possible perltidy terminates by making calls to croak or confess to help the programmer localize the problem. These should normally only occur during program development.

+ +

NOTES ON FORMATTING PARAMETERS

+ +

Parameters which control formatting may be passed in several ways: in a .perltidyrc configuration file, in the perltidyrc parameter, and in the argv parameter.

+ +

The -syn (--check-syntax) flag may be used with all source and destination streams except for standard input and output. However data streams which are not associated with a filename will be copied to a temporary file before being passed to Perl. This use of temporary files can cause somewhat confusing output from Perl.

+ +

If the -pbp style is used it will typically be necessary to also specify a -nst flag. This is necessary to turn off the -st flag contained in the -pbp parameter set which otherwise would direct the output stream to the standard output.

+ +

EXAMPLES

+ +

The following example uses string references to hold the input and output code and error streams, and illustrates checking for errors.

+ +
  use Perl::Tidy;
+  
+  my $source_string = <<'EOT';
+  my$error=Perl::Tidy::perltidy(argv=>$argv,source=>\$source_string,
+    destination=>\$dest_string,stderr=>\$stderr_string,
+  errorfile=>\$errorfile_string,);
+  EOT
+  
+  my $dest_string;
+  my $stderr_string;
+  my $errorfile_string;
+  my $argv = "-npro";   # Ignore any .perltidyrc at this site
+  $argv .= " -pbp";     # Format according to perl best practices
+  $argv .= " -nst";     # Must turn off -st in case -pbp is specified
+  $argv .= " -se";      # -se appends the errorfile to stderr
+  ## $argv .= " --spell-check";  # uncomment to trigger an error
+  
+  print "<<RAW SOURCE>>\n$source_string\n";
+  
+  my $error = Perl::Tidy::perltidy(
+      argv        => $argv,
+      source      => \$source_string,
+      destination => \$dest_string,
+      stderr      => \$stderr_string,
+      errorfile   => \$errorfile_string,    # ignored when -se flag is set
+      ##phasers   => 'stun',                # uncomment to trigger an error
+  );
+  
+  if ($error) {
+  
+      # serious error in input parameters, no tidied output
+      print "<<STDERR>>\n$stderr_string\n";
+      die "Exiting because of serious errors\n";
+  }
+  
+  if ($dest_string)      { print "<<TIDIED SOURCE>>\n$dest_string\n" }
+  if ($stderr_string)    { print "<<STDERR>>\n$stderr_string\n" }
+  if ($errorfile_string) { print "<<.ERR file>>\n$errorfile_string\n" }
+ +

Additional examples are given in examples section of the perltidy distribution.

+ +

Using the formatter Callback Object

+ +

The formatter parameter is an optional callback object which allows the calling program to receive tokenized lines directly from perltidy for further specialized processing. When this parameter is used, the two formatting options which are built into perltidy (beautification or html) are ignored. The following diagram illustrates the logical flow:

+ +
                    |-- (normal route)   -> code beautification
+  caller->perltidy->|-- (-html flag )    -> create html 
+                    |-- (formatter given)-> callback to write_line
+ +

This can be useful for processing perl scripts in some way. The parameter $formatter in the perltidy call,

+ +
        formatter   => $formatter,  
+ +

is an object created by the caller with a write_line method which will accept and process tokenized lines, one line per call. Here is a simple example of a write_line which merely prints the line number, the line type (as determined by perltidy), and the text of the line:

+ +
 sub write_line {
+ 
+     # This is called from perltidy line-by-line
+     my $self              = shift;
+     my $line_of_tokens    = shift;
+     my $line_type         = $line_of_tokens->{_line_type};
+     my $input_line_number = $line_of_tokens->{_line_number};
+     my $input_line        = $line_of_tokens->{_line_text};
+     print "$input_line_number:$line_type:$input_line";
+ }
+ +

The complete program, perllinetype, is contained in the examples section of the source distribution. As this example shows, the callback method receives a parameter $line_of_tokens, which is a reference to a hash of other useful information. This example uses these hash entries:

+ +
 $line_of_tokens->{_line_number} - the line number (1,2,...)
+ $line_of_tokens->{_line_text}   - the text of the line
+ $line_of_tokens->{_line_type}   - the type of the line, one of:
+
+    SYSTEM         - system-specific code before hash-bang line
+    CODE           - line of perl code (including comments)
+    POD_START      - line starting pod, such as '=head'
+    POD            - pod documentation text
+    POD_END        - last line of pod section, '=cut'
+    HERE           - text of here-document
+    HERE_END       - last line of here-doc (target word)
+    FORMAT         - format section
+    FORMAT_END     - last line of format section, '.'
+    DATA_START     - __DATA__ line
+    DATA           - unidentified text following __DATA__
+    END_START      - __END__ line
+    END            - unidentified text following __END__
+    ERROR          - we are in big trouble, probably not a perl script
+ +

Most applications will be only interested in lines of type CODE. For another example, let's write a program which checks for one of the so-called naughty matching variables &`, $&, and $', which can slow down processing. Here is a write_line, from the example program find_naughty.pl, which does that:

+ +
 sub write_line {
+ 
+     # This is called back from perltidy line-by-line
+     # We're looking for $`, $&, and $'
+     my ( $self, $line_of_tokens ) = @_;
+ 
+     # pull out some stuff we might need
+     my $line_type         = $line_of_tokens->{_line_type};
+     my $input_line_number = $line_of_tokens->{_line_number};
+     my $input_line        = $line_of_tokens->{_line_text};
+     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
+     my $rtokens           = $line_of_tokens->{_rtokens};
+     chomp $input_line;
+ 
+     # skip comments, pod, etc
+     return if ( $line_type ne 'CODE' );
+ 
+     # loop over tokens looking for $`, $&, and $'
+     for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
+ 
+         # we only want to examine token types 'i' (identifier)
+         next unless $$rtoken_type[$j] eq 'i';
+ 
+         # pull out the actual token text
+         my $token = $$rtokens[$j];
+ 
+         # and check it
+         if ( $token =~ /^\$[\`\&\']$/ ) {
+             print STDERR
+               "$input_line_number: $token\n";
+         }
+     }
+ }
+ +

This example pulls out these tokenization variables from the $line_of_tokens hash reference:

+ +
     $rtoken_type = $line_of_tokens->{_rtoken_type};
+     $rtokens     = $line_of_tokens->{_rtokens};
+ +

The variable $rtoken_type is a reference to an array of token type codes, and $rtokens is a reference to a corresponding array of token text. These are obviously only defined for lines of type CODE. Perltidy classifies tokens into types, and has a brief code for each type. You can get a complete list at any time by running perltidy from the command line with

+ +
     perltidy --dump-token-types
+ +

In the present example, we are only looking for tokens of type i (identifiers), so the for loop skips past all other types. When an identifier is found, its actual text is checked to see if it is one being sought. If so, the above write_line prints the token and its line number.

+ +

The formatter feature is relatively new in perltidy, and further documentation needs to be written to complete its description. However, several example programs have been written and can be found in the examples section of the source distribution. Probably the best way to get started is to find one of the examples which most closely matches your application and start modifying it.

+ +

For help with perltidy's peculiar way of breaking lines into tokens, you might run, from the command line,

+ +
 perltidy -D filename
+ +

where filename is a short script of interest. This will produce filename.DEBUG with interleaved lines of text and their token types. The -D flag has been in perltidy from the beginning for this purpose. If you want to see the code which creates this file, it is write_debug_entry in Tidy.pm.

+ +

EXPORT

+ +
  &perltidy
+ +

VERSION

+ +

This man page documents Perl::Tidy version 20190601

+ +

LICENSE

+ +

This package is free software; you can redistribute it and/or modify it under the terms of the "GNU General Public License".

+ +

Please refer to the file "COPYING" for details.

+ +

BUG REPORTS

+ +

A list of current bugs and issues can be found at the CPAN site https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy

+ +

To report a new bug or problem, use the link on this page.

+ +

The source code repository is at https://github.com/perltidy/perltidy.

+ +

SEE ALSO

+ +

The perltidy(1) man page describes all of the features of perltidy. It can be found at http://perltidy.sourceforge.net.

+ + + + + + + diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..fb38259 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,72 @@ +

Welcome to Perltidy

+ +

Perltidy is a Perl script which indents and reformats Perl scripts to make them +easier to read. If you write Perl scripts, or spend much time +reading them, you will probably find it useful.

+ +

Perltidy is free software released under the GNU General Public +License -- please see the included file COPYING for details.

+ +

The formatting can be controlled with command line parameters. The default +parameter settings approximately follow the suggestions in the +Perl Style Guide.

+ +

Besides reformatting scripts, Perltidy can help in tracking +down errors with missing or extra braces, parentheses, and square brackets +because it is very good at localizing errors.

+ +

Documentation

+ + + +

Prerequisites

+ +

Perltidy should run on any system with perl 5.008 or later. +The total disk space needed after removing the installation directory will be +about 2 Mb.

+ +

Download

+ +
    +
  • The most recent release is always at CPAN

  • +
  • The most recent release is also at sourceforge

  • +
+ +

Installation

+ +

Perl::Tidy can be installed directly from CPAN one of the standard methods.

+ +

One way is to download a distribution file, unpack it and then +test and install using the Makefile.PL:

+ +
perl Makefile.PL
+make
+make test
+make install
+
+ +

The INSTALL file has additional installation notes. They +are mainly for older sytems but also tell how to use perltidy without doing an installation.

+ +

Links

+ + + +

FEEDBACK / BUG REPORTS

+ +

A list of current bugs and issues can be found at the CPAN site https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy

+ +

To report a new bug or problem, use the link on this page .

diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..573f4d3 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,75 @@ +# Welcome to Perltidy + +Perltidy is a Perl script which indents and reformats Perl scripts to make them +easier to read. If you write Perl scripts, or spend much time +reading them, you will probably find it useful. + +Perltidy is free software released under the GNU General Public +License -- please see the included file [COPYING](../COPYING) for details. + +The formatting can be controlled with command line parameters. The default +parameter settings approximately follow the suggestions in the +[Perl Style Guide](https://perldoc.perl.org/perlstyle.html). + +Besides reformatting scripts, Perltidy can help in tracking +down errors with missing or extra braces, parentheses, and square brackets +because it is very good at localizing errors. + +## Documentation + +- [A Brief Perltidy Tutorial](./tutorial.html) + +- [Perltidy Style Key](./stylekey.html) will help + in methodically selecting a set of style parameters. + +- [The Perltidy man page](./perltidy.html) explains how +to precisely control the formatting details. + +- [The Perl::Tidy man page](./Tidy.html) discusses how to use the Perl::Tidy module + +- [Change Log](./ChangeLog.html) + + +## Prerequisites + +Perltidy should run on any system with perl 5.008 or later. +The total disk space needed after removing the installation directory will be +about 2 Mb. + + +## Download + + +- The most recent release is always at [CPAN](https://metacpan.org/release/Perl-Tidy) + +- The most recent release is also at [sourceforge](https://sourceforge.net/projects/perltidy/) + + +## Installation + +Perl::Tidy can be installed directly from CPAN one of the standard methods. + +One way is to download a distribution file, unpack it and then +test and install using the Makefile.PL: + + perl Makefile.PL + make + make test + make install + +The [INSTALL file](./INSTALL.html) has additional installation notes. They +are mainly for older sytems but also tell how to use perltidy without doing an installation. + +## Links + + - [Perl::Tidy source code repository at GitHub](https://github.com/perltidy/perltidy) + - [tidyall](https://metacpan.org/pod/distribution/Code-TidyAll/bin/tidyall) is a great tool for automatically running perltidy and other tools including perlcritic on a set of project files. + - [Tidyview](http://sourceforge.net/projects/tidyview) is a graphical program for tweaking your .perltidyrc configuration parameters. + - [A perltidy plugin for Sublime Text 2/3](https://github.com/vifo/SublimePerlTidy) + + +## FEEDBACK / BUG REPORTS + +A list of current bugs and issues can be found at the CPAN site [https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy](https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy) + +To report a new bug or problem, use the link on this page . diff --git a/docs/perltidy.1 b/docs/perltidy.1 deleted file mode 100644 index 46be1c3..0000000 --- a/docs/perltidy.1 +++ /dev/null @@ -1,3621 +0,0 @@ -.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.29) -.\" -.\" Standard preamble: -.\" ======================================================================== -.de Sp \" Vertical space (when we can't use .PP) -.if t .sp .5v -.if n .sp -.. -.de Vb \" Begin verbatim text -.ft CW -.nf -.ne \\$1 -.. -.de Ve \" End verbatim text -.ft R -.fi -.. -.\" Set up some character translations and predefined strings. \*(-- will -.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left -.\" double quote, and \*(R" will give a right double quote. \*(C+ will -.\" give a nicer C++. Capital omega is used to do unbreakable dashes and -.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, -.\" nothing in troff, for use with C<>. -.tr \(*W- -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' -.ie n \{\ -. ds -- \(*W- -. ds PI pi -. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -. ds L" "" -. ds R" "" -. ds C` "" -. ds C' "" -'br\} -.el\{\ -. ds -- \|\(em\| -. ds PI \(*p -. ds L" `` -. ds R" '' -. ds C` -. ds C' -'br\} -.\" -.\" Escape single quotes in literal strings from groff's Unicode transform. -.ie \n(.g .ds Aq \(aq -.el .ds Aq ' -.\" -.\" If the F register is turned on, we'll generate index entries on stderr for -.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index -.\" entries marked with X<> in POD. Of course, you'll have to process the -.\" output yourself in some meaningful fashion. -.\" -.\" Avoid warning from groff about undefined register 'F'. -.de IX -.. -.nr rF 0 -.if \n(.g .if rF .nr rF 1 -.if (\n(rF:(\n(.g==0)) \{ -. if \nF \{ -. de IX -. tm Index:\\$1\t\\n%\t"\\$2" -.. -. if !\nF==2 \{ -. nr % 0 -. nr F 2 -. \} -. \} -.\} -.rr rF -.\" -.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). -.\" Fear. Run. Save yourself. No user-serviceable parts. -. \" fudge factors for nroff and troff -.if n \{\ -. ds #H 0 -. ds #V .8m -. ds #F .3m -. ds #[ \f1 -. ds #] \fP -.\} -.if t \{\ -. ds #H ((1u-(\\\\n(.fu%2u))*.13m) -. ds #V .6m -. ds #F 0 -. ds #[ \& -. ds #] \& -.\} -. \" simple accents for nroff and troff -.if n \{\ -. ds ' \& -. ds ` \& -. ds ^ \& -. ds , \& -. ds ~ ~ -. ds / -.\} -.if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" -. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' -. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' -. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' -. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' -. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' -.\} -. \" troff and (daisy-wheel) nroff accents -.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' -.ds 8 \h'\*(#H'\(*b\h'-\*(#H' -.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] -.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' -.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' -.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] -.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] -.ds ae a\h'-(\w'a'u*4/10)'e -.ds Ae A\h'-(\w'A'u*4/10)'E -. \" corrections for vroff -.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' -.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' -. \" for low resolution devices (crt and lpr) -.if \n(.H>23 .if \n(.V>19 \ -\{\ -. ds : e -. ds 8 ss -. ds o a -. ds d- d\h'-1'\(ga -. ds D- D\h'-1'\(hy -. ds th \o'bp' -. ds Th \o'LP' -. ds ae ae -. ds Ae AE -.\} -.rm #[ #] #H #V #F C -.\" ======================================================================== -.\" -.IX Title "PERLTIDY 1" -.TH PERLTIDY 1 "2018-02-20" "perl v5.22.1" "User Contributed Perl Documentation" -.\" For nroff, turn off justification. Always turn off hyphenation; it makes -.\" way too many mistakes in technical documents. -.if n .ad l -.nh -.SH "NAME" -perltidy \- a perl script indenter and reformatter -.SH "SYNOPSIS" -.IX Header "SYNOPSIS" -.Vb 5 -\& perltidy [ options ] file1 file2 file3 ... -\& (output goes to file1.tdy, file2.tdy, file3.tdy, ...) -\& perltidy [ options ] file1 \-o outfile -\& perltidy [ options ] file1 \-st >outfile -\& perltidy [ options ] outfile -.Ve -.SH "DESCRIPTION" -.IX Header "DESCRIPTION" -Perltidy reads a perl script and writes an indented, reformatted script. -.PP -Many users will find enough information in \*(L"\s-1EXAMPLES\*(R"\s0 to get -started. New users may benefit from the short tutorial -which can be found at -http://perltidy.sourceforge.net/tutorial.html -.PP -A convenient aid to systematically defining a set of style parameters -can be found at -http://perltidy.sourceforge.net/stylekey.html -.PP -Perltidy can produce output on either of two modes, depending on the -existence of an \fB\-html\fR flag. Without this flag, the output is passed -through a formatter. The default formatting tries to follow the -recommendations in \fIperlstyle\fR\|(1), but it can be controlled in detail with -numerous input parameters, which are described in \*(L"\s-1FORMATTING -OPTIONS\*(R"\s0. -.PP -When the \fB\-html\fR flag is given, the output is passed through an \s-1HTML\s0 -formatter which is described in \*(L"\s-1HTML OPTIONS\*(R"\s0. -.SH "EXAMPLES" -.IX Header "EXAMPLES" -.Vb 1 -\& perltidy somefile.pl -.Ve -.PP -This will produce a file \fIsomefile.pl.tdy\fR containing the script reformatted -using the default options, which approximate the style suggested in -\&\fIperlstyle\fR\|(1). The source file \fIsomefile.pl\fR is unchanged. -.PP -.Vb 1 -\& perltidy *.pl -.Ve -.PP -Execute perltidy on all \fI.pl\fR files in the current directory with the -default options. The output will be in files with an appended \fI.tdy\fR -extension. For any file with an error, there will be a file with extension -\&\fI.ERR\fR. -.PP -.Vb 1 -\& perltidy \-b file1.pl file2.pl -.Ve -.PP -Modify \fIfile1.pl\fR and \fIfile2.pl\fR in place, and backup the originals to -\&\fIfile1.pl.bak\fR and \fIfile2.pl.bak\fR. If \fIfile1.pl.bak\fR and/or \fIfile2.pl.bak\fR -already exist, they will be overwritten. -.PP -.Vb 1 -\& perltidy \-b \-bext=\*(Aq/\*(Aq file1.pl file2.pl -.Ve -.PP -Same as the previous example except that the backup files \fIfile1.pl.bak\fR and \fIfile2.pl.bak\fR will be deleted if there are no errors. -.PP -.Vb 1 -\& perltidy \-gnu somefile.pl -.Ve -.PP -Execute perltidy on file \fIsomefile.pl\fR with a style which approximates the -\&\s-1GNU\s0 Coding Standards for C programs. The output will be \fIsomefile.pl.tdy\fR. -.PP -.Vb 1 -\& perltidy \-i=3 somefile.pl -.Ve -.PP -Execute perltidy on file \fIsomefile.pl\fR, with 3 columns for each level of -indentation (\fB\-i=3\fR) instead of the default 4 columns. There will not be any -tabs in the reformatted script, except for any which already exist in comments, -pod documents, quotes, and here documents. Output will be \fIsomefile.pl.tdy\fR. -.PP -.Vb 1 -\& perltidy \-i=3 \-et=8 somefile.pl -.Ve -.PP -Same as the previous example, except that leading whitespace will -be entabbed with one tab character per 8 spaces. -.PP -.Vb 1 -\& perltidy \-ce \-l=72 somefile.pl -.Ve -.PP -Execute perltidy on file \fIsomefile.pl\fR with all defaults except use \*(L"cuddled -elses\*(R" (\fB\-ce\fR) and a maximum line length of 72 columns (\fB\-l=72\fR) instead of -the default 80 columns. -.PP -.Vb 1 -\& perltidy \-g somefile.pl -.Ve -.PP -Execute perltidy on file \fIsomefile.pl\fR and save a log file \fIsomefile.pl.LOG\fR -which shows the nesting of braces, parentheses, and square brackets at -the start of every line. -.PP -.Vb 1 -\& perltidy \-html somefile.pl -.Ve -.PP -This will produce a file \fIsomefile.pl.html\fR containing the script with -html markup. The output file will contain an embedded style sheet in -the <\s-1HEAD\s0> section which may be edited to change the appearance. -.PP -.Vb 1 -\& perltidy \-html \-css=mystyle.css somefile.pl -.Ve -.PP -This will produce a file \fIsomefile.pl.html\fR containing the script with -html markup. This output file will contain a link to a separate style -sheet file \fImystyle.css\fR. If the file \fImystyle.css\fR does not exist, -it will be created. If it exists, it will not be overwritten. -.PP -.Vb 1 -\& perltidy \-html \-pre somefile.pl -.Ve -.PP -Write an html snippet with only the \s-1PRE\s0 section to \fIsomefile.pl.html\fR. -This is useful when code snippets are being formatted for inclusion in a -larger web page. No style sheet will be written in this case. -.PP -.Vb 1 -\& perltidy \-html \-ss >mystyle.css -.Ve -.PP -Write a style sheet to \fImystyle.css\fR and exit. -.PP -.Vb 1 -\& perltidy \-html \-frm mymodule.pm -.Ve -.PP -Write html with a frame holding a table of contents and the source code. The -output files will be \fImymodule.pm.html\fR (the frame), \fImymodule.pm.toc.html\fR -(the table of contents), and \fImymodule.pm.src.html\fR (the source code). -.SH "OPTIONS \- OVERVIEW" -.IX Header "OPTIONS - OVERVIEW" -The entire command line is scanned for options, and they are processed -before any files are processed. As a result, it does not matter -whether flags are before or after any filenames. However, the relative -order of parameters is important, with later parameters overriding the -values of earlier parameters. -.PP -For each parameter, there is a long name and a short name. The short -names are convenient for keyboard input, while the long names are -self-documenting and therefore useful in scripts. It is customary to -use two leading dashes for long names, but one may be used. -.PP -Most parameters which serve as on/off flags can be negated with a -leading \*(L"n\*(R" (for the short name) or a leading \*(L"no\*(R" or \*(L"no\-\*(R" (for the -long name). For example, the flag to outdent long quotes is \fB\-olq\fR -or \fB\-\-outdent\-long\-quotes\fR. The flag to skip this is \fB\-nolq\fR -or \fB\-\-nooutdent\-long\-quotes\fR or \fB\-\-no\-outdent\-long\-quotes\fR. -.PP -Options may not be bundled together. In other words, options \fB\-q\fR and -\&\fB\-g\fR may \s-1NOT\s0 be entered as \fB\-qg\fR. -.PP -Option names may be terminated early as long as they are uniquely identified. -For example, instead of \fB\-\-dump\-token\-types\fR, it would be sufficient to enter -\&\fB\-\-dump\-tok\fR, or even \fB\-\-dump\-t\fR, to uniquely identify this command. -.SS "I/O control" -.IX Subsection "I/O control" -The following parameters concern the files which are read and written. -.IP "\fB\-h\fR, \fB\-\-help\fR" 4 -.IX Item "-h, --help" -Show summary of usage and exit. -.IP "\fB\-o\fR=filename, \fB\-\-outfile\fR=filename" 4 -.IX Item "-o=filename, --outfile=filename" -Name of the output file (only if a single input file is being -processed). If no output file is specified, and output is not -redirected to the standard output, the output will go to \fIfilename.tdy\fR. -.IP "\fB\-st\fR, \fB\-\-standard\-output\fR" 4 -.IX Item "-st, --standard-output" -Perltidy must be able to operate on an arbitrarily large number of files -in a single run, with each output being directed to a different output -file. Obviously this would conflict with outputting to the single -standard output device, so a special flag, \fB\-st\fR, is required to -request outputting to the standard output. For example, -.Sp -.Vb 1 -\& perltidy somefile.pl \-st >somefile.new.pl -.Ve -.Sp -This option may only be used if there is just a single input file. -The default is \fB\-nst\fR or \fB\-\-nostandard\-output\fR. -.IP "\fB\-se\fR, \fB\-\-standard\-error\-output\fR" 4 -.IX Item "-se, --standard-error-output" -If perltidy detects an error when processing file \fIsomefile.pl\fR, its -default behavior is to write error messages to file \fIsomefile.pl.ERR\fR. -Use \fB\-se\fR to cause all error messages to be sent to the standard error -output stream instead. This directive may be negated with \fB\-nse\fR. -Thus, you may place \fB\-se\fR in a \fI.perltidyrc\fR and override it when -desired with \fB\-nse\fR on the command line. -.IP "\fB\-oext\fR=ext, \fB\-\-output\-file\-extension\fR=ext" 4 -.IX Item "-oext=ext, --output-file-extension=ext" -Change the extension of the output file to be \fIext\fR instead of the -default \fItdy\fR (or \fIhtml\fR in case the \-\fB\-html\fR option is used). -See \*(L"Specifying File Extensions\*(R". -.IP "\fB\-opath\fR=path, \fB\-\-output\-path\fR=path" 4 -.IX Item "-opath=path, --output-path=path" -When perltidy creates a filename for an output file, by default it merely -appends an extension to the path and basename of the input file. This -parameter causes the path to be changed to \fIpath\fR instead. -.Sp -The path should end in a valid path separator character, but perltidy will try -to add one if it is missing. -.Sp -For example -.Sp -.Vb 1 -\& perltidy somefile.pl \-opath=/tmp/ -.Ve -.Sp -will produce \fI/tmp/somefile.pl.tdy\fR. Otherwise, \fIsomefile.pl.tdy\fR will -appear in whatever directory contains \fIsomefile.pl\fR. -.Sp -If the path contains spaces, it should be placed in quotes. -.Sp -This parameter will be ignored if output is being directed to standard output, -or if it is being specified explicitly with the \fB\-o=s\fR parameter. -.IP "\fB\-b\fR, \fB\-\-backup\-and\-modify\-in\-place\fR" 4 -.IX Item "-b, --backup-and-modify-in-place" -Modify the input file or files in-place and save the original with the -extension \fI.bak\fR. Any existing \fI.bak\fR file will be deleted. See next -item for changing the default backup extension, and for eliminating the -backup file altogether. -.Sp -A \fB\-b\fR flag will be ignored if input is from standard input or goes to -standard output, or if the \fB\-html\fR flag is set. -.Sp -In particular, if you want to use both the \fB\-b\fR flag and the \fB\-pbp\fR -(\-\-perl\-best\-practices) flag, then you must put a \fB\-nst\fR flag after the -\&\fB\-pbp\fR flag because it contains a \fB\-st\fR flag as one of its components, -which means that output will go to the standard output stream. -.IP "\fB\-bext\fR=ext, \fB\-\-backup\-file\-extension\fR=ext" 4 -.IX Item "-bext=ext, --backup-file-extension=ext" -This parameter serves two purposes: (1) to change the extension of the backup -file to be something other than the default \fI.bak\fR, and (2) to indicate -that no backup file should be saved. -.Sp -To change the default extension to something other than \fI.bak\fR see -\&\*(L"Specifying File Extensions\*(R". -.Sp -A backup file of the source is always written, but you can request that it -be deleted at the end of processing if there were no errors. This is risky -unless the source code is being maintained with a source code control -system. -.Sp -To indicate that the backup should be deleted include one forward slash, -\&\fB/\fR, in the extension. If any text remains after the slash is removed -it will be used to define the backup file extension (which is always -created and only deleted if there were no errors). -.Sp -Here are some examples: -.Sp -.Vb 5 -\& Parameter Extension Backup File Treatment -\& <\-bext=bak> F<.bak> Keep (same as the default behavior) -\& <\-bext=\*(Aq/\*(Aq> F<.bak> Delete if no errors -\& <\-bext=\*(Aq/backup\*(Aq> F<.backup> Delete if no errors -\& <\-bext=\*(Aqoriginal/\*(Aq> F<.original> Delete if no errors -.Ve -.IP "\fB\-w\fR, \fB\-\-warning\-output\fR" 4 -.IX Item "-w, --warning-output" -Setting \fB\-w\fR causes any non-critical warning -messages to be reported as errors. These include messages -about possible pod problems, possibly bad starting indentation level, -and cautions about indirect object usage. The default, \fB\-nw\fR or -\&\fB\-\-nowarning\-output\fR, is not to include these warnings. -.IP "\fB\-q\fR, \fB\-\-quiet\fR" 4 -.IX Item "-q, --quiet" -Deactivate error messages and syntax checking (for running under -an editor). -.Sp -For example, if you use a vi-style editor, such as vim, you may execute -perltidy as a filter from within the editor using something like -.Sp -.Vb 1 -\& :n1,n2!perltidy \-q -.Ve -.Sp -where \f(CW\*(C`n1,n2\*(C'\fR represents the selected text. Without the \fB\-q\fR flag, -any error message may mess up your screen, so be prepared to use your -\&\*(L"undo\*(R" key. -.IP "\fB\-log\fR, \fB\-\-logfile\fR" 4 -.IX Item "-log, --logfile" -Save the \fI.LOG\fR file, which has many useful diagnostics. Perltidy always -creates a \fI.LOG\fR file, but by default it is deleted unless a program bug is -suspected. Setting the \fB\-log\fR flag forces the log file to be saved. -.IP "\fB\-g=n\fR, \fB\-\-logfile\-gap=n\fR" 4 -.IX Item "-g=n, --logfile-gap=n" -Set maximum interval between input code lines in the logfile. This purpose of -this flag is to assist in debugging nesting errors. The value of \f(CW\*(C`n\*(C'\fR is -optional. If you set the flag \fB\-g\fR without the value of \f(CW\*(C`n\*(C'\fR, it will be -taken to be 1, meaning that every line will be written to the log file. This -can be helpful if you are looking for a brace, paren, or bracket nesting error. -.Sp -Setting \fB\-g\fR also causes the logfile to be saved, so it is not necessary to -also include \fB\-log\fR. -.Sp -If no \fB\-g\fR flag is given, a value of 50 will be used, meaning that at least -every 50th line will be recorded in the logfile. This helps prevent -excessively long log files. -.Sp -Setting a negative value of \f(CW\*(C`n\*(C'\fR is the same as not setting \fB\-g\fR at all. -.IP "\fB\-npro\fR \fB\-\-noprofile\fR" 4 -.IX Item "-npro --noprofile" -Ignore any \fI.perltidyrc\fR command file. Normally, perltidy looks first in -your current directory for a \fI.perltidyrc\fR file of parameters. (The format -is described below). If it finds one, it applies those options to the -initial default values, and then it applies any that have been defined -on the command line. If no \fI.perltidyrc\fR file is found, it looks for one -in your home directory. -.Sp -If you set the \fB\-npro\fR flag, perltidy will not look for this file. -.IP "\fB\-pro=filename\fR or \fB\-\-profile=filename\fR" 4 -.IX Item "-pro=filename or --profile=filename" -To simplify testing and switching .perltidyrc files, this command may be -used to specify a configuration file which will override the default -name of .perltidyrc. There must not be a space on either side of the -\&'=' sign. For example, the line -.Sp -.Vb 1 -\& perltidy \-pro=testcfg -.Ve -.Sp -would cause file \fItestcfg\fR to be used instead of the -default \fI.perltidyrc\fR. -.Sp -A pathname begins with three dots, e.g. \*(L".../.perltidyrc\*(R", indicates that -the file should be searched for starting in the current directory and -working upwards. This makes it easier to have multiple projects each with -their own .perltidyrc in their root directories. -.IP "\fB\-opt\fR, \fB\-\-show\-options\fR" 4 -.IX Item "-opt, --show-options" -Write a list of all options used to the \fI.LOG\fR file. -Please see \fB\-\-dump\-options\fR for a simpler way to do this. -.IP "\fB\-f\fR, \fB\-\-force\-read\-binary\fR" 4 -.IX Item "-f, --force-read-binary" -Force perltidy to process binary files. To avoid producing excessive -error messages, perltidy skips files identified by the system as non-text. -However, valid perl scripts containing binary data may sometimes be identified -as non-text, and this flag forces perltidy to process them. -.SH "FORMATTING OPTIONS" -.IX Header "FORMATTING OPTIONS" -.SS "Basic Options" -.IX Subsection "Basic Options" -.IP "\fB\-\-notidy\fR" 4 -.IX Item "--notidy" -This flag disables all formatting and causes the input to be copied unchanged -to the output except for possible changes in line ending characters and any -pre\- and post-filters. This can be useful in conjunction with a hierarchical -set of \fI.perltidyrc\fR files to avoid unwanted code tidying. See also -\&\*(L"Skipping Selected Sections of Code\*(R" for a way to avoid tidying specific -sections of code. -.IP "\fB\-i=n\fR, \fB\-\-indent\-columns=n\fR" 4 -.IX Item "-i=n, --indent-columns=n" -Use n columns per indentation level (default n=4). -.IP "\fB\-l=n\fR, \fB\-\-maximum\-line\-length=n\fR" 4 -.IX Item "-l=n, --maximum-line-length=n" -The default maximum line length is n=80 characters. Perltidy will try -to find line break points to keep lines below this length. However, long -quotes and side comments may cause lines to exceed this length. -Setting \fB\-l=0\fR is equivalent to setting \fB\-l=(a large number)\fR. -.IP "\fB\-vmll\fR, \fB\-\-variable\-maximum\-line\-length\fR" 4 -.IX Item "-vmll, --variable-maximum-line-length" -A problem arises using a fixed maximum line length with very deeply nested code -and data structures because eventually the amount of leading whitespace used -for indicating indentation takes up most or all of the available line width, -leaving little or no space for the actual code or data. One solution is to use -a vary long line length. Another solution is to use the \fB\-vmll\fR flag, which -basically tells perltidy to ignore leading whitespace when measuring the line -length. -.Sp -To be precise, when the \fB\-vmll\fR parameter is set, the maximum line length of a -line of code will be M+L*I, where -.Sp -.Vb 3 -\& M is the value of \-\-maximum\-line\-length=M (\-l=M), default 80, -\& I is the value of \-\-indent\-columns=I (\-i=I), default 4, -\& L is the indentation level of the line of code -.Ve -.Sp -When this flag is set, the choice of breakpoints for a block of code should be -essentially independent of its nesting depth. However, the absolute line -lengths, including leading whitespace, can still be arbitrarily large. This -problem can be avoided by including the next parameter. -.Sp -The default is not to do this (\fB\-nvmll\fR). -.IP "\fB\-wc=n\fR, \fB\-\-whitespace\-cycle=n\fR" 4 -.IX Item "-wc=n, --whitespace-cycle=n" -This flag also addresses problems with very deeply nested code and data -structures. When the nesting depth exceeds the value \fBn\fR the leading -whitespace will be reduced and start at a depth of 1 again. The result is that -blocks of code will shift back to the left rather than moving arbitrarily far -to the right. This occurs cyclically to any depth. -.Sp -For example if one level of indentation equals 4 spaces (\fB\-i=4\fR, the default), -and one uses \fB\-wc=15\fR, then if the leading whitespace on a line exceeds about -4*15=60 spaces it will be reduced back to 4*1=4 spaces and continue increasing -from there. If the whitespace never exceeds this limit the formatting remains -unchanged. -.Sp -The combination of \fB\-vmll\fR and \fB\-wc=n\fR provides a solution to the problem of -displaying arbitrarily deep data structures and code in a finite window, -although \fB\-wc=n\fR may of course be used without \fB\-vmll\fR. -.Sp -The default is not to use this, which can also be indicated using \fB\-wc=0\fR. -.IP "tabs" 4 -.IX Item "tabs" -Using tab characters will almost certainly lead to future portability -and maintenance problems, so the default and recommendation is not to -use them. For those who prefer tabs, however, there are two different -options. -.Sp -Except for possibly introducing tab indentation characters, as outlined -below, perltidy does not introduce any tab characters into your file, -and it removes any tabs from the code (unless requested not to do so -with \fB\-fws\fR). If you have any tabs in your comments, quotes, or -here-documents, they will remain. -.RS 4 -.IP "\fB\-et=n\fR, \fB\-\-entab\-leading\-whitespace\fR" 4 -.IX Item "-et=n, --entab-leading-whitespace" -This flag causes each \fBn\fR initial space characters to be replaced by -one tab character. Note that the integer \fBn\fR is completely independent -of the integer specified for indentation parameter, \fB\-i=n\fR. -.IP "\fB\-t\fR, \fB\-\-tabs\fR" 4 -.IX Item "-t, --tabs" -This flag causes one leading tab character to be inserted for each level -of indentation. Certain other features are incompatible with this -option, and if these options are also given, then a warning message will -be issued and this flag will be unset. One example is the \fB\-lp\fR -option. -.IP "\fB\-dt=n\fR, \fB\-\-default\-tabsize=n\fR" 4 -.IX Item "-dt=n, --default-tabsize=n" -If the first line of code passed to perltidy contains leading tabs but no -tab scheme is specified for the output stream then perltidy must guess how many -spaces correspond to each leading tab. This number of spaces \fBn\fR -corresponding to each leading tab of the input stream may be specified with -\&\fB\-dt=n\fR. The default is \fBn=8\fR. -.Sp -This flag has no effect if a tab scheme is specified for the output stream, -because then the input stream is assumed to use the same tab scheme and -indentation spaces as for the output stream (any other assumption would lead to -unstable editing). -.RE -.RS 4 -.RE -.IP "\fB\-syn\fR, \fB\-\-check\-syntax\fR" 4 -.IX Item "-syn, --check-syntax" -This flag is now ignored for safety, but the following documentation -has been retained for reference. -.Sp -This flag causes perltidy to run \f(CW\*(C`perl \-c \-T\*(C'\fR to check syntax of input -and output. (To change the flags passed to perl, see the next -item, \fB\-pscf\fR). The results are written to the \fI.LOG\fR file, which -will be saved if an error is detected in the output script. The output -script is not checked if the input script has a syntax error. Perltidy -does its own checking, but this option employs perl to get a \*(L"second -opinion\*(R". -.Sp -If perl reports errors in the input file, they will not be reported in -the error output unless the \fB\-\-warning\-output\fR flag is given. -.Sp -The default is \fB\s-1NOT\s0\fR to do this type of syntax checking (although -perltidy will still do as much self-checking as possible). The reason -is that it causes all code in \s-1BEGIN\s0 blocks to be executed, for all -modules being used, and this opens the door to security issues and -infinite loops when running perltidy. -.IP "\fB\-pscf=s\fR, \fB\-perl\-syntax\-check\-flags=s\fR" 4 -.IX Item "-pscf=s, -perl-syntax-check-flags=s" -When perl is invoked to check syntax, the normal flags are \f(CW\*(C`\-c \-T\*(C'\fR. In -addition, if the \fB\-x\fR flag is given to perltidy, then perl will also be -passed a \fB\-x\fR flag. It should not normally be necessary to change -these flags, but it can be done with the \fB\-pscf=s\fR flag. For example, -if the taint flag, \f(CW\*(C`\-T\*(C'\fR, is not wanted, the flag could be set to be just -\&\fB\-pscf=\-c\fR. -.Sp -Perltidy will pass your string to perl with the exception that it will -add a \fB\-c\fR and \fB\-x\fR if appropriate. The \fI.LOG\fR file will show -exactly what flags were passed to perl. -.IP "\fB\-xs\fR, \fB\-\-extended\-syntax\fR" 4 -.IX Item "-xs, --extended-syntax" -A problem with formatting Perl code is that some modules can introduce new -syntax. This flag allows perltidy to handle certain common extensions -to the standard syntax without complaint. -.Sp -For example, without this flag a structure such as the following would generate -a syntax error and the braces would not be balanced: -.Sp -.Vb 3 -\& method deposit( Num $amount) { -\& $self\->balance( $self\->balance + $amount ); -\& } -.Ve -.Sp -This flag is enabled by default but it can be deactivated with \fB\-nxs\fR. -Probably the only reason to deactivate this flag is to generate more diagnostic -messages when debugging a script. -.IP "\fB\-io\fR, \fB\-\-indent\-only\fR" 4 -.IX Item "-io, --indent-only" -This flag is used to deactivate all whitespace and line break changes -within non-blank lines of code. -When it is in effect, the only change to the script will be -to the indentation and to the number of blank lines. -And any flags controlling whitespace and newlines will be ignored. You -might want to use this if you are perfectly happy with your whitespace -and line breaks, and merely want perltidy to handle the indentation. -(This also speeds up perltidy by well over a factor of two, so it might be -useful when perltidy is merely being used to help find a brace error in -a large script). -.Sp -Setting this flag is equivalent to setting \fB\-\-freeze\-newlines\fR and -\&\fB\-\-freeze\-whitespace\fR. -.Sp -If you also want to keep your existing blank lines exactly -as they are, you can add \fB\-\-freeze\-blank\-lines\fR. -.Sp -With this option perltidy is still free to modify the indenting (and -outdenting) of code and comments as it normally would. If you also want to -prevent long comment lines from being outdented, you can add either \fB\-noll\fR or -\&\fB\-l=0\fR. -.Sp -Setting this flag will prevent perltidy from doing any special operations on -closing side comments. You may still delete all side comments however when -this flag is in effect. -.IP "\fB\-enc=s\fR, \fB\-\-character\-encoding=s\fR" 4 -.IX Item "-enc=s, --character-encoding=s" -where \fBs\fR=\fBnone\fR or \fButf8\fR. This flag tells perltidy the character encoding -of both the input and output character streams. The value \fButf8\fR causes the -stream to be read and written as \s-1UTF\-8. \s0 The value \fBnone\fR causes the stream to -be processed without special encoding assumptions. At present there is no -automatic detection of character encoding (even if there is a \f(CW\*(Aquse utf8\*(Aq\fR -statement in your code) so this flag must be set for streams encoded in \s-1UTF\-8.\s0 -Incorrectly setting this parameter can cause data corruption, so please -carefully check the output. -.Sp -The default is \fBnone\fR. -.Sp -The abbreviations \fB\-utf8\fR or \fB\-UTF8\fR are equivalent to \fB\-enc=utf8\fR. -So to process a file named \fBfile.pl\fR which is encoded in \s-1UTF\-8\s0 you can use: -.Sp -.Vb 1 -\& perltidy \-utf8 file.pl -.Ve -.IP "\fB\-ole=s\fR, \fB\-\-output\-line\-ending=s\fR" 4 -.IX Item "-ole=s, --output-line-ending=s" -where s=\f(CW\*(C`win\*(C'\fR, \f(CW\*(C`dos\*(C'\fR, \f(CW\*(C`unix\*(C'\fR, or \f(CW\*(C`mac\*(C'\fR. This flag tells perltidy -to output line endings for a specific system. Normally, -perltidy writes files with the line separator character of the host -system. The \f(CW\*(C`win\*(C'\fR and \f(CW\*(C`dos\*(C'\fR flags have an identical result. -.IP "\fB\-ple\fR, \fB\-\-preserve\-line\-endings\fR" 4 -.IX Item "-ple, --preserve-line-endings" -This flag tells perltidy to write its output files with the same line -endings as the input file, if possible. It should work for -\&\fBdos\fR, \fBunix\fR, and \fBmac\fR line endings. It will only work if perltidy -input comes from a filename (rather than stdin, for example). If -perltidy has trouble determining the input file line ending, it will -revert to the default behavior of using the line ending of the host system. -.IP "\fB\-it=n\fR, \fB\-\-iterations=n\fR" 4 -.IX Item "-it=n, --iterations=n" -This flag causes perltidy to do \fBn\fR complete iterations. The reason for this -flag is that code beautification is an iterative process and in some -cases the output from perltidy can be different if it is applied a second time. -For most purposes the default of \fBn=1\fR should be satisfactory. However \fBn=2\fR -can be useful when a major style change is being made, or when code is being -beautified on check-in to a source code control system. It has been found to -be extremely rare for the output to change after 2 iterations. If a value -\&\fBn\fR is greater than 2 is input then a convergence test will be used to stop -the iterations as soon as possible, almost always after 2 iterations. See -the next item for a simplified iteration control. -.Sp -This flag has no effect when perltidy is used to generate html. -.IP "\fB\-conv\fR, \fB\-\-converge\fR" 4 -.IX Item "-conv, --converge" -This flag is equivalent to \fB\-it=4\fR and is included to simplify iteration -control. For all practical purposes one either does or does not want to be -sure that the output is converged, and there is no penalty to using a large -iteration limit since perltidy will check for convergence and stop iterating as -soon as possible. The default is \fB\-nconv\fR (no convergence check). Using -\&\fB\-conv\fR will approximately double run time since normally one extra iteration -is required to verify convergence. -.SS "Code Indentation Control" -.IX Subsection "Code Indentation Control" -.IP "\fB\-ci=n\fR, \fB\-\-continuation\-indentation=n\fR" 4 -.IX Item "-ci=n, --continuation-indentation=n" -Continuation indentation is extra indentation spaces applied when -a long line is broken. The default is n=2, illustrated here: -.Sp -.Vb 2 -\& my $level = # \-ci=2 -\& ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; -.Ve -.Sp -The same example, with n=0, is a little harder to read: -.Sp -.Vb 2 -\& my $level = # \-ci=0 -\& ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; -.Ve -.Sp -The value given to \fB\-ci\fR is also used by some commands when a small -space is required. Examples are commands for outdenting labels, -\&\fB\-ola\fR, and control keywords, \fB\-okw\fR. -.Sp -When default values are not used, it is suggested that the value \fBn\fR -given with \fB\-ci=n\fR be no more than about one-half of the number of -spaces assigned to a full indentation level on the \fB\-i=n\fR command. -.IP "\fB\-sil=n\fR \fB\-\-starting\-indentation\-level=n\fR" 4 -.IX Item "-sil=n --starting-indentation-level=n" -By default, perltidy examines the input file and tries to determine the -starting indentation level. While it is often zero, it may not be -zero for a code snippet being sent from an editing session. -.Sp -To guess the starting indentation level perltidy simply assumes that -indentation scheme used to create the code snippet is the same as is being used -for the current perltidy process. This is the only sensible guess that can be -made. It should be correct if this is true, but otherwise it probably won't. -For example, if the input script was written with \-i=2 and the current peltidy -flags have \-i=4, the wrong initial indentation will be guessed for a code -snippet which has non-zero initial indentation. Likewise, if an entabbing -scheme is used in the input script and not in the current process then the -guessed indentation will be wrong. -.Sp -If the default method does not work correctly, or you want to change the -starting level, use \fB\-sil=n\fR, to force the starting level to be n. -.IP "List indentation using \fB\-lp\fR, \fB\-\-line\-up\-parentheses\fR" 4 -.IX Item "List indentation using -lp, --line-up-parentheses" -By default, perltidy indents lists with 4 spaces, or whatever value -is specified with \fB\-i=n\fR. Here is a small list formatted in this way: -.Sp -.Vb 5 -\& # perltidy (default) -\& @month_of_year = ( -\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, -\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq -\& ); -.Ve -.Sp -Use the \fB\-lp\fR flag to add extra indentation to cause the data to begin -past the opening parentheses of a sub call or list, or opening square -bracket of an anonymous array, or opening curly brace of an anonymous -hash. With this option, the above list would become: -.Sp -.Vb 5 -\& # perltidy \-lp -\& @month_of_year = ( -\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, -\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq -\& ); -.Ve -.Sp -If the available line length (see \fB\-l=n\fR ) does not permit this much -space, perltidy will use less. For alternate placement of the -closing paren, see the next section. -.Sp -This option has no effect on code \s-1BLOCKS,\s0 such as if/then/else blocks, -which always use whatever is specified with \fB\-i=n\fR. Also, the -existence of line breaks and/or block comments between the opening and -closing parens may cause perltidy to temporarily revert to its default -method. -.Sp -Note: The \fB\-lp\fR option may not be used together with the \fB\-t\fR tabs option. -It may, however, be used with the \fB\-et=n\fR tab method. -.Sp -In addition, any parameter which significantly restricts the ability of -perltidy to choose newlines will conflict with \fB\-lp\fR and will cause -\&\fB\-lp\fR to be deactivated. These include \fB\-io\fR, \fB\-fnl\fR, \fB\-nanl\fR, and -\&\fB\-ndnl\fR. The reason is that the \fB\-lp\fR indentation style can require -the careful coordination of an arbitrary number of break points in -hierarchical lists, and these flags may prevent that. -.IP "\fB\-cti=n\fR, \fB\-\-closing\-token\-indentation\fR" 4 -.IX Item "-cti=n, --closing-token-indentation" -The \fB\-cti=n\fR flag controls the indentation of a line beginning with -a \f(CW\*(C`)\*(C'\fR, \f(CW\*(C`]\*(C'\fR, or a non-block \f(CW\*(C`}\*(C'\fR. Such a line receives: -.Sp -.Vb 6 -\& \-cti = 0 no extra indentation (default) -\& \-cti = 1 extra indentation such that the closing token -\& aligns with its opening token. -\& \-cti = 2 one extra indentation level if the line looks like: -\& ); or ]; or }; -\& \-cti = 3 one extra indentation level always -.Ve -.Sp -The flags \fB\-cti=1\fR and \fB\-cti=2\fR work well with the \fB\-lp\fR flag (previous -section). -.Sp -.Vb 5 -\& # perltidy \-lp \-cti=1 -\& @month_of_year = ( -\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, -\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq -\& ); -\& -\& # perltidy \-lp \-cti=2 -\& @month_of_year = ( -\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, -\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq -\& ); -.Ve -.Sp -These flags are merely hints to the formatter and they may not always be -followed. In particular, if \-lp is not being used, the indentation for -\&\fBcti=1\fR is constrained to be no more than one indentation level. -.Sp -If desired, this control can be applied independently to each of the -closing container token types. In fact, \fB\-cti=n\fR is merely an -abbreviation for \fB\-cpi=n \-csbi=n \-cbi=n\fR, where: -\&\fB\-cpi\fR or \fB\-\-closing\-paren\-indentation\fR controls \fB)\fR's, -\&\fB\-csbi\fR or \fB\-\-closing\-square\-bracket\-indentation\fR controls \fB]\fR's, -\&\fB\-cbi\fR or \fB\-\-closing\-brace\-indentation\fR controls non-block \fB}\fR's. -.IP "\fB\-icp\fR, \fB\-\-indent\-closing\-paren\fR" 4 -.IX Item "-icp, --indent-closing-paren" -The \fB\-icp\fR flag is equivalent to -\&\fB\-cti=2\fR, described in the previous section. The \fB\-nicp\fR flag is -equivalent \fB\-cti=0\fR. They are included for backwards compatibility. -.IP "\fB\-icb\fR, \fB\-\-indent\-closing\-brace\fR" 4 -.IX Item "-icb, --indent-closing-brace" -The \fB\-icb\fR option gives one extra level of indentation to a brace which -terminates a code block . For example, -.Sp -.Vb 6 -\& if ($task) { -\& yyy(); -\& } # \-icb -\& else { -\& zzz(); -\& } -.Ve -.Sp -The default is not to do this, indicated by \fB\-nicb\fR. -.IP "\fB\-olq\fR, \fB\-\-outdent\-long\-quotes\fR" 4 -.IX Item "-olq, --outdent-long-quotes" -When \fB\-olq\fR is set, lines which is a quoted string longer than the -value \fBmaximum-line-length\fR will have their indentation removed to make -them more readable. This is the default. To prevent such out-denting, -use \fB\-nolq\fR or \fB\-\-nooutdent\-long\-lines\fR. -.IP "\fB\-oll\fR, \fB\-\-outdent\-long\-lines\fR" 4 -.IX Item "-oll, --outdent-long-lines" -This command is equivalent to \fB\-\-outdent\-long\-quotes\fR and -\&\fB\-\-outdent\-long\-comments\fR, and it is included for compatibility with previous -versions of perltidy. The negation of this also works, \fB\-noll\fR or -\&\fB\-\-nooutdent\-long\-lines\fR, and is equivalent to setting \fB\-nolq\fR and \fB\-nolc\fR. -.IP "Outdenting Labels: \fB\-ola\fR, \fB\-\-outdent\-labels\fR" 4 -.IX Item "Outdenting Labels: -ola, --outdent-labels" -This command will cause labels to be outdented by 2 spaces (or whatever \fB\-ci\fR -has been set to), if possible. This is the default. For example: -.Sp -.Vb 6 -\& my $i; -\& LOOP: while ( $i = ) { -\& chomp($i); -\& next unless $i; -\& fixit($i); -\& } -.Ve -.Sp -Use \fB\-nola\fR to not outdent labels. -.IP "Outdenting Keywords" 4 -.IX Item "Outdenting Keywords" -.RS 4 -.PD 0 -.IP "\fB\-okw\fR, \fB\-\-outdent\-keywords\fR" 4 -.IX Item "-okw, --outdent-keywords" -.PD -The command \fB\-okw\fR will cause certain leading control keywords to -be outdented by 2 spaces (or whatever \fB\-ci\fR has been set to), if -possible. By default, these keywords are \f(CW\*(C`redo\*(C'\fR, \f(CW\*(C`next\*(C'\fR, \f(CW\*(C`last\*(C'\fR, -\&\f(CW\*(C`goto\*(C'\fR, and \f(CW\*(C`return\*(C'\fR. The intention is to make these control keywords -easier to see. To change this list of keywords being outdented, see -the next section. -.Sp -For example, using \f(CW\*(C`perltidy \-okw\*(C'\fR on the previous example gives: -.Sp -.Vb 6 -\& my $i; -\& LOOP: while ( $i = ) { -\& chomp($i); -\& next unless $i; -\& fixit($i); -\& } -.Ve -.Sp -The default is not to do this. -.IP "Specifying Outdented Keywords: \fB\-okwl=string\fR, \fB\-\-outdent\-keyword\-list=string\fR" 4 -.IX Item "Specifying Outdented Keywords: -okwl=string, --outdent-keyword-list=string" -This command can be used to change the keywords which are outdented with -the \fB\-okw\fR command. The parameter \fBstring\fR is a required list of perl -keywords, which should be placed in quotes if there are more than one. -By itself, it does not cause any outdenting to occur, so the \fB\-okw\fR -command is still required. -.Sp -For example, the commands \f(CW\*(C`\-okwl="next last redo goto" \-okw\*(C'\fR will cause -those four keywords to be outdented. It is probably simplest to place -any \fB\-okwl\fR command in a \fI.perltidyrc\fR file. -.RE -.RS 4 -.RE -.SS "Whitespace Control" -.IX Subsection "Whitespace Control" -Whitespace refers to the blank space between variables, operators, -and other code tokens. -.IP "\fB\-fws\fR, \fB\-\-freeze\-whitespace\fR" 4 -.IX Item "-fws, --freeze-whitespace" -This flag causes your original whitespace to remain unchanged, and -causes the rest of the whitespace commands in this section, the -Code Indentation section, and -the Comment Control section to be ignored. -.IP "Tightness of curly braces, parentheses, and square brackets." 4 -.IX Item "Tightness of curly braces, parentheses, and square brackets." -Here the term \*(L"tightness\*(R" will mean the closeness with which -pairs of enclosing tokens, such as parentheses, contain the quantities -within. A numerical value of 0, 1, or 2 defines the tightness, with -0 being least tight and 2 being most tight. Spaces within containers -are always symmetric, so if there is a space after a \f(CW\*(C`(\*(C'\fR then there -will be a space before the corresponding \f(CW\*(C`)\*(C'\fR. -.Sp -The \fB\-pt=n\fR or \fB\-\-paren\-tightness=n\fR parameter controls the space within -parens. The example below shows the effect of the three possible -values, 0, 1, and 2: -.Sp -.Vb 3 -\& if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # \-pt=0 -\& if ( ( my $len_tab = length($tabstr) ) > 0 ) { # \-pt=1 (default) -\& if ((my $len_tab = length($tabstr)) > 0) { # \-pt=2 -.Ve -.Sp -When n is 0, there is always a space to the right of a '(' and to the left -of a ')'. For n=2 there is never a space. For n=1, the default, there -is a space unless the quantity within the parens is a single token, such -as an identifier or quoted string. -.Sp -Likewise, the parameter \fB\-sbt=n\fR or \fB\-\-square\-bracket\-tightness=n\fR -controls the space within square brackets, as illustrated below. -.Sp -.Vb 3 -\& $width = $col[ $j + $k ] \- $col[ $j ]; # \-sbt=0 -\& $width = $col[ $j + $k ] \- $col[$j]; # \-sbt=1 (default) -\& $width = $col[$j + $k] \- $col[$j]; # \-sbt=2 -.Ve -.Sp -Curly braces which do not contain code blocks are controlled by -the parameter \fB\-bt=n\fR or \fB\-\-brace\-tightness=n\fR. -.Sp -.Vb 3 -\& $obj\->{ $parsed_sql\->{ \*(Aqtable\*(Aq }[0] }; # \-bt=0 -\& $obj\->{ $parsed_sql\->{\*(Aqtable\*(Aq}[0] }; # \-bt=1 (default) -\& $obj\->{$parsed_sql\->{\*(Aqtable\*(Aq}[0]}; # \-bt=2 -.Ve -.Sp -And finally, curly braces which contain blocks of code are controlled by the -parameter \fB\-bbt=n\fR or \fB\-\-block\-brace\-tightness=n\fR as illustrated in the -example below. -.Sp -.Vb 3 -\& %bf = map { $_ => \-M $_ } grep { /\e.deb$/ } dirents \*(Aq.\*(Aq; # \-bbt=0 (default) -\& %bf = map { $_ => \-M $_ } grep {/\e.deb$/} dirents \*(Aq.\*(Aq; # \-bbt=1 -\& %bf = map {$_ => \-M $_} grep {/\e.deb$/} dirents \*(Aq.\*(Aq; # \-bbt=2 -.Ve -.Sp -To simplify input in the case that all of the tightness flags have the same -value , the parameter <\-act=n> or \fB\-\-all\-containers\-tightness=n\fR is an -abbreviation for the combination <\-pt=n \-sbt=n \-bt=n \-bbt=n>. -.IP "\fB\-tso\fR, \fB\-\-tight\-secret\-operators\fR" 4 -.IX Item "-tso, --tight-secret-operators" -The flag \fB\-tso\fR causes certain perl token sequences (secret operators) -which might be considered to be a single operator to be formatted \*(L"tightly\*(R" -(without spaces). The operators currently modified by this flag are: -.Sp -.Vb 1 -\& 0+ +0 ()x!! ~~<> ,=> =( )= -.Ve -.Sp -For example the sequence \fB0 +\fR, which converts a string to a number, -would be formatted without a space: \fB0+\fR when the \fB\-tso\fR flag is set. This -flag is off by default. -.IP "\fB\-sts\fR, \fB\-\-space\-terminal\-semicolon\fR" 4 -.IX Item "-sts, --space-terminal-semicolon" -Some programmers prefer a space before all terminal semicolons. The -default is for no such space, and is indicated with \fB\-nsts\fR or -\&\fB\-\-nospace\-terminal\-semicolon\fR. -.Sp -.Vb 2 -\& $i = 1 ; # \-sts -\& $i = 1; # \-nsts (default) -.Ve -.IP "\fB\-sfs\fR, \fB\-\-space\-for\-semicolon\fR" 4 -.IX Item "-sfs, --space-for-semicolon" -Semicolons within \fBfor\fR loops may sometimes be hard to see, -particularly when commas are also present. This option places spaces on -both sides of these special semicolons, and is the default. Use -\&\fB\-nsfs\fR or \fB\-\-nospace\-for\-semicolon\fR to deactivate it. -.Sp -.Vb 2 -\& for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) { # \-sfs (default) -\& for ( @a = @$ap, $u = shift @a; @a; $u = $v ) { # \-nsfs -.Ve -.IP "\fB\-asc\fR, \fB\-\-add\-semicolons\fR" 4 -.IX Item "-asc, --add-semicolons" -Setting \fB\-asc\fR allows perltidy to add any missing optional semicolon at the end -of a line which is followed by a closing curly brace on the next line. This -is the default, and may be deactivated with \fB\-nasc\fR or \fB\-\-noadd\-semicolons\fR. -.IP "\fB\-dsm\fR, \fB\-\-delete\-semicolons\fR" 4 -.IX Item "-dsm, --delete-semicolons" -Setting \fB\-dsm\fR allows perltidy to delete extra semicolons which are -simply empty statements. This is the default, and may be deactivated -with \fB\-ndsm\fR or \fB\-\-nodelete\-semicolons\fR. (Such semicolons are not -deleted, however, if they would promote a side comment to a block -comment). -.IP "\fB\-aws\fR, \fB\-\-add\-whitespace\fR" 4 -.IX Item "-aws, --add-whitespace" -Setting this option allows perltidy to add certain whitespace improve -code readability. This is the default. If you do not want any -whitespace added, but are willing to have some whitespace deleted, use -\&\fB\-naws\fR. (Use \fB\-fws\fR to leave whitespace completely unchanged). -.IP "\fB\-dws\fR, \fB\-\-delete\-old\-whitespace\fR" 4 -.IX Item "-dws, --delete-old-whitespace" -Setting this option allows perltidy to remove some old whitespace -between characters, if necessary. This is the default. If you -do not want any old whitespace removed, use \fB\-ndws\fR or -\&\fB\-\-nodelete\-old\-whitespace\fR. -.IP "Detailed whitespace controls around tokens" 4 -.IX Item "Detailed whitespace controls around tokens" -For those who want more detailed control over the whitespace around -tokens, there are four parameters which can directly modify the default -whitespace rules built into perltidy for any token. They are: -.Sp -\&\fB\-wls=s\fR or \fB\-\-want\-left\-space=s\fR, -.Sp -\&\fB\-nwls=s\fR or \fB\-\-nowant\-left\-space=s\fR, -.Sp -\&\fB\-wrs=s\fR or \fB\-\-want\-right\-space=s\fR, -.Sp -\&\fB\-nwrs=s\fR or \fB\-\-nowant\-right\-space=s\fR. -.Sp -These parameters are each followed by a quoted string, \fBs\fR, containing a -list of token types. No more than one of each of these parameters -should be specified, because repeating a command-line parameter -always overwrites the previous one before perltidy ever sees it. -.Sp -To illustrate how these are used, suppose it is desired that there be no -space on either side of the token types \fB= + \- / *\fR. The following two -parameters would specify this desire: -.Sp -.Vb 1 -\& \-nwls="= + \- / *" \-nwrs="= + \- / *" -.Ve -.Sp -(Note that the token types are in quotes, and that they are separated by -spaces). With these modified whitespace rules, the following line of math: -.Sp -.Vb 1 -\& $root = \-$b + sqrt( $b * $b \- 4. * $a * $c ) / ( 2. * $a ); -.Ve -.Sp -becomes this: -.Sp -.Vb 1 -\& $root=\-$b+sqrt( $b*$b\-4.*$a*$c )/( 2.*$a ); -.Ve -.Sp -These parameters should be considered to be hints to perltidy rather -than fixed rules, because perltidy must try to resolve conflicts that -arise between them and all of the other rules that it uses. One -conflict that can arise is if, between two tokens, the left token wants -a space and the right one doesn't. In this case, the token not wanting -a space takes priority. -.Sp -It is necessary to have a list of all token types in order to create -this type of input. Such a list can be obtained by the command -\&\fB\-\-dump\-token\-types\fR. Also try the \fB\-D\fR flag on a short snippet of code -and look at the .DEBUG file to see the tokenization. -.Sp -\&\fB\s-1WARNING\s0\fR Be sure to put these tokens in quotes to avoid having them -misinterpreted by your command shell. -.IP "Space between specific keywords and opening paren" 4 -.IX Item "Space between specific keywords and opening paren" -When an opening paren follows a Perl keyword, no space is introduced after the -keyword, unless it is (by default) one of these: -.Sp -.Vb 2 -\& my local our and or eq ne if else elsif until unless -\& while for foreach return switch case given when -.Ve -.Sp -These defaults can be modified with two commands: -.Sp -\&\fB\-sak=s\fR or \fB\-\-space\-after\-keyword=s\fR adds keywords. -.Sp -\&\fB\-nsak=s\fR or \fB\-\-nospace\-after\-keyword=s\fR removes keywords. -.Sp -where \fBs\fR is a list of keywords (in quotes if necessary). For example, -.Sp -.Vb 2 -\& my ( $a, $b, $c ) = @_; # default -\& my( $a, $b, $c ) = @_; # \-nsak="my local our" -.Ve -.Sp -The abbreviation \fB\-nsak='*'\fR is equivalent to including all of the -keywords in the above list. -.Sp -When both \fB\-nsak=s\fR and \fB\-sak=s\fR commands are included, the \fB\-nsak=s\fR -command is executed first. For example, to have space after only the -keywords (my, local, our) you could use \fB\-nsak=\*(L"*\*(R" \-sak=\*(L"my local our\*(R"\fR. -.Sp -To put a space after all keywords, see the next item. -.IP "Space between all keywords and opening parens" 4 -.IX Item "Space between all keywords and opening parens" -When an opening paren follows a function or keyword, no space is introduced -after the keyword except for the keywords noted in the previous item. To -always put a space between a function or keyword and its opening paren, -use the command: -.Sp -\&\fB\-skp\fR or \fB\-\-space\-keyword\-paren\fR -.Sp -You will probably also want to use the flag \fB\-sfp\fR (next item) too. -.IP "Space between all function names and opening parens" 4 -.IX Item "Space between all function names and opening parens" -When an opening paren follows a function the default is not to introduce -a space. To cause a space to be introduced use: -.Sp -\&\fB\-sfp\fR or \fB\-\-space\-function\-paren\fR -.Sp -.Vb 2 -\& myfunc( $a, $b, $c ); # default -\& myfunc ( $a, $b, $c ); # \-sfp -.Ve -.Sp -You will probably also want to use the flag \fB\-skp\fR (previous item) too. -.ie n .IP "Trimming whitespace around ""qw"" quotes" 4 -.el .IP "Trimming whitespace around \f(CWqw\fR quotes" 4 -.IX Item "Trimming whitespace around qw quotes" -\&\fB\-tqw\fR or \fB\-\-trim\-qw\fR provide the default behavior of trimming -spaces around multi-line \f(CW\*(C`qw\*(C'\fR quotes and indenting them appropriately. -.Sp -\&\fB\-ntqw\fR or \fB\-\-notrim\-qw\fR cause leading and trailing whitespace around -multi-line \f(CW\*(C`qw\*(C'\fR quotes to be left unchanged. This option will not -normally be necessary, but was added for testing purposes, because in -some versions of perl, trimming \f(CW\*(C`qw\*(C'\fR quotes changes the syntax tree. -.IP "\fB\-sbq=n\fR or \fB\-\-space\-backslash\-quote=n\fR" 4 -.IX Item "-sbq=n or --space-backslash-quote=n" -Lines like -.Sp -.Vb 2 -\& $str1=\e"string1"; -\& $str2=\e\*(Aqstring2\*(Aq; -.Ve -.Sp -can confuse syntax highlighters unless a space is included between the backslash and the single or double quotation mark. -.Sp -This can be controlled with the value of \fBn\fR as follows: -.Sp -.Vb 3 -\& \-sbq=0 means no space between the backslash and quote -\& \-sbq=1 means follow the example of the source code -\& \-sbq=2 means always put a space between the backslash and quote -.Ve -.Sp -The default is \fB\-sbq=1\fR, meaning that a space will be used 0if there is one in the source code. -.IP "Trimming trailing whitespace from lines of \s-1POD\s0" 4 -.IX Item "Trimming trailing whitespace from lines of POD" -\&\fB\-trp\fR or \fB\-\-trim\-pod\fR will remove trailing whitespace from lines of \s-1POD.\s0 -The default is not to do this. -.SS "Comment Controls" -.IX Subsection "Comment Controls" -Perltidy has a number of ways to control the appearance of both block comments -and side comments. The term \fBblock comment\fR here refers to a full-line -comment, whereas \fBside comment\fR will refer to a comment which appears on a -line to the right of some code. -.IP "\fB\-ibc\fR, \fB\-\-indent\-block\-comments\fR" 4 -.IX Item "-ibc, --indent-block-comments" -Block comments normally look best when they are indented to the same -level as the code which follows them. This is the default behavior, but -you may use \fB\-nibc\fR to keep block comments left-justified. Here is an -example: -.Sp -.Vb 2 -\& # this comment is indented (\-ibc, default) -\& if ($task) { yyy(); } -.Ve -.Sp -The alternative is \fB\-nibc\fR: -.Sp -.Vb 2 -\& # this comment is not indented (\-nibc) -\& if ($task) { yyy(); } -.Ve -.Sp -See also the next item, \fB\-isbc\fR, as well as \fB\-sbc\fR, for other ways to -have some indented and some outdented block comments. -.IP "\fB\-isbc\fR, \fB\-\-indent\-spaced\-block\-comments\fR" 4 -.IX Item "-isbc, --indent-spaced-block-comments" -If there is no leading space on the line, then the comment will not be -indented, and otherwise it may be. -.Sp -If both \fB\-ibc\fR and \fB\-isbc\fR are set, then \fB\-isbc\fR takes priority. -.IP "\fB\-olc\fR, \fB\-\-outdent\-long\-comments\fR" 4 -.IX Item "-olc, --outdent-long-comments" -When \fB\-olc\fR is set, lines which are full-line (block) comments longer -than the value \fBmaximum-line-length\fR will have their indentation -removed. This is the default; use \fB\-nolc\fR to prevent outdenting. -.IP "\fB\-msc=n\fR, \fB\-\-minimum\-space\-to\-comment=n\fR" 4 -.IX Item "-msc=n, --minimum-space-to-comment=n" -Side comments look best when lined up several spaces to the right of -code. Perltidy will try to keep comments at least n spaces to the -right. The default is n=4 spaces. -.IP "\fB\-fpsc=n\fR, \fB\-\-fixed\-position\-side\-comment=n\fR" 4 -.IX Item "-fpsc=n, --fixed-position-side-comment=n" -This parameter tells perltidy to line up side comments in column number \fBn\fR -whenever possible. The default, n=0, will not do this. -.IP "\fB\-iscl\fR, \fB\-\-ignore\-side\-comment\-lengths\fR" 4 -.IX Item "-iscl, --ignore-side-comment-lengths" -This parameter causes perltidy to ignore the length of side comments when -setting line breaks. The default, \fB\-niscl\fR, is to include the length of -side comments when breaking lines to stay within the length prescribed -by the \fB\-l=n\fR maximum line length parameter. For example, the following -long single line would remain intact with \-l=80 and \-iscl: -.Sp -.Vb 2 -\& perltidy \-l=80 \-iscl -\& $vmsfile =~ s/;[\ed\e\-]*$//; # Clip off version number; we can use a newer version as well -.Ve -.Sp -whereas without the \-iscl flag the line will be broken: -.Sp -.Vb 3 -\& perltidy \-l=80 -\& $vmsfile =~ s/;[\ed\e\-]*$// -\& ; # Clip off version number; we can use a newer version as well -.Ve -.IP "\fB\-hsc\fR, \fB\-\-hanging\-side\-comments\fR" 4 -.IX Item "-hsc, --hanging-side-comments" -By default, perltidy tries to identify and align \*(L"hanging side -comments\*(R", which are something like this: -.Sp -.Vb 3 -\& my $IGNORE = 0; # This is a side comment -\& # This is a hanging side comment -\& # And so is this -.Ve -.Sp -A comment is considered to be a hanging side comment if (1) it immediately -follows a line with a side comment, or another hanging side comment, and -(2) there is some leading whitespace on the line. -To deactivate this feature, use \fB\-nhsc\fR or \fB\-\-nohanging\-side\-comments\fR. -If block comments are preceded by a blank line, or have no leading -whitespace, they will not be mistaken as hanging side comments. -.IP "Closing Side Comments" 4 -.IX Item "Closing Side Comments" -A closing side comment is a special comment which perltidy can -automatically create and place after the closing brace of a code block. -They can be useful for code maintenance and debugging. The command -\&\fB\-csc\fR (or \fB\-\-closing\-side\-comments\fR) adds or updates closing side -comments. For example, here is a small code snippet -.Sp -.Vb 8 -\& sub message { -\& if ( !defined( $_[0] ) ) { -\& print("Hello, World\en"); -\& } -\& else { -\& print( $_[0], "\en" ); -\& } -\& } -.Ve -.Sp -And here is the result of processing with \f(CW\*(C`perltidy \-csc\*(C'\fR: -.Sp -.Vb 8 -\& sub message { -\& if ( !defined( $_[0] ) ) { -\& print("Hello, World\en"); -\& } -\& else { -\& print( $_[0], "\en" ); -\& } -\& } ## end sub message -.Ve -.Sp -A closing side comment was added for \f(CW\*(C`sub message\*(C'\fR in this case, but not -for the \f(CW\*(C`if\*(C'\fR and \f(CW\*(C`else\*(C'\fR blocks, because they were below the 6 line -cutoff limit for adding closing side comments. This limit may be -changed with the \fB\-csci\fR command, described below. -.Sp -The command \fB\-dcsc\fR (or \fB\-\-delete\-closing\-side\-comments\fR) reverses this -process and removes these comments. -.Sp -Several commands are available to modify the behavior of these two basic -commands, \fB\-csc\fR and \fB\-dcsc\fR: -.RS 4 -.IP "\fB\-csci=n\fR, or \fB\-\-closing\-side\-comment\-interval=n\fR" 4 -.IX Item "-csci=n, or --closing-side-comment-interval=n" -where \f(CW\*(C`n\*(C'\fR is the minimum number of lines that a block must have in -order for a closing side comment to be added. The default value is -\&\f(CW\*(C`n=6\*(C'\fR. To illustrate: -.Sp -.Vb 9 -\& # perltidy \-csci=2 \-csc -\& sub message { -\& if ( !defined( $_[0] ) ) { -\& print("Hello, World\en"); -\& } ## end if ( !defined( $_[0] )) -\& else { -\& print( $_[0], "\en" ); -\& } ## end else [ if ( !defined( $_[0] )) -\& } ## end sub message -.Ve -.Sp -Now the \f(CW\*(C`if\*(C'\fR and \f(CW\*(C`else\*(C'\fR blocks are commented. However, now this has -become very cluttered. -.IP "\fB\-cscp=string\fR, or \fB\-\-closing\-side\-comment\-prefix=string\fR" 4 -.IX Item "-cscp=string, or --closing-side-comment-prefix=string" -where string is the prefix used before the name of the block type. The -default prefix, shown above, is \f(CW\*(C`## end\*(C'\fR. This string will be added to -closing side comments, and it will also be used to recognize them in -order to update, delete, and format them. Any comment identified as a -closing side comment will be placed just a single space to the right of -its closing brace. -.IP "\fB\-cscl=string\fR, or \fB\-\-closing\-side\-comment\-list\fR" 4 -.IX Item "-cscl=string, or --closing-side-comment-list" -where \f(CW\*(C`string\*(C'\fR is a list of block types to be tagged with closing side -comments. By default, all code block types preceded by a keyword or -label (such as \f(CW\*(C`if\*(C'\fR, \f(CW\*(C`sub\*(C'\fR, and so on) will be tagged. The \fB\-cscl\fR -command changes the default list to be any selected block types; see -\&\*(L"Specifying Block Types\*(R". -For example, the following command -requests that only \f(CW\*(C`sub\*(C'\fR's, labels, \f(CW\*(C`BEGIN\*(C'\fR, and \f(CW\*(C`END\*(C'\fR blocks be -affected by any \fB\-csc\fR or \fB\-dcsc\fR operation: -.Sp -.Vb 1 -\& \-cscl="sub : BEGIN END" -.Ve -.IP "\fB\-csct=n\fR, or \fB\-\-closing\-side\-comment\-maximum\-text=n\fR" 4 -.IX Item "-csct=n, or --closing-side-comment-maximum-text=n" -The text appended to certain block types, such as an \f(CW\*(C`if\*(C'\fR block, is -whatever lies between the keyword introducing the block, such as \f(CW\*(C`if\*(C'\fR, -and the opening brace. Since this might be too much text for a side -comment, there needs to be a limit, and that is the purpose of this -parameter. The default value is \f(CW\*(C`n=20\*(C'\fR, meaning that no additional -tokens will be appended to this text after its length reaches 20 -characters. Omitted text is indicated with \f(CW\*(C`...\*(C'\fR. (Tokens, including -sub names, are never truncated, however, so actual lengths may exceed -this). To illustrate, in the above example, the appended text of the -first block is \f(CW\*(C` ( !defined( $_[0] )...\*(C'\fR. The existing limit of -\&\f(CW\*(C`n=20\*(C'\fR caused this text to be truncated, as indicated by the \f(CW\*(C`...\*(C'\fR. See -the next flag for additional control of the abbreviated text. -.IP "\fB\-cscb\fR, or \fB\-\-closing\-side\-comments\-balanced\fR" 4 -.IX Item "-cscb, or --closing-side-comments-balanced" -As discussed in the previous item, when the -closing-side-comment-maximum-text limit is exceeded the comment text must -be truncated. Older versions of perltidy terminated with three dots, and this -can still be achieved with \-ncscb: -.Sp -.Vb 2 -\& perltidy \-csc \-ncscb -\& } ## end foreach my $foo (sort { $b cmp $a ... -.Ve -.Sp -However this causes a problem with editors which cannot recognize -comments or are not configured to do so because they cannot \*(L"bounce\*(R" around in -the text correctly. The \fB\-cscb\fR flag has been added to -help them by appending appropriate balancing structure: -.Sp -.Vb 2 -\& perltidy \-csc \-cscb -\& } ## end foreach my $foo (sort { $b cmp $a ... }) -.Ve -.Sp -The default is \fB\-cscb\fR. -.IP "\fB\-csce=n\fR, or \fB\-\-closing\-side\-comment\-else\-flag=n\fR" 4 -.IX Item "-csce=n, or --closing-side-comment-else-flag=n" -The default, \fBn=0\fR, places the text of the opening \f(CW\*(C`if\*(C'\fR statement after any -terminal \f(CW\*(C`else\*(C'\fR. -.Sp -If \fBn=2\fR is used, then each \f(CW\*(C`elsif\*(C'\fR is also given the text of the opening -\&\f(CW\*(C`if\*(C'\fR statement. Also, an \f(CW\*(C`else\*(C'\fR will include the text of a preceding -\&\f(CW\*(C`elsif\*(C'\fR statement. Note that this may result some long closing -side comments. -.Sp -If \fBn=1\fR is used, the results will be the same as \fBn=2\fR whenever the -resulting line length is less than the maximum allowed. -.IP "\fB\-cscb\fR, or \fB\-\-closing\-side\-comments\-balanced\fR" 4 -.IX Item "-cscb, or --closing-side-comments-balanced" -When using closing-side-comments, and the closing-side-comment-maximum-text -limit is exceeded, then the comment text must be abbreviated. -It is terminated with three dots if the \fB\-cscb\fR flag is negated: -.Sp -.Vb 2 -\& perltidy \-csc \-ncscb -\& } ## end foreach my $foo (sort { $b cmp $a ... -.Ve -.Sp -This causes a problem with older editors which do not recognize comments -because they cannot \*(L"bounce\*(R" around in the text correctly. The \fB\-cscb\fR -flag tries to help them by appending appropriate terminal balancing structures: -.Sp -.Vb 2 -\& perltidy \-csc \-cscb -\& } ## end foreach my $foo (sort { $b cmp $a ... }) -.Ve -.Sp -The default is \fB\-cscb\fR. -.IP "\fB\-cscw\fR, or \fB\-\-closing\-side\-comment\-warnings\fR" 4 -.IX Item "-cscw, or --closing-side-comment-warnings" -This parameter is intended to help make the initial transition to the use of -closing side comments. -It causes two -things to happen if a closing side comment replaces an existing, different -closing side comment: first, an error message will be issued, and second, the -original side comment will be placed alone on a new specially marked comment -line for later attention. -.Sp -The intent is to avoid clobbering existing hand-written side comments -which happen to match the pattern of closing side comments. This flag -should only be needed on the first run with \fB\-csc\fR. -.RE -.RS 4 -.Sp -\&\fBImportant Notes on Closing Side Comments:\fR -.IP "\(bu" 4 -Closing side comments are only placed on lines terminated with a closing -brace. Certain closing styles, such as the use of cuddled elses -(\fB\-ce\fR), preclude the generation of some closing side comments. -.IP "\(bu" 4 -Please note that adding or deleting of closing side comments takes -place only through the commands \fB\-csc\fR or \fB\-dcsc\fR. The other commands, -if used, merely modify the behavior of these two commands. -.IP "\(bu" 4 -It is recommended that the \fB\-cscw\fR flag be used along with \fB\-csc\fR on -the first use of perltidy on a given file. This will prevent loss of -any existing side comment data which happens to have the csc prefix. -.IP "\(bu" 4 -Once you use \fB\-csc\fR, you should continue to use it so that any -closing side comments remain correct as code changes. Otherwise, these -comments will become incorrect as the code is updated. -.IP "\(bu" 4 -If you edit the closing side comments generated by perltidy, you must also -change the prefix to be different from the closing side comment prefix. -Otherwise, your edits will be lost when you rerun perltidy with \fB\-csc\fR. For -example, you could simply change \f(CW\*(C`## end\*(C'\fR to be \f(CW\*(C`## End\*(C'\fR, since the test is -case sensitive. You may also want to use the \fB\-ssc\fR flag to keep these -modified closing side comments spaced the same as actual closing side comments. -.IP "\(bu" 4 -Temporarily generating closing side comments is a useful technique for -exploring and/or debugging a perl script, especially one written by someone -else. You can always remove them with \fB\-dcsc\fR. -.RE -.RS 4 -.RE -.IP "Static Block Comments" 4 -.IX Item "Static Block Comments" -Static block comments are block comments with a special leading pattern, -\&\f(CW\*(C`##\*(C'\fR by default, which will be treated slightly differently from other -block comments. They effectively behave as if they had glue along their -left and top edges, because they stick to the left edge and previous line -when there is no blank spaces in those places. This option is -particularly useful for controlling how commented code is displayed. -.RS 4 -.IP "\fB\-sbc\fR, \fB\-\-static\-block\-comments\fR" 4 -.IX Item "-sbc, --static-block-comments" -When \fB\-sbc\fR is used, a block comment with a special leading pattern, \f(CW\*(C`##\*(C'\fR by -default, will be treated specially. -.Sp -Comments so identified are treated as follows: -.RS 4 -.IP "\(bu" 4 -If there is no leading space on the line, then the comment will not -be indented, and otherwise it may be, -.IP "\(bu" 4 -no new blank line will be -inserted before such a comment, and -.IP "\(bu" 4 -such a comment will never become -a hanging side comment. -.RE -.RS 4 -.Sp -For example, assuming \f(CW@month_of_year\fR is -left-adjusted: -.Sp -.Vb 4 -\& @month_of_year = ( # \-sbc (default) -\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, -\& ## \*(AqDec\*(Aq, \*(AqNov\*(Aq -\& \*(AqNov\*(Aq, \*(AqDec\*(Aq); -.Ve -.Sp -Without this convention, the above code would become -.Sp -.Vb 2 -\& @month_of_year = ( # \-nsbc -\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, -\& -\& ## \*(AqDec\*(Aq, \*(AqNov\*(Aq -\& \*(AqNov\*(Aq, \*(AqDec\*(Aq -\& ); -.Ve -.Sp -which is not as clear. -The default is to use \fB\-sbc\fR. This may be deactivated with \fB\-nsbc\fR. -.RE -.IP "\fB\-sbcp=string\fR, \fB\-\-static\-block\-comment\-prefix=string\fR" 4 -.IX Item "-sbcp=string, --static-block-comment-prefix=string" -This parameter defines the prefix used to identify static block comments -when the \fB\-sbc\fR parameter is set. The default prefix is \f(CW\*(C`##\*(C'\fR, -corresponding to \f(CW\*(C`\-sbcp=##\*(C'\fR. The prefix is actually part of a perl -pattern used to match lines and it must either begin with \f(CW\*(C`#\*(C'\fR or \f(CW\*(C`^#\*(C'\fR. -In the first case a prefix ^\es* will be added to match any leading -whitespace, while in the second case the pattern will match only -comments with no leading whitespace. For example, to -identify all comments as static block comments, one would use \f(CW\*(C`\-sbcp=#\*(C'\fR. -To identify all left-adjusted comments as static block comments, use \f(CW\*(C`\-sbcp=\*(Aq^#\*(Aq\*(C'\fR. -.Sp -Please note that \fB\-sbcp\fR merely defines the pattern used to identify static -block comments; it will not be used unless the switch \fB\-sbc\fR is set. Also, -please be aware that since this string is used in a perl regular expression -which identifies these comments, it must enable a valid regular expression to -be formed. -.Sp -A pattern which can be useful is: -.Sp -.Vb 1 -\& \-sbcp=^#{2,}[^\es#] -.Ve -.Sp -This pattern requires a static block comment to have at least one character -which is neither a # nor a space. It allows a line containing only '#' -characters to be rejected as a static block comment. Such lines are often used -at the start and end of header information in subroutines and should not be -separated from the intervening comments, which typically begin with just a -single '#'. -.IP "\fB\-osbc\fR, \fB\-\-outdent\-static\-block\-comments\fR" 4 -.IX Item "-osbc, --outdent-static-block-comments" -The command \fB\-osbc\fR will cause static block comments to be outdented by 2 -spaces (or whatever \fB\-ci=n\fR has been set to), if possible. -.RE -.RS 4 -.RE -.IP "Static Side Comments" 4 -.IX Item "Static Side Comments" -Static side comments are side comments with a special leading pattern. -This option can be useful for controlling how commented code is displayed -when it is a side comment. -.RS 4 -.IP "\fB\-ssc\fR, \fB\-\-static\-side\-comments\fR" 4 -.IX Item "-ssc, --static-side-comments" -When \fB\-ssc\fR is used, a side comment with a static leading pattern, which is -\&\f(CW\*(C`##\*(C'\fR by default, will be spaced only a single space from previous -character, and it will not be vertically aligned with other side comments. -.Sp -The default is \fB\-nssc\fR. -.IP "\fB\-sscp=string\fR, \fB\-\-static\-side\-comment\-prefix=string\fR" 4 -.IX Item "-sscp=string, --static-side-comment-prefix=string" -This parameter defines the prefix used to identify static side comments -when the \fB\-ssc\fR parameter is set. The default prefix is \f(CW\*(C`##\*(C'\fR, -corresponding to \f(CW\*(C`\-sscp=##\*(C'\fR. -.Sp -Please note that \fB\-sscp\fR merely defines the pattern used to identify -static side comments; it will not be used unless the switch \fB\-ssc\fR is -set. Also, note that this string is used in a perl regular expression -which identifies these comments, so it must enable a valid regular -expression to be formed. -.RE -.RS 4 -.RE -.SS "Skipping Selected Sections of Code" -.IX Subsection "Skipping Selected Sections of Code" -Selected lines of code may be passed verbatim to the output without any -formatting. This feature is enabled by default but can be disabled with -the \fB\-\-noformat\-skipping\fR or \fB\-nfs\fR flag. It should be used sparingly to -avoid littering code with markers, but it might be helpful for working -around occasional problems. For example it might be useful for keeping -the indentation of old commented code unchanged, keeping indentation of -long blocks of aligned comments unchanged, keeping certain list -formatting unchanged, or working around a glitch in perltidy. -.IP "\fB\-fs\fR, \fB\-\-format\-skipping\fR" 4 -.IX Item "-fs, --format-skipping" -This flag, which is enabled by default, causes any code between -special beginning and ending comment markers to be passed to the -output without formatting. The default beginning marker is #<<< -and the default ending marker is #>>> but they -may be changed (see next items below). Additional text may appear on -these special comment lines provided that it is separated from the -marker by at least one space. For example -.Sp -.Vb 7 -\& #<<< do not let perltidy touch this -\& my @list = (1, -\& 1, 1, -\& 1, 2, 1, -\& 1, 3, 3, 1, -\& 1, 4, 6, 4, 1,); -\& #>>> -.Ve -.Sp -The comment markers may be placed at any location that a block comment may -appear. If they do not appear to be working, use the \-log flag and examine the -\&\fI.LOG\fR file. Use \fB\-nfs\fR to disable this feature. -.IP "\fB\-fsb=string\fR, \fB\-\-format\-skipping\-begin=string\fR" 4 -.IX Item "-fsb=string, --format-skipping-begin=string" -The \fB\-fsb=string\fR parameter may be used to change the beginning marker for -format skipping. The default is equivalent to \-fsb='#<<<'. The string that -you enter must begin with a # and should be in quotes as necessary to get past -the command shell of your system. It is actually the leading text of a pattern -that is constructed by appending a '\es', so you must also include backslashes -for characters to be taken literally rather than as patterns. -.Sp -Some examples show how example strings become patterns: -.Sp -.Vb 3 -\& \-fsb=\*(Aq#\e{\e{\e{\*(Aq becomes /^#\e{\e{\e{\es/ which matches #{{{ but not #{{{{ -\& \-fsb=\*(Aq#\e*\e*\*(Aq becomes /^#\e*\e*\es/ which matches #** but not #*** -\& \-fsb=\*(Aq#\e*{2,}\*(Aq becomes /^#\e*{2,}\es/ which matches #** and #***** -.Ve -.IP "\fB\-fse=string\fR, \fB\-\-format\-skipping\-end=string\fR" 4 -.IX Item "-fse=string, --format-skipping-end=string" -The \fB\-fsb=string\fR is the corresponding parameter used to change the -ending marker for format skipping. The default is equivalent to -\&\-fse='#<<<'. -.SS "Line Break Control" -.IX Subsection "Line Break Control" -The parameters in this section control breaks after -non-blank lines of code. Blank lines are controlled -separately by parameters in the section \*(L"Blank Line -Control\*(R". -.IP "\fB\-fnl\fR, \fB\-\-freeze\-newlines\fR" 4 -.IX Item "-fnl, --freeze-newlines" -If you do not want any changes to the line breaks within -lines of code in your script, set -\&\fB\-fnl\fR, and they will remain fixed, and the rest of the commands in -this section and sections -\&\*(L"Controlling List Formatting\*(R", -\&\*(L"Retaining or Ignoring Existing Line Breaks\*(R". -You may want to use \fB\-noll\fR with this. -.Sp -Note: If you also want to keep your blank lines exactly -as they are, you can use the \fB\-fbl\fR flag which is described -in the section \*(L"Blank Line Control\*(R". -.IP "\fB\-ce\fR, \fB\-\-cuddled\-else\fR" 4 -.IX Item "-ce, --cuddled-else" -Enable the \*(L"cuddled else\*(R" style, in which \f(CW\*(C`else\*(C'\fR and \f(CW\*(C`elsif\*(C'\fR are -follow immediately after the curly brace closing the previous block. -The default is not to use cuddled elses, and is indicated with the flag -\&\fB\-nce\fR or \fB\-\-nocuddled\-else\fR. Here is a comparison of the -alternatives: -.Sp -.Vb 5 -\& if ($task) { -\& yyy(); -\& } else { # \-ce -\& zzz(); -\& } -\& -\& if ($task) { -\& yyy(); -\& } -\& else { # \-nce (default) -\& zzz(); -\& } -.Ve -.IP "\fB\-cb\fR, \fB\-\-cuddled\-blocks\fR" 4 -.IX Item "-cb, --cuddled-blocks" -This flag enables the \*(L"cuddled else\*(R" format style on a chain of specified block -types. The default is to apply it to a chain consisting of try-catch-finally -blocks, but it can apply to any desired chain of blocks by specifying their -names on a separate parameter \fB\-cbl\fR, described in the next section. -.Sp -.Vb 9 -\& # perltidy \-cb: -\& try { -\& throw Error::Simple( "ok 2\en", 2 ); -\& } catch Error::Simple with { -\& my $err = shift; -\& print "$err"; -\& } finally { -\& print "ok 3\en"; -\& }; -.Ve -.Sp -Cuddling between a pair of code blocks requires that the closing brace of the -first block start a new line. If this block is entirely on one line in the -input file, it is necessary to decide if it should be broken to allow cuddling. -This decision is controlled by the flag \fB\-cbo=n\fR discussed below. The default -and recommended value of \fB\-cbo=1\fR bases this decision on the first block in -the chain. If it spans multiple lines then cuddling is made and continues -along the chain, regardless of the sizes of subsequent blocks. Otherwise, short -lines remain intact. -.Sp -So for example, the \fB\-cb\fR flag would not have any effect if the above snippet -is rewritten as -.Sp -.Vb 3 -\& try { throw Error::Simple( "ok 2\en", 2 ); } -\& catch Error::Simple with { my $err = shift; print "$err"; } -\& finally { print "ok 3\en"; }; -.Ve -.Sp -If the first block spans multiple lines, then cuddling can be done and will -continue for the subsequent blocks in the chain, as illustrated in the previous -snippet. -.Sp -If there are blank lines between cuddled blocks they will be eliminated. If -there are comments after the closing brace where cuddling would occur then -cuddling will be prevented. If this occurs, cuddling will restart later in the -chain if possible. -.Sp -The default for this parameter is \fB\-\-nocuddled\-blocks\fR -.IP "\fB\-cbl\fR, \fB\-\-cuddled\-block\-list\fR" 4 -.IX Item "-cbl, --cuddled-block-list" -The block types to which the \fB\-cuddled\-blocks\fR style applies is defined by -this parameter. This parameter is a character string, giving a list of -block types separated by dashes. -.Sp -The default value for this string is -.Sp -.Vb 1 -\& \-cbl="try\-catch\-finally" -.Ve -.Sp -This string will cause cuddled formatting to be applied to every block in a chain -starting with a \*(L"try\*(R" and followed by any number of \*(L"catch\*(R" and \*(L"finally\*(R" -blocks. -.Sp -In general, a string describing a chain of blocks has the form -.Sp -.Vb 1 -\& \-cbl="word1\-word2\-word3\-...\-wordn" -.Ve -.Sp -In this case, a chain begins when an opening block brace preceded by word1 in -the list is encountered. The chain continues if the closing block brace is -followed immediately by any of word2, word3, etc. -.Sp -If the leading word, word1, might be repeated later in a chain then it should -also be included amoung the secondary words. -.Sp -Multiple chain types may be specified by separating the strings with commas or -spaces. So for example if we have two chains of code blocks, f1\-f2\-f3 and g1\-g2\-g3\-g4, -they could be specified as -.Sp -.Vb 3 -\& \-cbl="f1\-f2\-f3 g1\-g2\-g3\-g4" -\&or -\& \-cbl=f1\-f2\-f3,g1\-g2\-g3\-g4 -.Ve -.Sp -Spaces are easier to read but commas may avoid quotation difficulties when -entering data in a command shell. -.Sp -To define secondary words that apply to all block types, other than those explicitly specified, -the leading word can be omitted. For example, the built-in cuddled-else format specified by -the \fB\-ce\fR flag can be approximately specified by -.Sp -.Vb 3 -\& \-cbl="if\-else\-elsif unless\-else\-elsif \-continue" -\&or -\& \-cbl=if\-else\-elsif,unless\-else\-elsif,\-continue -.Ve -.Sp -The final string \-continue allows cuddling the optional continue block -which may follow may other block types. -.Sp -As a diagnostic check, the flag \fB\-\-dump\-cuddled\-block\-list\fR or \fB\-dcbl\fR can be -used to view the hash of values this flag creates. -.Sp -Finally, note that the \fB\-cbl\fR flag by itself merely specifies which blocks are formatted -with the cuddled format. It has no effect unless this formatting style is activated with -\&\fB\-cb\fR. -.IP "\fB\-cbo=n\fR, \fB\-\-cuddled\-break\-option=n\fR" 4 -.IX Item "-cbo=n, --cuddled-break-option=n" -Cuddled formatting is only possible between a pair of code blocks if the -closing brace of the first block starts a new line. If a block is encountered -which is entirely on a single line, and cuddled formatting is selected, it is -necessary to make a decision as to whether or not to \*(L"break\*(R" the block, meaning -to cause it to span multiple lines. This parameter controls that decision. The -options are: -.Sp -.Vb 4 -\& cbo=0 Never force a short block to break. -\& cbo=1 If the first of a pair of blocks is broken in the input file, -\& then break the second. -\& cbo=2 Break open all blocks for maximal cuddled formatting. -.Ve -.Sp -The default and recommended value is \fBcbo=1\fR. With this value, if the starting -block of a chain spans multiple lines, then a cascade of breaks will occur for -remaining blocks causing the entire chain to be cuddled. -.Sp -The option \fBcbo=0\fR can produce erratic cuddling if there are numerous one-line -blocks. -.Sp -The option \fBcbo=2\fR produces maximal cuddling but will not allow any short blocks. -.Sp -Note: at present, this option currently only applies to blocks controlled by -the \fB\-cb\fR flag. Cuddling under the \fB\-ce\fR flag corresponds approximately to -\&\fB\-cbo=1\fR but cannot currently be changed. -.IP "\fB\-bl\fR, \fB\-\-opening\-brace\-on\-new\-line\fR" 4 -.IX Item "-bl, --opening-brace-on-new-line" -Use the flag \fB\-bl\fR to place the opening brace on a new line: -.Sp -.Vb 4 -\& if ( $input_file eq \*(Aq\-\*(Aq ) # \-bl -\& { -\& important_function(); -\& } -.Ve -.Sp -This flag applies to all structural blocks, including named sub's (unless -the \fB\-sbl\fR flag is set \*(-- see next item). -.Sp -The default style, \fB\-nbl\fR, places an opening brace on the same line as -the keyword introducing it. For example, -.Sp -.Vb 1 -\& if ( $input_file eq \*(Aq\-\*(Aq ) { # \-nbl (default) -.Ve -.IP "\fB\-sbl\fR, \fB\-\-opening\-sub\-brace\-on\-new\-line\fR" 4 -.IX Item "-sbl, --opening-sub-brace-on-new-line" -The flag \fB\-sbl\fR can be used to override the value of \fB\-bl\fR for -the opening braces of named sub's. For example, -.Sp -.Vb 1 -\& perltidy \-sbl -.Ve -.Sp -produces this result: -.Sp -.Vb 9 -\& sub message -\& { -\& if (!defined($_[0])) { -\& print("Hello, World\en"); -\& } -\& else { -\& print($_[0], "\en"); -\& } -\& } -.Ve -.Sp -This flag is negated with \fB\-nsbl\fR. If \fB\-sbl\fR is not specified, -the value of \fB\-bl\fR is used. -.IP "\fB\-asbl\fR, \fB\-\-opening\-anonymous\-sub\-brace\-on\-new\-line\fR" 4 -.IX Item "-asbl, --opening-anonymous-sub-brace-on-new-line" -The flag \fB\-asbl\fR is like the \fB\-sbl\fR flag except that it applies -to anonymous sub's instead of named subs. For example -.Sp -.Vb 1 -\& perltidy \-asbl -.Ve -.Sp -produces this result: -.Sp -.Vb 9 -\& $a = sub -\& { -\& if ( !defined( $_[0] ) ) { -\& print("Hello, World\en"); -\& } -\& else { -\& print( $_[0], "\en" ); -\& } -\& }; -.Ve -.Sp -This flag is negated with \fB\-nasbl\fR, and the default is \fB\-nasbl\fR. -.IP "\fB\-bli\fR, \fB\-\-brace\-left\-and\-indent\fR" 4 -.IX Item "-bli, --brace-left-and-indent" -The flag \fB\-bli\fR is the same as \fB\-bl\fR but in addition it causes one -unit of continuation indentation ( see \fB\-ci\fR ) to be placed before -an opening and closing block braces. -.Sp -For example, -.Sp -.Vb 4 -\& if ( $input_file eq \*(Aq\-\*(Aq ) # \-bli -\& { -\& important_function(); -\& } -.Ve -.Sp -By default, this extra indentation occurs for blocks of type: -\&\fBif\fR, \fBelsif\fR, \fBelse\fR, \fBunless\fR, \fBfor\fR, \fBforeach\fR, \fBsub\fR, -\&\fBwhile\fR, \fBuntil\fR, and also with a preceding label. The next item -shows how to change this. -.IP "\fB\-blil=s\fR, \fB\-\-brace\-left\-and\-indent\-list=s\fR" 4 -.IX Item "-blil=s, --brace-left-and-indent-list=s" -Use this parameter to change the types of block braces for which the -\&\fB\-bli\fR flag applies; see \*(L"Specifying Block Types\*(R". For example, -\&\fB\-blil='if elsif else'\fR would apply it to only \f(CW\*(C`if/elsif/else\*(C'\fR blocks. -.IP "\fB\-bar\fR, \fB\-\-opening\-brace\-always\-on\-right\fR" 4 -.IX Item "-bar, --opening-brace-always-on-right" -The default style, \fB\-nbl\fR places the opening code block brace on a new -line if it does not fit on the same line as the opening keyword, like -this: -.Sp -.Vb 5 -\& if ( $bigwasteofspace1 && $bigwasteofspace2 -\& || $bigwasteofspace3 && $bigwasteofspace4 ) -\& { -\& big_waste_of_time(); -\& } -.Ve -.Sp -To force the opening brace to always be on the right, use the \fB\-bar\fR -flag. In this case, the above example becomes -.Sp -.Vb 4 -\& if ( $bigwasteofspace1 && $bigwasteofspace2 -\& || $bigwasteofspace3 && $bigwasteofspace4 ) { -\& big_waste_of_time(); -\& } -.Ve -.Sp -A conflict occurs if both \fB\-bl\fR and \fB\-bar\fR are specified. -.IP "\fB\-otr\fR, \fB\-\-opening\-token\-right\fR and related flags" 4 -.IX Item "-otr, --opening-token-right and related flags" -The \fB\-otr\fR flag is a hint that perltidy should not place a break between a -comma and an opening token. For example: -.Sp -.Vb 6 -\& # default formatting -\& push @{ $self\->{$module}{$key} }, -\& { -\& accno => $ref\->{accno}, -\& description => $ref\->{description} -\& }; -\& -\& # perltidy \-otr -\& push @{ $self\->{$module}{$key} }, { -\& accno => $ref\->{accno}, -\& description => $ref\->{description} -\& }; -.Ve -.Sp -The flag \fB\-otr\fR is actually an abbreviation for three other flags -which can be used to control parens, hash braces, and square brackets -separately if desired: -.Sp -.Vb 3 -\& \-opr or \-\-opening\-paren\-right -\& \-ohbr or \-\-opening\-hash\-brace\-right -\& \-osbr or \-\-opening\-square\-bracket\-right -.Ve -.IP "\fB\-wn\fR, \fB\-\-weld\-nested\-containers\fR" 4 -.IX Item "-wn, --weld-nested-containers" -The \fB\-wn\fR flag causes closely nested pairs of opening and closing container -symbols (curly braces, brackets, or parens) to be \*(L"welded\*(R" together, meaning -that they are treated as if combined into a single unit, with the indentation -of the innermost code reduced to be as if there were just a single container -symbol. -.Sp -For example: -.Sp -.Vb 6 -\& # default formatting -\& do { -\& { -\& next if $x == $y; -\& } -\& } until $x++ > $z; -\& -\& # perltidy \-wn -\& do { { -\& next if $x == $y; -\& } } until $x++ > $z; -.Ve -.Sp -When this flag is set perltidy makes a preliminary pass through the file and -identifies all nested pairs of containers. To qualify as a nested pair, the -closing container symbols must be immediately adjacent. The opening symbols -must either be adjacent, or, if the outer opening symbol is an opening -paren, they may be separated by any single non-container symbol or something -that looks like a function evaluation. -.Sp -Any container symbol may serve as both the inner container of one pair and as -the outer container of an adjacent pair. Consequently, any number of adjacent -opening or closing symbols may join together in weld. For example, here are -three levels of wrapped function calls: -.Sp -.Vb 9 -\& # default formatting -\& my (@date_time) = Localtime( -\& Date_to_Time( -\& Add_Delta_DHMS( -\& $year, $month, $day, $hour, $minute, $second, -\& \*(Aq0\*(Aq, $offset, \*(Aq0\*(Aq, \*(Aq0\*(Aq -\& ) -\& ) -\& ); -\& -\& # perltidy \-wn -\& my (@date_time) = Localtime( Date_to_Time( Add_Delta_DHMS( -\& $year, $month, $day, $hour, $minute, $second, -\& \*(Aq0\*(Aq, $offset, \*(Aq0\*(Aq, \*(Aq0\*(Aq -\& ) ) ); -.Ve -.Sp -Notice how the indentation of the inner lines are reduced by two levels in this -case. This example also shows the typical result of this formatting, namely it -is a sandwich consisting of an initial opening layer, a central section of any -complexity forming the \*(L"meat\*(R" of the sandwich, and a final closing layer. This -predictable structure helps keep the compacted structure readable. -.Sp -The inner sandwich layer is required to be at least one line thick. If this -cannot be achieved, welding does not occur. This constraint can cause -formatting to take a couple of iterations to stabilize when it is first applied -to a script. The \fB\-conv\fR flag can be used to insure that the final format is -achieved in a single run. -.Sp -Here is an example illustrating a welded container within a welded containers: -.Sp -.Vb 11 -\& # default formatting -\& $x\->badd( -\& bmul( -\& $class\->new( -\& abs( -\& $sx * int( $xr\->numify() ) & $sy * int( $yr\->numify() ) -\& ) -\& ), -\& $m -\& ) -\& ); -\& -\& # perltidy \-wn -\& $x\->badd( bmul( -\& $class\->new( abs( -\& $sx * int( $xr\->numify() ) & $sy * int( $yr\->numify() ) -\& ) ), -\& $m -\& ) ); -.Ve -.Sp -This format option is quite general but there are some limitations. -.Sp -One limitiation is that any line length limit still applies and can cause long -welded sections to be broken into multiple lines. -.Sp -Another limitation is that an opening symbol which delimits quoted text cannot -be included in a welded pair. This is because quote delimiters are treated -specially in perltidy. -.Sp -Finally, the stacking of containers defined by this flag have priority over -any other container stacking flags. This is because any welding is done first. -.IP "\fBVertical tightness\fR of non-block curly braces, parentheses, and square brackets." 4 -.IX Item "Vertical tightness of non-block curly braces, parentheses, and square brackets." -These parameters control what shall be called vertical tightness. Here are the -main points: -.RS 4 -.IP "\(bu" 4 -Opening tokens (except for block braces) are controlled by \fB\-vt=n\fR, or -\&\fB\-\-vertical\-tightness=n\fR, where -.Sp -.Vb 4 -\& \-vt=0 always break a line after opening token (default). -\& \-vt=1 do not break unless this would produce more than one -\& step in indentation in a line. -\& \-vt=2 never break a line after opening token -.Ve -.IP "\(bu" 4 -You must also use the \fB\-lp\fR flag when you use the \fB\-vt\fR flag; the -reason is explained below. -.IP "\(bu" 4 -Closing tokens (except for block braces) are controlled by \fB\-vtc=n\fR, or -\&\fB\-\-vertical\-tightness\-closing=n\fR, where -.Sp -.Vb 5 -\& \-vtc=0 always break a line before a closing token (default), -\& \-vtc=1 do not break before a closing token which is followed -\& by a semicolon or another closing token, and is not in -\& a list environment. -\& \-vtc=2 never break before a closing token. -.Ve -.Sp -The rules for \fB\-vtc=1\fR are designed to maintain a reasonable balance -between tightness and readability in complex lists. -.IP "\(bu" 4 -Different controls may be applied to different token types, -and it is also possible to control block braces; see below. -.IP "\(bu" 4 -Finally, please note that these vertical tightness flags are merely -hints to the formatter, and it cannot always follow them. Things which -make it difficult or impossible include comments, blank lines, blocks of -code within a list, and possibly the lack of the \fB\-lp\fR parameter. -Also, these flags may be ignored for very small lists (2 or 3 lines in -length). -.RE -.RS 4 -.Sp -Here are some examples: -.Sp -.Vb 7 -\& # perltidy \-lp \-vt=0 \-vtc=0 -\& %romanNumerals = ( -\& one => \*(AqI\*(Aq, -\& two => \*(AqII\*(Aq, -\& three => \*(AqIII\*(Aq, -\& four => \*(AqIV\*(Aq, -\& ); -\& -\& # perltidy \-lp \-vt=1 \-vtc=0 -\& %romanNumerals = ( one => \*(AqI\*(Aq, -\& two => \*(AqII\*(Aq, -\& three => \*(AqIII\*(Aq, -\& four => \*(AqIV\*(Aq, -\& ); -\& -\& # perltidy \-lp \-vt=1 \-vtc=1 -\& %romanNumerals = ( one => \*(AqI\*(Aq, -\& two => \*(AqII\*(Aq, -\& three => \*(AqIII\*(Aq, -\& four => \*(AqIV\*(Aq, ); -.Ve -.Sp -The difference between \fB\-vt=1\fR and \fB\-vt=2\fR is shown here: -.Sp -.Vb 6 -\& # perltidy \-lp \-vt=1 -\& $init\->add( -\& mysprintf( "(void)find_threadsv(%s);", -\& cstring( $threadsv_names[ $op\->targ ] ) -\& ) -\& ); -\& -\& # perltidy \-lp \-vt=2 -\& $init\->add( mysprintf( "(void)find_threadsv(%s);", -\& cstring( $threadsv_names[ $op\->targ ] ) -\& ) -\& ); -.Ve -.Sp -With \fB\-vt=1\fR, the line ending in \f(CW\*(C`add(\*(C'\fR does not combine with the next -line because the next line is not balanced. This can help with -readability, but \fB\-vt=2\fR can be used to ignore this rule. -.Sp -The tightest, and least readable, code is produced with both \f(CW\*(C`\-vt=2\*(C'\fR and -\&\f(CW\*(C`\-vtc=2\*(C'\fR: -.Sp -.Vb 3 -\& # perltidy \-lp \-vt=2 \-vtc=2 -\& $init\->add( mysprintf( "(void)find_threadsv(%s);", -\& cstring( $threadsv_names[ $op\->targ ] ) ) ); -.Ve -.Sp -Notice how the code in all of these examples collapses vertically as -\&\fB\-vt\fR increases, but the indentation remains unchanged. This is -because perltidy implements the \fB\-vt\fR parameter by first formatting as -if \fB\-vt=0\fR, and then simply overwriting one output line on top of the -next, if possible, to achieve the desired vertical tightness. The -\&\fB\-lp\fR indentation style has been designed to allow this vertical -collapse to occur, which is why it is required for the \fB\-vt\fR parameter. -.Sp -The \fB\-vt=n\fR and \fB\-vtc=n\fR parameters apply to each type of container -token. If desired, vertical tightness controls can be applied -independently to each of the closing container token types. -.Sp -The parameters for controlling parentheses are \fB\-pvt=n\fR or -\&\fB\-\-paren\-vertical\-tightness=n\fR, and \fB\-pcvt=n\fR or -\&\fB\-\-paren\-vertical\-tightness\-closing=n\fR. -.Sp -Likewise, the parameters for square brackets are \fB\-sbvt=n\fR or -\&\fB\-\-square\-bracket\-vertical\-tightness=n\fR, and \fB\-sbcvt=n\fR or -\&\fB\-\-square\-bracket\-vertical\-tightness\-closing=n\fR. -.Sp -Finally, the parameters for controlling non-code block braces are -\&\fB\-bvt=n\fR or \fB\-\-brace\-vertical\-tightness=n\fR, and \fB\-bcvt=n\fR or -\&\fB\-\-brace\-vertical\-tightness\-closing=n\fR. -.Sp -In fact, the parameter \fB\-vt=n\fR is actually just an abbreviation for -\&\fB\-pvt=n \-bvt=n sbvt=n\fR, and likewise \fB\-vtc=n\fR is an abbreviation -for \fB\-pvtc=n \-bvtc=n sbvtc=n\fR. -.RE -.IP "\fB\-bbvt=n\fR or \fB\-\-block\-brace\-vertical\-tightness=n\fR" 4 -.IX Item "-bbvt=n or --block-brace-vertical-tightness=n" -The \fB\-bbvt=n\fR flag is just like the \fB\-vt=n\fR flag but applies -to opening code block braces. -.Sp -.Vb 4 -\& \-bbvt=0 break after opening block brace (default). -\& \-bbvt=1 do not break unless this would produce more than one -\& step in indentation in a line. -\& \-bbvt=2 do not break after opening block brace. -.Ve -.Sp -It is necessary to also use either \fB\-bl\fR or \fB\-bli\fR for this to work, -because, as with other vertical tightness controls, it is implemented by -simply overwriting a line ending with an opening block brace with the -subsequent line. For example: -.Sp -.Vb 10 -\& # perltidy \-bli \-bbvt=0 -\& if ( open( FILE, "< $File" ) ) -\& { -\& while ( $File = ) -\& { -\& $In .= $File; -\& $count++; -\& } -\& close(FILE); -\& } -\& -\& # perltidy \-bli \-bbvt=1 -\& if ( open( FILE, "< $File" ) ) -\& { while ( $File = ) -\& { $In .= $File; -\& $count++; -\& } -\& close(FILE); -\& } -.Ve -.Sp -By default this applies to blocks associated with keywords \fBif\fR, -\&\fBelsif\fR, \fBelse\fR, \fBunless\fR, \fBfor\fR, \fBforeach\fR, \fBsub\fR, \fBwhile\fR, -\&\fBuntil\fR, and also with a preceding label. This can be changed with -the parameter \fB\-bbvtl=string\fR, or -\&\fB\-\-block\-brace\-vertical\-tightness\-list=string\fR, where \fBstring\fR is a -space-separated list of block types. For more information on the -possible values of this string, see \*(L"Specifying Block Types\*(R" -.Sp -For example, if we want to just apply this style to \f(CW\*(C`if\*(C'\fR, -\&\f(CW\*(C`elsif\*(C'\fR, and \f(CW\*(C`else\*(C'\fR blocks, we could use -\&\f(CW\*(C`perltidy \-bli \-bbvt=1 \-bbvtl=\*(Aqif elsif else\*(Aq\*(C'\fR. -.Sp -There is no vertical tightness control for closing block braces; with -one exception they will be placed on separate lines. -The exception is that a cascade of closing block braces may -be stacked on a single line. See \fB\-scbb\fR. -.IP "\fB\-sot\fR, \fB\-\-stack\-opening\-tokens\fR and related flags" 4 -.IX Item "-sot, --stack-opening-tokens and related flags" -The \fB\-sot\fR flag tells perltidy to \*(L"stack\*(R" opening tokens -when possible to avoid lines with isolated opening tokens. -.Sp -For example: -.Sp -.Vb 8 -\& # default -\& $opt_c = Text::CSV_XS\->new( -\& { -\& binary => 1, -\& sep_char => $opt_c, -\& always_quote => 1, -\& } -\& ); -\& -\& # \-sot -\& $opt_c = Text::CSV_XS\->new( { -\& binary => 1, -\& sep_char => $opt_c, -\& always_quote => 1, -\& } -\& ); -.Ve -.Sp -For detailed control of individual closing tokens the following -controls can be used: -.Sp -.Vb 4 -\& \-sop or \-\-stack\-opening\-paren -\& \-sohb or \-\-stack\-opening\-hash\-brace -\& \-sosb or \-\-stack\-opening\-square\-bracket -\& \-sobb or \-\-stack\-opening\-block\-brace -.Ve -.Sp -The flag \fB\-sot\fR is an abbreviation for \fB\-sop \-sohb \-sosb\fR. -.Sp -The flag \fB\-sobb\fR is a abbreviation for \fB\-bbvt=2 \-bbvtl='*'\fR. This -will case a cascade of opening block braces to appear on a single line, -although this an uncommon occurrence except in test scripts. -.IP "\fB\-sct\fR, \fB\-\-stack\-closing\-tokens\fR and related flags" 4 -.IX Item "-sct, --stack-closing-tokens and related flags" -The \fB\-sct\fR flag tells perltidy to \*(L"stack\*(R" closing tokens -when possible to avoid lines with isolated closing tokens. -.Sp -For example: -.Sp -.Vb 8 -\& # default -\& $opt_c = Text::CSV_XS\->new( -\& { -\& binary => 1, -\& sep_char => $opt_c, -\& always_quote => 1, -\& } -\& ); -\& -\& # \-sct -\& $opt_c = Text::CSV_XS\->new( -\& { -\& binary => 1, -\& sep_char => $opt_c, -\& always_quote => 1, -\& } ); -.Ve -.Sp -The \fB\-sct\fR flag is somewhat similar to the \fB\-vtc\fR flags, and in some -cases it can give a similar result. The difference is that the \fB\-vtc\fR -flags try to avoid lines with leading opening tokens by \*(L"hiding\*(R" them at -the end of a previous line, whereas the \fB\-sct\fR flag merely tries to -reduce the number of lines with isolated closing tokens by stacking them -but does not try to hide them. For example: -.Sp -.Vb 6 -\& # \-vtc=2 -\& $opt_c = Text::CSV_XS\->new( -\& { -\& binary => 1, -\& sep_char => $opt_c, -\& always_quote => 1, } ); -.Ve -.Sp -For detailed control of the stacking of individual closing tokens the -following controls can be used: -.Sp -.Vb 4 -\& \-scp or \-\-stack\-closing\-paren -\& \-schb or \-\-stack\-closing\-hash\-brace -\& \-scsb or \-\-stack\-closing\-square\-bracket -\& \-scbb or \-\-stack\-closing\-block\-brace -.Ve -.Sp -The flag \fB\-sct\fR is an abbreviation for stacking the non-block closing -tokens, \fB\-scp \-schb \-scsb\fR. -.Sp -Stacking of closing block braces, \fB\-scbb\fR, causes a cascade of isolated -closing block braces to be combined into a single line as in the following -example: -.Sp -.Vb 7 -\& # \-scbb: -\& for $w1 (@w1) { -\& for $w2 (@w2) { -\& for $w3 (@w3) { -\& for $w4 (@w4) { -\& push( @lines, "$w1 $w2 $w3 $w4\en" ); -\& } } } } -.Ve -.Sp -To simplify input even further for the case in which both opening and closing -non-block containers are stacked, the flag \fB\-sac\fR or \fB\-\-stack\-all\-containers\fR -is an abbreviation for \fB\-sot \-sot\fR. -.IP "\fB\-dnl\fR, \fB\-\-delete\-old\-newlines\fR" 4 -.IX Item "-dnl, --delete-old-newlines" -By default, perltidy first deletes all old line break locations, and then it -looks for good break points to match the desired line length. Use \fB\-ndnl\fR -or \fB\-\-nodelete\-old\-newlines\fR to force perltidy to retain all old line break -points. -.IP "\fB\-anl\fR, \fB\-\-add\-newlines\fR" 4 -.IX Item "-anl, --add-newlines" -By default, perltidy will add line breaks when necessary to create -continuations of long lines and to improve the script appearance. Use -\&\fB\-nanl\fR or \fB\-\-noadd\-newlines\fR to prevent any new line breaks. -.Sp -This flag does not prevent perltidy from eliminating existing line -breaks; see \fB\-\-freeze\-newlines\fR to completely prevent changes to line -break points. -.IP "Controlling whether perltidy breaks before or after operators" 4 -.IX Item "Controlling whether perltidy breaks before or after operators" -Four command line parameters provide some control over whether -a line break should be before or after specific token types. -Two parameters give detailed control: -.Sp -\&\fB\-wba=s\fR or \fB\-\-want\-break\-after=s\fR, and -.Sp -\&\fB\-wbb=s\fR or \fB\-\-want\-break\-before=s\fR. -.Sp -These parameters are each followed by a quoted string, \fBs\fR, containing -a list of token types (separated only by spaces). No more than one of each -of these parameters should be specified, because repeating a -command-line parameter always overwrites the previous one before -perltidy ever sees it. -.Sp -By default, perltidy breaks \fBafter\fR these token types: - % + \- * / x != == >= <= =~ !~ < > | & - = **= += *= &= <<= &&= \-= /= |= >>= ||= //= .= %= ^= x= -.Sp -And perltidy breaks \fBbefore\fR these token types by default: - . << >> \-> && || // -.Sp -To illustrate, to cause a break after a concatenation operator, \f(CW\*(Aq.\*(Aq\fR, -rather than before it, the command line would be -.Sp -.Vb 1 -\& \-wba="." -.Ve -.Sp -As another example, the following command would cause a break before -math operators \f(CW\*(Aq+\*(Aq\fR, \f(CW\*(Aq\-\*(Aq\fR, \f(CW\*(Aq/\*(Aq\fR, and \f(CW\*(Aq*\*(Aq\fR: -.Sp -.Vb 1 -\& \-wbb="+ \- / *" -.Ve -.Sp -These commands should work well for most of the token types that perltidy uses -(use \fB\-\-dump\-token\-types\fR for a list). Also try the \fB\-D\fR flag on a short -snippet of code and look at the .DEBUG file to see the tokenization. However, -for a few token types there may be conflicts with hardwired logic which cause -unexpected results. One example is curly braces, which should be controlled -with the parameter \fBbl\fR provided for that purpose. -.Sp -\&\fB\s-1WARNING\s0\fR Be sure to put these tokens in quotes to avoid having them -misinterpreted by your command shell. -.Sp -Two additional parameters are available which, though they provide no further -capability, can simplify input are: -.Sp -\&\fB\-baao\fR or \fB\-\-break\-after\-all\-operators\fR, -.Sp -\&\fB\-bbao\fR or \fB\-\-break\-before\-all\-operators\fR. -.Sp -The \-baao sets the default to be to break after all of the following operators: -.Sp -.Vb 3 -\& % + \- * / x != == >= <= =~ !~ < > | & -\& = **= += *= &= <<= &&= \-= /= |= >>= ||= //= .= %= ^= x= -\& . : ? && || and or err xor -.Ve -.Sp -and the \fB\-bbao\fR flag sets the default to break before all of these operators. -These can be used to define an initial break preference which can be fine-tuned -with the \fB\-wba\fR and \fB\-wbb\fR flags. For example, to break before all operators -except an \fB=\fR one could use \-\-bbao \-wba='=' rather than listing every -single perl operator except \fB=\fR on a \-wbb flag. -.SS "Controlling List Formatting" -.IX Subsection "Controlling List Formatting" -Perltidy attempts to place comma-separated arrays of values in tables -which look good. Its default algorithms usually work well, and they -have been improving with each release, but several parameters are -available to control list formatting. -.IP "\fB\-boc\fR, \fB\-\-break\-at\-old\-comma\-breakpoints\fR" 4 -.IX Item "-boc, --break-at-old-comma-breakpoints" -This flag tells perltidy to try to break at all old commas. This is not -the default. Normally, perltidy makes a best guess at list formatting, -and seldom uses old comma breakpoints. Usually this works well, -but consider: -.Sp -.Vb 5 -\& my @list = (1, -\& 1, 1, -\& 1, 2, 1, -\& 1, 3, 3, 1, -\& 1, 4, 6, 4, 1,); -.Ve -.Sp -The default formatting will flatten this down to one line: -.Sp -.Vb 2 -\& # perltidy (default) -\& my @list = ( 1, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 4, 6, 4, 1, ); -.Ve -.Sp -which hides the structure. Using \fB\-boc\fR, plus additional flags -to retain the original style, yields -.Sp -.Vb 6 -\& # perltidy \-boc \-lp \-pt=2 \-vt=1 \-vtc=1 -\& my @list = (1, -\& 1, 1, -\& 1, 2, 1, -\& 1, 3, 3, 1, -\& 1, 4, 6, 4, 1,); -.Ve -.Sp -A disadvantage of this flag is that all tables in the file -must already be nicely formatted. For another possibility see -the \-fs flag in \*(L"Skipping Selected Sections of Code\*(R". -.IP "\fB\-mft=n\fR, \fB\-\-maximum\-fields\-per\-table=n\fR" 4 -.IX Item "-mft=n, --maximum-fields-per-table=n" -If the computed number of fields for any table exceeds \fBn\fR, then it -will be reduced to \fBn\fR. The default value for \fBn\fR is a large number, -40. While this value should probably be left unchanged as a general -rule, it might be used on a small section of code to force a list to -have a particular number of fields per line, and then either the \fB\-boc\fR -flag could be used to retain this formatting, or a single comment could -be introduced somewhere to freeze the formatting in future applications -of perltidy. -.Sp -.Vb 9 -\& # perltidy \-mft=2 -\& @month_of_year = ( -\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, -\& \*(AqMar\*(Aq, \*(AqApr\*(Aq, -\& \*(AqMay\*(Aq, \*(AqJun\*(Aq, -\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, -\& \*(AqSep\*(Aq, \*(AqOct\*(Aq, -\& \*(AqNov\*(Aq, \*(AqDec\*(Aq -\& ); -.Ve -.IP "\fB\-cab=n\fR, \fB\-\-comma\-arrow\-breakpoints=n\fR" 4 -.IX Item "-cab=n, --comma-arrow-breakpoints=n" -A comma which follows a comma arrow, '=>', is given special -consideration. In a long list, it is common to break at all such -commas. This parameter can be used to control how perltidy breaks at -these commas. (However, it will have no effect if old comma breaks are -being forced because \fB\-boc\fR is used). The possible values of \fBn\fR are: -.Sp -.Vb 10 -\& n=0 break at all commas after => -\& n=1 stable: break at all commas after => if container is open, -\& EXCEPT FOR one\-line containers -\& n=2 break at all commas after =>, BUT try to form the maximum -\& maximum one\-line container lengths -\& n=3 do not treat commas after => specially at all -\& n=4 break everything: like n=0 but ALSO break a short container with -\& a => not followed by a comma when \-vt=0 is used -\& n=5 stable: like n=1 but ALSO break at open one\-line containers when -\& \-vt=0 is used (default) -.Ve -.Sp -For example, given the following single line, perltidy by default will -not add any line breaks because it would break the existing one-line -container: -.Sp -.Vb 1 -\& bless { B => $B, Root => $Root } => $package; -.Ve -.Sp -Using \fB\-cab=0\fR will force a break after each comma-arrow item: -.Sp -.Vb 5 -\& # perltidy \-cab=0: -\& bless { -\& B => $B, -\& Root => $Root -\& } => $package; -.Ve -.Sp -If perltidy is subsequently run with this container broken, then by -default it will break after each '=>' because the container is now -broken. To reform a one-line container, the parameter \fB\-cab=2\fR could -be used. -.Sp -The flag \fB\-cab=3\fR can be used to prevent these commas from being -treated specially. In this case, an item such as \*(L"01\*(R" => 31 is -treated as a single item in a table. The number of fields in this table -will be determined by the same rules that are used for any other table. -Here is an example. -.Sp -.Vb 6 -\& # perltidy \-cab=3 -\& my %last_day = ( -\& "01" => 31, "02" => 29, "03" => 31, "04" => 30, -\& "05" => 31, "06" => 30, "07" => 31, "08" => 31, -\& "09" => 30, "10" => 31, "11" => 30, "12" => 31 -\& ); -.Ve -.SS "Retaining or Ignoring Existing Line Breaks" -.IX Subsection "Retaining or Ignoring Existing Line Breaks" -Several additional parameters are available for controlling the extent -to which line breaks in the input script influence the output script. -In most cases, the default parameter values are set so that, if a choice -is possible, the output style follows the input style. For example, if -a short logical container is broken in the input script, then the -default behavior is for it to remain broken in the output script. -.PP -Most of the parameters in this section would only be required for a -one-time conversion of a script from short container lengths to longer -container lengths. The opposite effect, of converting long container -lengths to shorter lengths, can be obtained by temporarily using a short -maximum line length. -.IP "\fB\-bol\fR, \fB\-\-break\-at\-old\-logical\-breakpoints\fR" 4 -.IX Item "-bol, --break-at-old-logical-breakpoints" -By default, if a logical expression is broken at a \f(CW\*(C`&&\*(C'\fR, \f(CW\*(C`||\*(C'\fR, \f(CW\*(C`and\*(C'\fR, -or \f(CW\*(C`or\*(C'\fR, then the container will remain broken. Also, breaks -at internal keywords \f(CW\*(C`if\*(C'\fR and \f(CW\*(C`unless\*(C'\fR will normally be retained. -To prevent this, and thus form longer lines, use \fB\-nbol\fR. -.IP "\fB\-bok\fR, \fB\-\-break\-at\-old\-keyword\-breakpoints\fR" 4 -.IX Item "-bok, --break-at-old-keyword-breakpoints" -By default, perltidy will retain a breakpoint before keywords which may -return lists, such as \f(CW\*(C`sort\*(C'\fR and . This allows chains of these -operators to be displayed one per line. Use \fB\-nbok\fR to prevent -retaining these breakpoints. -.IP "\fB\-bot\fR, \fB\-\-break\-at\-old\-ternary\-breakpoints\fR" 4 -.IX Item "-bot, --break-at-old-ternary-breakpoints" -By default, if a conditional (ternary) operator is broken at a \f(CW\*(C`:\*(C'\fR, -then it will remain broken. To prevent this, and thereby -form longer lines, use \fB\-nbot\fR. -.IP "\fB\-boa\fR, \fB\-\-break\-at\-old\-attribute\-breakpoints\fR" 4 -.IX Item "-boa, --break-at-old-attribute-breakpoints" -By default, if an attribute list is broken at a \f(CW\*(C`:\*(C'\fR in the source file, then -it will remain broken. For example, given the following code, the line breaks -at the ':'s will be retained: -.Sp -.Vb 4 -\& my @field -\& : field -\& : Default(1) -\& : Get(\*(AqName\*(Aq => \*(Aqfoo\*(Aq) : Set(\*(AqName\*(Aq); -.Ve -.Sp -If the attributes are on a single line in the source code then they will remain -on a single line if possible. -.Sp -To prevent this, and thereby always form longer lines, use \fB\-nboa\fR. -.IP "\fB\-iob\fR, \fB\-\-ignore\-old\-breakpoints\fR" 4 -.IX Item "-iob, --ignore-old-breakpoints" -Use this flag to tell perltidy to ignore existing line breaks to the -maximum extent possible. This will tend to produce the longest possible -containers, regardless of type, which do not exceed the line length -limit. -.IP "\fB\-kis\fR, \fB\-\-keep\-interior\-semicolons\fR" 4 -.IX Item "-kis, --keep-interior-semicolons" -Use the \fB\-kis\fR flag to prevent breaking at a semicolon if -there was no break there in the input file. Normally -perltidy places a newline after each semicolon which -terminates a statement unless several statements are -contained within a one-line brace block. To illustrate, -consider the following input lines: -.Sp -.Vb 2 -\& dbmclose(%verb_delim); undef %verb_delim; -\& dbmclose(%expanded); undef %expanded; -.Ve -.Sp -The default is to break after each statement, giving -.Sp -.Vb 4 -\& dbmclose(%verb_delim); -\& undef %verb_delim; -\& dbmclose(%expanded); -\& undef %expanded; -.Ve -.Sp -With \fBperltidy \-kis\fR the multiple statements are retained: -.Sp -.Vb 2 -\& dbmclose(%verb_delim); undef %verb_delim; -\& dbmclose(%expanded); undef %expanded; -.Ve -.Sp -The statements are still subject to the specified value -of \fBmaximum-line-length\fR and will be broken if this -maximum is exceeded. -.SS "Blank Line Control" -.IX Subsection "Blank Line Control" -Blank lines can improve the readability of a script if they are carefully -placed. Perltidy has several commands for controlling the insertion, -retention, and removal of blank lines. -.IP "\fB\-fbl\fR, \fB\-\-freeze\-blank\-lines\fR" 4 -.IX Item "-fbl, --freeze-blank-lines" -Set \fB\-fbl\fR if you want to the blank lines in your script to -remain exactly as they are. The rest of the parameters in -this section may then be ignored. (Note: setting the \fB\-fbl\fR flag -is equivalent to setting \fB\-mbl=0\fR and \fB\-kbl=2\fR). -.IP "\fB\-bbc\fR, \fB\-\-blanks\-before\-comments\fR" 4 -.IX Item "-bbc, --blanks-before-comments" -A blank line will be introduced before a full-line comment. This is the -default. Use \fB\-nbbc\fR or \fB\-\-noblanks\-before\-comments\fR to prevent -such blank lines from being introduced. -.IP "\fB\-blbs=n\fR, \fB\-\-blank\-lines\-before\-subs=n\fR" 4 -.IX Item "-blbs=n, --blank-lines-before-subs=n" -The parameter \fB\-blbs=n\fR requests that least \fBn\fR blank lines precede a sub -definition which does not follow a comment and which is more than one-line -long. The default is <\-blbs=1>. \fB\s-1BEGIN\s0\fR and \fB\s-1END\s0\fR blocks are included. -.Sp -The requested number of blanks statement will be inserted regardless of the -value of \fB\-\-maximum\-consecutive\-blank\-lines=n\fR (\fB\-mbl=n\fR) with the exception -that if \fB\-mbl=0\fR then no blanks will be output. -.Sp -This parameter interacts with the value \fBk\fR of the parameter \fB\-\-maximum\-consecutive\-blank\-lines=k\fR (\fB\-mbl=k\fR) as follows: -.Sp -1. If \fB\-mbl=0\fR then no blanks will be output. This allows all blanks to be suppressed with a single parameter. Otherwise, -.Sp -2. If the number of old blank lines in the script is less than \fBn\fR then -additional blanks will be inserted to make the total \fBn\fR regardless of the -value of \fB\-mbl=k\fR. -.Sp -3. If the number of old blank lines in the script equals or exceeds \fBn\fR then -this parameter has no effect, however the total will not exceed -value specified on the \fB\-mbl=k\fR flag. -.IP "\fB\-blbp=n\fR, \fB\-\-blank\-lines\-before\-packages=n\fR" 4 -.IX Item "-blbp=n, --blank-lines-before-packages=n" -The parameter \fB\-blbp=n\fR requests that least \fBn\fR blank lines precede a package -which does not follow a comment. The default is \fB\-blbp=1\fR. -.Sp -This parameter interacts with the value \fBk\fR of the parameter -\&\fB\-\-maximum\-consecutive\-blank\-lines=k\fR (\fB\-mbl=k\fR) in the same way as described -for the previous item \fB\-blbs=n\fR. -.IP "\fB\-bbs\fR, \fB\-\-blanks\-before\-subs\fR" 4 -.IX Item "-bbs, --blanks-before-subs" -For compatibility with previous versions, \fB\-bbs\fR or \fB\-\-blanks\-before\-subs\fR -is equivalent to \fI\-blbp=1\fR and \fI\-blbs=1\fR. -.Sp -Likewise, \fB\-nbbs\fR or \fB\-\-noblanks\-before\-subs\fR -is equivalent to \fI\-blbp=0\fR and \fI\-blbs=0\fR. -.IP "\fB\-bbb\fR, \fB\-\-blanks\-before\-blocks\fR" 4 -.IX Item "-bbb, --blanks-before-blocks" -A blank line will be introduced before blocks of coding delimited by -\&\fBfor\fR, \fBforeach\fR, \fBwhile\fR, \fBuntil\fR, and \fBif\fR, \fBunless\fR, in the following -circumstances: -.RS 4 -.IP "\(bu" 4 -The block is not preceded by a comment. -.IP "\(bu" 4 -The block is not a one-line block. -.IP "\(bu" 4 -The number of consecutive non-blank lines at the current indentation depth is at least \fB\-lbl\fR -(see next section). -.RE -.RS 4 -.Sp -This is the default. The intention of this option is to introduce -some space within dense coding. -This is negated with \fB\-nbbb\fR or \fB\-\-noblanks\-before\-blocks\fR. -.RE -.IP "\fB\-lbl=n\fR \fB\-\-long\-block\-line\-count=n\fR" 4 -.IX Item "-lbl=n --long-block-line-count=n" -This controls how often perltidy is allowed to add blank lines before -certain block types (see previous section). The default is 8. Entering -a value of \fB0\fR is equivalent to entering a very large number. -.IP "\fB\-blao=i\fR or \fB\-\-blank\-lines\-after\-opening\-block=i\fR" 4 -.IX Item "-blao=i or --blank-lines-after-opening-block=i" -This control places a minimum of \fBi\fR blank lines \fBafter\fR a line which \fBends\fR -with an opening block brace of a specified type. By default, this only applies -to the block of a named \fBsub\fR, but this can be changed (see \fB\-blaol\fR below). -The default is not to do this (\fBi=0\fR). -.Sp -Please see the note below on using the \fB\-blao\fR and \fB\-blbc\fR options. -.IP "\fB\-blbc=i\fR or \fB\-\-blank\-lines\-before\-closing\-block=i\fR" 4 -.IX Item "-blbc=i or --blank-lines-before-closing-block=i" -This control places a minimum of \fBi\fR blank lines \fBbefore\fR a line which -\&\fBbegins\fR with a closing block brace of a specified type. By default, this -only applies to the block of a named \fBsub\fR, but this can be changed (see -\&\fB\-blbcl\fR below). The default is not to do this (\fBi=0\fR). -.IP "\fB\-blaol=s\fR or \fB\-\-blank\-lines\-after\-opening\-block\-list=s\fR" 4 -.IX Item "-blaol=s or --blank-lines-after-opening-block-list=s" -The parameter \fBs\fR is a list of block type keywords to which the flag \fB\-blao\fR -should apply. The section \*(L"Specifying Block Types\*(R" explains how to list -block types. -.IP "\fB\-blbcl=s\fR or \fB\-\-blank\-lines\-before\-closing\-block\-list=s\fR" 4 -.IX Item "-blbcl=s or --blank-lines-before-closing-block-list=s" -This parameter is a list of block type keywords to which the flag \fB\-blbc\fR -should apply. The section \*(L"Specifying Block Types\*(R" explains how to list -block types. -.IP "Note on using the \fB\-blao\fR and \fB\-blbc\fR options." 4 -.IX Item "Note on using the -blao and -blbc options." -These blank line controls introduce a certain minimum number of blank lines in -the text, but the final number of blank lines may be greater, depending on -values of the other blank line controls and the number of old blank lines. A -consequence is that introducing blank lines with these and other controls -cannot be exactly undone, so some experimentation with these controls is -recommended before using them. -.Sp -For example, suppose that for some reason we decide to introduce one blank -space at the beginning and ending of all blocks. We could do -this using -.Sp -.Vb 1 -\& perltidy \-blao=2 \-blbc=2 \-blaol=\*(Aq*\*(Aq \-blbcl=\*(Aq*\*(Aq filename -.Ve -.Sp -Now suppose the script continues to be developed, but at some later date we -decide we don't want these spaces after all. we might expect that running with -the flags \fB\-blao=0\fR and \fB\-blbc=0\fR will undo them. However, by default -perltidy retains single blank lines, so the blank lines remain. -.Sp -We can easily fix this by telling perltidy to ignore old blank lines by -including the added parameter \fB\-kbl=0\fR and rerunning. Then the unwanted blank -lines will be gone. However, this will cause all old blank lines to be -ignored, perhaps even some that were added by hand to improve formatting. So -please be cautious when using these parameters. -.IP "\fB\-mbl=n\fR \fB\-\-maximum\-consecutive\-blank\-lines=n\fR" 4 -.IX Item "-mbl=n --maximum-consecutive-blank-lines=n" -This parameter specifies the maximum number of consecutive blank lines which -will be output within code sections of a script. The default is n=1. If the -input file has more than n consecutive blank lines, the number will be reduced -to n except as noted above for the \fB\-blbp\fR and \fB\-blbs\fR parameters. If \fBn=0\fR -then no blank lines will be output (unless all old blank lines are retained -with the \fB\-kbl=2\fR flag of the next section). -.Sp -This flag obviously does not apply to pod sections, -here-documents, and quotes. -.IP "\fB\-kbl=n\fR, \fB\-\-keep\-old\-blank\-lines=n\fR" 4 -.IX Item "-kbl=n, --keep-old-blank-lines=n" -The \fB\-kbl=n\fR flag gives you control over how your existing blank lines are -treated. -.Sp -The possible values of \fBn\fR are: -.Sp -.Vb 3 -\& n=0 ignore all old blank lines -\& n=1 stable: keep old blanks, but limited by the value of the B<\-mbl=n> flag -\& n=2 keep all old blank lines, regardless of the value of the B<\-mbl=n> flag -.Ve -.Sp -The default is \fBn=1\fR. -.IP "\fB\-sob\fR, \fB\-\-swallow\-optional\-blank\-lines\fR" 4 -.IX Item "-sob, --swallow-optional-blank-lines" -This is equivalent to \fBkbl=0\fR and is included for compatibility with -previous versions. -.IP "\fB\-nsob\fR, \fB\-\-noswallow\-optional\-blank\-lines\fR" 4 -.IX Item "-nsob, --noswallow-optional-blank-lines" -This is equivalent to \fBkbl=1\fR and is included for compatibility with -previous versions. -.SS "Styles" -.IX Subsection "Styles" -A style refers to a convenient collection of existing parameters. -.IP "\fB\-gnu\fR, \fB\-\-gnu\-style\fR" 4 -.IX Item "-gnu, --gnu-style" -\&\fB\-gnu\fR gives an approximation to the \s-1GNU\s0 Coding Standards (which do -not apply to perl) as they are sometimes implemented. At present, this -style overrides the default style with the following parameters: -.Sp -.Vb 1 -\& \-lp \-bl \-noll \-pt=2 \-bt=2 \-sbt=2 \-icp -.Ve -.IP "\fB\-pbp\fR, \fB\-\-perl\-best\-practices\fR" 4 -.IX Item "-pbp, --perl-best-practices" -\&\fB\-pbp\fR is an abbreviation for the parameters in the book \fBPerl Best Practices\fR -by Damian Conway: -.Sp -.Vb 3 -\& \-l=78 \-i=4 \-ci=4 \-st \-se \-vt=2 \-cti=0 \-pt=1 \-bt=1 \-sbt=1 \-bbt=1 \-nsfs \-nolq -\& \-wbb="% + \- * / x != == >= <= =~ !~ < > | & = -\& **= += *= &= <<= &&= \-= /= |= >>= ||= //= .= %= ^= x=" -.Ve -.Sp -Please note that this parameter set includes \-st and \-se flags, which make -perltidy act as a filter on one file only. These can be overridden by placing -\&\fB\-nst\fR and/or \fB\-nse\fR after the \-pbp parameter. -.Sp -Also note that the value of continuation indentation, \-ci=4, is equal to the -value of the full indentation, \-i=4. In some complex statements perltidy will -produce nicer results with \-ci=2. This can be implemented by including \-ci=2 -after the \-pbp parameter. For example, -.Sp -.Vb 11 -\& # perltidy \-pbp -\& $self\->{_text} = ( -\& !$section ? \*(Aq\*(Aq -\& : $type eq \*(Aqitem\*(Aq ? "the $section entry" -\& : "the section on $section" -\& ) -\& . ( -\& $page -\& ? ( $section ? \*(Aq in \*(Aq : \*(Aq\*(Aq ) . "the $page$page_ext manpage" -\& : \*(Aq elsewhere in this document\*(Aq -\& ); -\& -\& # perltidy \-pbp \-ci=2 -\& $self\->{_text} = ( -\& !$section ? \*(Aq\*(Aq -\& : $type eq \*(Aqitem\*(Aq ? "the $section entry" -\& : "the section on $section" -\& ) -\& . ( -\& $page -\& ? ( $section ? \*(Aq in \*(Aq : \*(Aq\*(Aq ) . "the $page$page_ext manpage" -\& : \*(Aq elsewhere in this document\*(Aq -\& ); -.Ve -.SS "Controlling Vertical Alignment" -.IX Subsection "Controlling Vertical Alignment" -Vertical alignment refers to lining up certain symbols in list of consecutive -similar lines to improve readability. For example, the \*(L"fat commas\*(R" are -aligned in the following statement: -.PP -.Vb 5 -\& $data = $pkg\->new( -\& PeerAddr => join( ".", @port[ 0 .. 3 ] ), -\& PeerPort => $port[4] * 256 + $port[5], -\& Proto => \*(Aqtcp\*(Aq -\& ); -.Ve -.PP -The only explicit control on vertical alignment is to turn it off using -\&\fB\-novalign\fR, a flag mainly intended for debugging. However, vertical -alignment can be forced to stop and restart by selectively introducing blank -lines. For example, a blank has been inserted in the following code -to keep somewhat similar things aligned. -.PP -.Vb 4 -\& %option_range = ( -\& \*(Aqformat\*(Aq => [ \*(Aqtidy\*(Aq, \*(Aqhtml\*(Aq, \*(Aquser\*(Aq ], -\& \*(Aqoutput\-line\-ending\*(Aq => [ \*(Aqdos\*(Aq, \*(Aqwin\*(Aq, \*(Aqmac\*(Aq, \*(Aqunix\*(Aq ], -\& \*(Aqcharacter\-encoding\*(Aq => [ \*(Aqnone\*(Aq, \*(Aqutf8\*(Aq ], -\& -\& \*(Aqblock\-brace\-tightness\*(Aq => [ 0, 2 ], -\& \*(Aqbrace\-tightness\*(Aq => [ 0, 2 ], -\& \*(Aqparen\-tightness\*(Aq => [ 0, 2 ], -\& \*(Aqsquare\-bracket\-tightness\*(Aq => [ 0, 2 ], -\& ); -.Ve -.SS "Other Controls" -.IX Subsection "Other Controls" -.IP "Deleting selected text" 4 -.IX Item "Deleting selected text" -Perltidy can selectively delete comments and/or pod documentation. The -command \fB\-dac\fR or \fB\-\-delete\-all\-comments\fR will delete all comments -\&\fBand\fR all pod documentation, leaving just code and any leading system -control lines. -.Sp -The command \fB\-dp\fR or \fB\-\-delete\-pod\fR will remove all pod documentation -(but not comments). -.Sp -Two commands which remove comments (but not pod) are: \fB\-dbc\fR or -\&\fB\-\-delete\-block\-comments\fR and \fB\-dsc\fR or \fB\-\-delete\-side\-comments\fR. -(Hanging side comments will be deleted with block comments here.) -.Sp -The negatives of these commands also work, and are the defaults. When -block comments are deleted, any leading 'hash\-bang' will be retained. -Also, if the \fB\-x\fR flag is used, any system commands before a leading -hash-bang will be retained (even if they are in the form of comments). -.IP "Writing selected text to a file" 4 -.IX Item "Writing selected text to a file" -When perltidy writes a formatted text file, it has the ability to also -send selected text to a file with a \fI.TEE\fR extension. This text can -include comments and pod documentation. -.Sp -The command \fB\-tac\fR or \fB\-\-tee\-all\-comments\fR will write all comments -\&\fBand\fR all pod documentation. -.Sp -The command \fB\-tp\fR or \fB\-\-tee\-pod\fR will write all pod documentation (but -not comments). -.Sp -The commands which write comments (but not pod) are: \fB\-tbc\fR or -\&\fB\-\-tee\-block\-comments\fR and \fB\-tsc\fR or \fB\-\-tee\-side\-comments\fR. -(Hanging side comments will be written with block comments here.) -.Sp -The negatives of these commands also work, and are the defaults. -.IP "Using a \fI.perltidyrc\fR command file" 4 -.IX Item "Using a .perltidyrc command file" -If you use perltidy frequently, you probably won't be happy until you -create a \fI.perltidyrc\fR file to avoid typing commonly-used parameters. -Perltidy will first look in your current directory for a command file -named \fI.perltidyrc\fR. If it does not find one, it will continue looking -for one in other standard locations. -.Sp -These other locations are system-dependent, and may be displayed with -the command \f(CW\*(C`perltidy \-dpro\*(C'\fR. Under Unix systems, it will first look -for an environment variable \fB\s-1PERLTIDY\s0\fR. Then it will look for a -\&\fI.perltidyrc\fR file in the home directory, and then for a system-wide -file \fI/usr/local/etc/perltidyrc\fR, and then it will look for -\&\fI/etc/perltidyrc\fR. Note that these last two system-wide files do not -have a leading dot. Further system-dependent information will be found -in the \s-1INSTALL\s0 file distributed with perltidy. -.Sp -Under Windows, perltidy will also search for a configuration file named perltidy.ini since Windows does not allow files with a leading period (.). -Use \f(CW\*(C`perltidy \-dpro\*(C'\fR to see the possible locations for your system. -An example might be \fIC:\eDocuments and Settings\eAll Users\eperltidy.ini\fR. -.Sp -Another option is the use of the \s-1PERLTIDY\s0 environment variable. -The method for setting environment variables depends upon the version of -Windows that you are using. Instructions for Windows 95 and later versions can -be found here: -.Sp -http://www.netmanage.com/000/20021101_005_tcm21\-6336.pdf -.Sp -Under Windows \s-1NT / 2000 / XP\s0 the \s-1PERLTIDY\s0 environment variable can be placed in -either the user section or the system section. The later makes the -configuration file common to all users on the machine. Be sure to enter the -full path of the configuration file in the value of the environment variable. -Ex. PERLTIDY=C:\eDocuments and Settings\eperltidy.ini -.Sp -The configuration file is free format, and simply a list of parameters, just as -they would be entered on a command line. Any number of lines may be used, with -any number of parameters per line, although it may be easiest to read with one -parameter per line. Comment text begins with a #, and there must -also be a space before the # for side comments. It is a good idea to -put complex parameters in either single or double quotes. -.Sp -Here is an example of a \fI.perltidyrc\fR file: -.Sp -.Vb 8 -\& # This is a simple of a .perltidyrc configuration file -\& # This implements a highly spaced style -\& \-se # errors to standard error output -\& \-w # show all warnings -\& \-bl # braces on new lines -\& \-pt=0 # parens not tight at all -\& \-bt=0 # braces not tight -\& \-sbt=0 # square brackets not tight -.Ve -.Sp -The parameters in the \fI.perltidyrc\fR file are installed first, so any -parameters given on the command line will have priority over them. -.Sp -To avoid confusion, perltidy ignores any command in the .perltidyrc -file which would cause some kind of dump and an exit. These are: -.Sp -.Vb 1 -\& \-h \-v \-ddf \-dln \-dop \-dsn \-dtt \-dwls \-dwrs \-ss -.Ve -.Sp -There are several options may be helpful in debugging a \fI.perltidyrc\fR -file: -.RS 4 -.IP "\(bu" 4 -A very helpful command is \fB\-\-dump\-profile\fR or \fB\-dpro\fR. It writes a -list of all configuration filenames tested to standard output, and -if a file is found, it dumps the content to standard output before -exiting. So, to find out where perltidy looks for its configuration -files, and which one if any it selects, just enter -.Sp -.Vb 1 -\& perltidy \-dpro -.Ve -.IP "\(bu" 4 -It may be simplest to develop and test configuration files with -alternative names, and invoke them with \fB\-pro=filename\fR on the command -line. Then rename the desired file to \fI.perltidyrc\fR when finished. -.IP "\(bu" 4 -The parameters in the \fI.perltidyrc\fR file can be switched off with -the \fB\-npro\fR option. -.IP "\(bu" 4 -The commands \fB\-\-dump\-options\fR, \fB\-\-dump\-defaults\fR, \fB\-\-dump\-long\-names\fR, -and \fB\-\-dump\-short\-names\fR, all described below, may all be helpful. -.RE -.RS 4 -.RE -.IP "Creating a new abbreviation" 4 -.IX Item "Creating a new abbreviation" -A special notation is available for use in a \fI.perltidyrc\fR file -for creating an abbreviation for a group -of options. This can be used to create a -shorthand for one or more styles which are frequently, but not always, -used. The notation is to group the options within curly braces which -are preceded by the name of the alias (without leading dashes), like this: -.Sp -.Vb 4 -\& newword { -\& \-opt1 -\& \-opt2 -\& } -.Ve -.Sp -where \fBnewword\fR is the abbreviation, and \fBopt1\fR, etc, are existing parameters -\&\fIor other abbreviations\fR. The main syntax requirement is that the new -abbreviation along with its opening curly brace must begin on a new line. -Space before and after the curly braces is optional. -For a -specific example, the following line -.Sp -.Vb 1 -\& airy {\-bl \-pt=0 \-bt=0 \-sbt=0} -.Ve -.Sp -could be placed in a \fI.perltidyrc\fR file, and then invoked at will with -.Sp -.Vb 1 -\& perltidy \-airy somefile.pl -.Ve -.Sp -(Either \f(CW\*(C`\-airy\*(C'\fR or \f(CW\*(C`\-\-airy\*(C'\fR may be used). -.IP "Skipping leading non-perl commands with \fB\-x\fR or \fB\-\-look\-for\-hash\-bang\fR" 4 -.IX Item "Skipping leading non-perl commands with -x or --look-for-hash-bang" -If your script has leading lines of system commands or other text which -are not valid perl code, and which are separated from the start of the -perl code by a \*(L"hash-bang\*(R" line, ( a line of the form \f(CW\*(C`#!...perl\*(C'\fR ), -you must use the \fB\-x\fR flag to tell perltidy not to parse and format any -lines before the \*(L"hash-bang\*(R" line. This option also invokes perl with a -\&\-x flag when checking the syntax. This option was originally added to -allow perltidy to parse interactive \s-1VMS\s0 scripts, but it should be used -for any script which is normally invoked with \f(CW\*(C`perl \-x\*(C'\fR. -.IP "Making a file unreadable" 4 -.IX Item "Making a file unreadable" -The goal of perltidy is to improve the readability of files, but there -are two commands which have the opposite effect, \fB\-\-mangle\fR and -\&\fB\-\-extrude\fR. They are actually -merely aliases for combinations of other parameters. Both of these -strip all possible whitespace, but leave comments and pod documents, -so that they are essentially reversible. The -difference between these is that \fB\-\-mangle\fR puts the fewest possible -line breaks in a script while \fB\-\-extrude\fR puts the maximum possible. -Note that these options do not provided any meaningful obfuscation, because -perltidy can be used to reformat the files. They were originally -developed to help test the tokenization logic of perltidy, but they -have other uses. -One use for \fB\-\-mangle\fR is the following: -.Sp -.Vb 1 -\& perltidy \-\-mangle myfile.pl \-st | perltidy \-o myfile.pl.new -.Ve -.Sp -This will form the maximum possible number of one-line blocks (see next -section), and can sometimes help clean up a badly formatted script. -.Sp -A similar technique can be used with \fB\-\-extrude\fR instead of \fB\-\-mangle\fR -to make the minimum number of one-line blocks. -.Sp -Another use for \fB\-\-mangle\fR is to combine it with \fB\-dac\fR to reduce -the file size of a perl script. -.IP "One-line blocks" 4 -.IX Item "One-line blocks" -There are a few points to note regarding one-line blocks. A one-line -block is something like this, -.Sp -.Vb 1 -\& if ($x > 0) { $y = 1 / $x } -.Ve -.Sp -where the contents within the curly braces is short enough to fit -on a single line. -.Sp -With few exceptions, perltidy retains existing one-line blocks, if it -is possible within the line-length constraint, but it does not attempt -to form new ones. In other words, perltidy will try to follow the -one-line block style of the input file. -.Sp -If an existing one-line block is longer than the maximum line length, -however, it will be broken into multiple lines. When this happens, perltidy -checks for and adds any optional terminating semicolon (unless the \fB\-nasc\fR -option is used) if the block is a code block. -.Sp -The main exception is that perltidy will attempt to form new one-line -blocks following the keywords \f(CW\*(C`map\*(C'\fR, \f(CW\*(C`eval\*(C'\fR, and \f(CW\*(C`sort\*(C'\fR, because -these code blocks are often small and most clearly displayed in a single -line. -.Sp -One-line block rules can conflict with the cuddled-else option. When -the cuddled-else option is used, perltidy retains existing one-line -blocks, even if they do not obey cuddled-else formatting. -.Sp -Occasionally, when one-line blocks get broken because they exceed the -available line length, the formatting will violate the requested brace style. -If this happens, reformatting the script a second time should correct -the problem. -.IP "Debugging" 4 -.IX Item "Debugging" -The following flags are available for debugging: -.Sp -\&\fB\-\-dump\-cuddled\-block\-list\fR or \fB\-dcbl\fR will dump to standard output the -internal hash of cuddled block types created by a \fB\-cuddled\-block\-list\fR input -string. -.Sp -\&\fB\-\-dump\-defaults\fR or \fB\-ddf\fR will write the default option set to standard output and quit -.Sp -\&\fB\-\-dump\-profile\fR or \fB\-dpro\fR will write the name of the current -configuration file and its contents to standard output and quit. -.Sp -\&\fB\-\-dump\-options\fR or \fB\-dop\fR will write current option set to standard -output and quit. -.Sp -\&\fB\-\-dump\-long\-names\fR or \fB\-dln\fR will write all command line long names (passed -to Get_options) to standard output and quit. -.Sp -\&\fB\-\-dump\-short\-names\fR or \fB\-dsn\fR will write all command line short names -to standard output and quit. -.Sp -\&\fB\-\-dump\-token\-types\fR or \fB\-dtt\fR will write a list of all token types -to standard output and quit. -.Sp -\&\fB\-\-dump\-want\-left\-space\fR or \fB\-dwls\fR will write the hash \f(CW%want_left_space\fR -to standard output and quit. See the section on controlling whitespace -around tokens. -.Sp -\&\fB\-\-dump\-want\-right\-space\fR or \fB\-dwrs\fR will write the hash \f(CW%want_right_space\fR -to standard output and quit. See the section on controlling whitespace -around tokens. -.Sp -\&\fB\-\-no\-memoize\fR or \fB\-nmem\fR will turn of memoizing. -Memoization can reduce run time when running perltidy repeatedly in a -single process. It is on by default but can be deactivated for -testing with \fB\-nmem\fR. -.Sp -\&\fB\-\-file\-size\-order\fR or \fB\-fso\fR will cause files to be processed in order of -increasing size, when multiple files are being processed. This is useful -during program development, when large numbers of files with varying sizes are -processed, because it can reduce virtual memory usage. -.Sp -\&\fB\-DEBUG\fR will write a file with extension \fI.DEBUG\fR for each input file -showing the tokenization of all lines of code. -.IP "Working with MakeMaker, AutoLoader and SelfLoader" 4 -.IX Item "Working with MakeMaker, AutoLoader and SelfLoader" -The first \f(CW$VERSION\fR line of a file which might be eval'd by MakeMaker -is passed through unchanged except for indentation. -Use \fB\-\-nopass\-version\-line\fR, or \fB\-npvl\fR, to deactivate this feature. -.Sp -If the AutoLoader module is used, perltidy will continue formatting -code after seeing an _\|_END_\|_ line. -Use \fB\-\-nolook\-for\-autoloader\fR, or \fB\-nlal\fR, to deactivate this feature. -.Sp -Likewise, if the SelfLoader module is used, perltidy will continue formatting -code after seeing a _\|_DATA_\|_ line. -Use \fB\-\-nolook\-for\-selfloader\fR, or \fB\-nlsl\fR, to deactivate this feature. -.IP "Working around problems with older version of Perl" 4 -.IX Item "Working around problems with older version of Perl" -Perltidy contains a number of rules which help avoid known subtleties -and problems with older versions of perl, and these rules always -take priority over whatever formatting flags have been set. For example, -perltidy will usually avoid starting a new line with a bareword, because -this might cause problems if \f(CW\*(C`use strict\*(C'\fR is active. -.Sp -There is no way to override these rules. -.SH "HTML OPTIONS" -.IX Header "HTML OPTIONS" -.IP "The \fB\-html\fR master switch" 4 -.IX Item "The -html master switch" -The flag \fB\-html\fR causes perltidy to write an html file with extension -\&\fI.html\fR. So, for example, the following command -.Sp -.Vb 1 -\& perltidy \-html somefile.pl -.Ve -.Sp -will produce a syntax-colored html file named \fIsomefile.pl.html\fR -which may be viewed with a browser. -.Sp -\&\fBPlease Note\fR: In this case, perltidy does not do any formatting to the -input file, and it does not write a formatted file with extension -\&\fI.tdy\fR. This means that two perltidy runs are required to create a -fully reformatted, html copy of a script. -.IP "The \fB\-pre\fR flag for code snippets" 4 -.IX Item "The -pre flag for code snippets" -When the \fB\-pre\fR flag is given, only the pre-formatted section, within -the <\s-1PRE\s0> and
tags, will be output. This simplifies inclusion -of the output in other files. The default is to output a complete -web page. -.IP "The \fB\-nnn\fR flag for line numbering" 4 -.IX Item "The -nnn flag for line numbering" -When the \fB\-nnn\fR flag is given, the output lines will be numbered. -.IP "The \fB\-toc\fR, or \fB\-\-html\-table\-of\-contents\fR flag" 4 -.IX Item "The -toc, or --html-table-of-contents flag" -By default, a table of contents to packages and subroutines will be -written at the start of html output. Use \fB\-ntoc\fR to prevent this. -This might be useful, for example, for a pod document which contains a -number of unrelated code snippets. This flag only influences the code -table of contents; it has no effect on any table of contents produced by -pod2html (see next item). -.IP "The \fB\-pod\fR, or \fB\-\-pod2html\fR flag" 4 -.IX Item "The -pod, or --pod2html flag" -There are two options for formatting pod documentation. The default is -to pass the pod through the Pod::Html module (which forms the basis of -the pod2html utility). Any code sections are formatted by perltidy, and -the results then merged. Note: perltidy creates a temporary file when -Pod::Html is used; see \*(L"\s-1FILES\*(R"\s0. Also, Pod::Html creates temporary -files for its cache. -.Sp -\&\s-1NOTE:\s0 Perltidy counts the number of \f(CW\*(C`=cut\*(C'\fR lines, and either moves the -pod text to the top of the html file if there is one \f(CW\*(C`=cut\*(C'\fR, or leaves -the pod text in its original order (interleaved with code) otherwise. -.Sp -Most of the flags accepted by pod2html may be included in the perltidy -command line, and they will be passed to pod2html. In some cases, -the flags have a prefix \f(CW\*(C`pod\*(C'\fR to emphasize that they are for the -pod2html, and this prefix will be removed before they are passed to -pod2html. The flags which have the additional \f(CW\*(C`pod\*(C'\fR prefix are: -.Sp -.Vb 2 -\& \-\-[no]podheader \-\-[no]podindex \-\-[no]podrecurse \-\-[no]podquiet -\& \-\-[no]podverbose \-\-podflush -.Ve -.Sp -The flags which are unchanged from their use in pod2html are: -.Sp -.Vb 2 -\& \-\-backlink=s \-\-cachedir=s \-\-htmlroot=s \-\-libpods=s \-\-title=s -\& \-\-podpath=s \-\-podroot=s -.Ve -.Sp -where 's' is an appropriate character string. Not all of these flags are -available in older versions of Pod::Html. See your Pod::Html documentation for -more information. -.Sp -The alternative, indicated with \fB\-npod\fR, is not to use Pod::Html, but -rather to format pod text in italics (or whatever the stylesheet -indicates), without special html markup. This is useful, for example, -if pod is being used as an alternative way to write comments. -.IP "The \fB\-frm\fR, or \fB\-\-frames\fR flag" 4 -.IX Item "The -frm, or --frames flag" -By default, a single html output file is produced. This can be changed -with the \fB\-frm\fR option, which creates a frame holding a table of -contents in the left panel and the source code in the right side. This -simplifies code browsing. Assume, for example, that the input file is -\&\fIMyModule.pm\fR. Then, for default file extension choices, these three -files will be created: -.Sp -.Vb 3 -\& MyModule.pm.html \- the frame -\& MyModule.pm.toc.html \- the table of contents -\& MyModule.pm.src.html \- the formatted source code -.Ve -.Sp -Obviously this file naming scheme requires that output be directed to a real -file (as opposed to, say, standard output). If this is not the -case, or if the file extension is unknown, the \fB\-frm\fR option will be -ignored. -.IP "The \fB\-text=s\fR, or \fB\-\-html\-toc\-extension\fR flag" 4 -.IX Item "The -text=s, or --html-toc-extension flag" -Use this flag to specify the extra file extension of the table of contents file -when html frames are used. The default is \*(L"toc\*(R". -See \*(L"Specifying File Extensions\*(R". -.IP "The \fB\-sext=s\fR, or \fB\-\-html\-src\-extension\fR flag" 4 -.IX Item "The -sext=s, or --html-src-extension flag" -Use this flag to specify the extra file extension of the content file when html -frames are used. The default is \*(L"src\*(R". -See \*(L"Specifying File Extensions\*(R". -.IP "The \fB\-hent\fR, or \fB\-\-html\-entities\fR flag" 4 -.IX Item "The -hent, or --html-entities flag" -This flag controls the use of Html::Entities for html formatting. By -default, the module Html::Entities is used to encode special symbols. -This may not be the right thing for some browser/language -combinations. Use \-\-nohtml\-entities or \-nhent to prevent this. -.IP "Style Sheets" 4 -.IX Item "Style Sheets" -Style sheets make it very convenient to control and adjust the -appearance of html pages. The default behavior is to write a page of -html with an embedded style sheet. -.Sp -An alternative to an embedded style sheet is to create a page with a -link to an external style sheet. This is indicated with the -\&\fB\-css=filename\fR, where the external style sheet is \fIfilename\fR. The -external style sheet \fIfilename\fR will be created if and only if it does -not exist. This option is useful for controlling multiple pages from a -single style sheet. -.Sp -To cause perltidy to write a style sheet to standard output and exit, -use the \fB\-ss\fR, or \fB\-\-stylesheet\fR, flag. This is useful if the style -sheet could not be written for some reason, such as if the \fB\-pre\fR flag -was used. Thus, for example, -.Sp -.Vb 1 -\& perltidy \-html \-ss >mystyle.css -.Ve -.Sp -will write a style sheet with the default properties to file -\&\fImystyle.css\fR. -.Sp -The use of style sheets is encouraged, but a web page without a style -sheets can be created with the flag \fB\-nss\fR. Use this option if you -must to be sure that older browsers (roughly speaking, versions prior to -4.0 of Netscape Navigator and Internet Explorer) can display the -syntax-coloring of the html files. -.IP "Controlling \s-1HTML\s0 properties" 4 -.IX Item "Controlling HTML properties" -Note: It is usually more convenient to accept the default properties -and then edit the stylesheet which is produced. However, this section -shows how to control the properties with flags to perltidy. -.Sp -Syntax colors may be changed from their default values by flags of the either -the long form, \fB\-html\-color\-xxxxxx=n\fR, or more conveniently the short form, -\&\fB\-hcx=n\fR, where \fBxxxxxx\fR is one of the following words, and \fBx\fR is the -corresponding abbreviation: -.Sp -.Vb 10 -\& Token Type xxxxxx x -\& \-\-\-\-\-\-\-\-\-\- \-\-\-\-\-\-\-\- \-\- -\& comment comment c -\& number numeric n -\& identifier identifier i -\& bareword, function bareword w -\& keyword keyword k -\& quite, pattern quote q -\& here doc text here\-doc\-text h -\& here doc target here\-doc\-target hh -\& punctuation punctuation pu -\& parentheses paren p -\& structural braces structure s -\& semicolon semicolon sc -\& colon colon co -\& comma comma cm -\& label label j -\& sub definition name subroutine m -\& pod text pod\-text pd -.Ve -.Sp -A default set of colors has been defined, but they may be changed by providing -values to any of the following parameters, where \fBn\fR is either a 6 digit -hex \s-1RGB\s0 color value or an ascii name for a color, such as 'red'. -.Sp -To illustrate, the following command will produce an html -file \fIsomefile.pl.html\fR with \*(L"aqua\*(R" keywords: -.Sp -.Vb 1 -\& perltidy \-html \-hck=00ffff somefile.pl -.Ve -.Sp -and this should be equivalent for most browsers: -.Sp -.Vb 1 -\& perltidy \-html \-hck=aqua somefile.pl -.Ve -.Sp -Perltidy merely writes any non-hex names that it sees in the html file. -The following 16 color names are defined in the \s-1HTML 3.2\s0 standard: -.Sp -.Vb 10 -\& black => 000000, -\& silver => c0c0c0, -\& gray => 808080, -\& white => ffffff, -\& maroon => 800000, -\& red => ff0000, -\& purple => 800080, -\& fuchsia => ff00ff, -\& green => 008000, -\& lime => 00ff00, -\& olive => 808000, -\& yellow => ffff00 -\& navy => 000080, -\& blue => 0000ff, -\& teal => 008080, -\& aqua => 00ffff, -.Ve -.Sp -Many more names are supported in specific browsers, but it is safest -to use the hex codes for other colors. Helpful color tables can be -located with an internet search for \*(L"\s-1HTML\s0 color tables\*(R". -.Sp -Besides color, two other character attributes may be set: bold, and italics. -To set a token type to use bold, use the flag -\&\fB\-\-html\-bold\-xxxxxx\fR or \fB\-hbx\fR, where \fBxxxxxx\fR or \fBx\fR are the long -or short names from the above table. Conversely, to set a token type to -\&\s-1NOT\s0 use bold, use \fB\-\-nohtml\-bold\-xxxxxx\fR or \fB\-nhbx\fR. -.Sp -Likewise, to set a token type to use an italic font, use the flag -\&\fB\-\-html\-italic\-xxxxxx\fR or \fB\-hix\fR, where again \fBxxxxxx\fR or \fBx\fR are the -long or short names from the above table. And to set a token type to -\&\s-1NOT\s0 use italics, use \fB\-\-nohtml\-italic\-xxxxxx\fR or \fB\-nhix\fR. -.Sp -For example, to use bold braces and lime color, non-bold, italics keywords the -following command would be used: -.Sp -.Vb 1 -\& perltidy \-html \-hbs \-hck=00FF00 \-nhbk \-hik somefile.pl -.Ve -.Sp -The background color can be specified with \fB\-\-html\-color\-background=n\fR, -or \fB\-hcbg=n\fR for short, where n is a 6 character hex \s-1RGB\s0 value. The -default color of text is the value given to \fBpunctuation\fR, which is -black as a default. -.Sp -Here are some notes and hints: -.Sp -1. If you find a preferred set of these parameters, you may want -to create a \fI.perltidyrc\fR file containing them. See the perltidy man -page for an explanation. -.Sp -2. Rather than specifying values for these parameters, it is probably -easier to accept the defaults and then edit a style sheet. The style -sheet contains comments which should make this easy. -.Sp -3. The syntax-colored html files can be very large, so it may be best to -split large files into smaller pieces to improve download times. -.SH "SOME COMMON INPUT CONVENTIONS" -.IX Header "SOME COMMON INPUT CONVENTIONS" -.SS "Specifying Block Types" -.IX Subsection "Specifying Block Types" -Several parameters which refer to code block types may be customized by also -specifying an associated list of block types. The type of a block is the name -of the keyword which introduces that block, such as \fBif\fR, \fBelse\fR, or \fBsub\fR. -An exception is a labeled block, which has no keyword, and should be specified -with just a colon. To specify all blocks use \fB'*'\fR. -.PP -The keyword \fBsub\fR indicates a named sub. For anonymous subs, use the special -keyword \fBasub\fR. -.PP -For example, the following parameter specifies \f(CW\*(C`sub\*(C'\fR, labels, \f(CW\*(C`BEGIN\*(C'\fR, and -\&\f(CW\*(C`END\*(C'\fR blocks: -.PP -.Vb 1 -\& \-cscl="sub : BEGIN END" -.Ve -.PP -(the meaning of the \-cscl parameter is described above.) Note that -quotes are required around the list of block types because of the -spaces. For another example, the following list specifies all block types -for vertical tightness: -.PP -.Vb 1 -\& \-bbvtl=\*(Aq*\*(Aq -.Ve -.SS "Specifying File Extensions" -.IX Subsection "Specifying File Extensions" -Several parameters allow default file extensions to be overridden. For -example, a backup file extension may be specified with \fB\-bext=ext\fR, -where \fBext\fR is some new extension. In order to provides the user some -flexibility, the following convention is used in all cases to decide if -a leading '.' should be used. If the extension \f(CW\*(C`ext\*(C'\fR begins with -\&\f(CW\*(C`A\-Z\*(C'\fR, \f(CW\*(C`a\-z\*(C'\fR, or \f(CW\*(C`0\-9\*(C'\fR, then it will be appended to the filename with -an intermediate '.' (or perhaps an '_' on \s-1VMS\s0 systems). Otherwise, it -will be appended directly. -.PP -For example, suppose the file is \fIsomefile.pl\fR. For \f(CW\*(C`\-bext=old\*(C'\fR, a '.' is -added to give \fIsomefile.pl.old\fR. For \f(CW\*(C`\-bext=.old\*(C'\fR, no additional '.' is -added, so again the backup file is \fIsomefile.pl.old\fR. For \f(CW\*(C`\-bext=~\*(C'\fR, then no -dot is added, and the backup file will be \fIsomefile.pl~\fR . -.SH "SWITCHES WHICH MAY BE NEGATED" -.IX Header "SWITCHES WHICH MAY BE NEGATED" -The following list shows all short parameter names which allow a prefix -\&'n' to produce the negated form: -.PP -.Vb 6 -\& D anl asc aws b bbb bbc bbs bl bli boc bok bol bot ce -\& csc dac dbc dcsc ddf dln dnl dop dp dpro dsc dsm dsn dtt dwls -\& dwrs dws f fll frm fs hsc html ibc icb icp iob isbc lal log -\& lp lsl ohbr okw ola oll opr opt osbr otr ple pod pvl q -\& sbc sbl schb scp scsb sct se sfp sfs skp sob sohb sop sosb sot -\& ssc st sts syn t tac tbc toc tp tqw tsc w x bar kis -.Ve -.PP -Equivalently, the prefix 'no' or 'no\-' on the corresponding long names may be -used. -.SH "LIMITATIONS" -.IX Header "LIMITATIONS" -.IP "Parsing Limitations" 4 -.IX Item "Parsing Limitations" -Perltidy should work properly on most perl scripts. It does a lot of -self-checking, but still, it is possible that an error could be -introduced and go undetected. Therefore, it is essential to make -careful backups and to test reformatted scripts. -.Sp -The main current limitation is that perltidy does not scan modules -included with 'use' statements. This makes it necessary to guess the -context of any bare words introduced by such modules. Perltidy has good -guessing algorithms, but they are not infallible. When it must guess, -it leaves a message in the log file. -.Sp -If you encounter a bug, please report it. -.IP "What perltidy does not parse and format" 4 -.IX Item "What perltidy does not parse and format" -Perltidy indents but does not reformat comments and \f(CW\*(C`qw\*(C'\fR quotes. -Perltidy does not in any way modify the contents of here documents or -quoted text, even if they contain source code. (You could, however, -reformat them separately). Perltidy does not format 'format' sections -in any way. And, of course, it does not modify pod documents. -.SH "FILES" -.IX Header "FILES" -.IP "Temporary files" 4 -.IX Item "Temporary files" -Under the \-html option with the default \-\-pod2html flag, a temporary file is -required to pass text to Pod::Html. Unix systems will try to use the \s-1POSIX\s0 -\&\fItmpnam()\fR function. Otherwise the file \fIperltidy.TMP\fR will be temporarily -created in the current working directory. -.IP "Special files when standard input is used" 4 -.IX Item "Special files when standard input is used" -When standard input is used, the log file, if saved, is \fIperltidy.LOG\fR, -and any errors are written to \fIperltidy.ERR\fR unless the \fB\-se\fR flag is -set. These are saved in the current working directory. -.IP "Files overwritten" 4 -.IX Item "Files overwritten" -The following file extensions are used by perltidy, and files with these -extensions may be overwritten or deleted: \fI.ERR\fR, \fI.LOG\fR, \fI.TEE\fR, -and/or \fI.tdy\fR, \fI.html\fR, and \fI.bak\fR, depending on the run type and -settings. -.IP "Files extensions limitations" 4 -.IX Item "Files extensions limitations" -Perltidy does not operate on files for which the run could produce a file with -a duplicated file extension. These extensions include \fI.LOG\fR, \fI.ERR\fR, -\&\fI.TEE\fR, and perhaps \fI.tdy\fR and \fI.bak\fR, depending on the run type. The -purpose of this rule is to prevent generating confusing filenames such as -\&\fIsomefile.tdy.tdy.tdy\fR. -.SH "SEE ALSO" -.IX Header "SEE ALSO" -\&\fIperlstyle\fR\|(1), \fIPerl::Tidy\fR\|(3) -.SH "VERSION" -.IX Header "VERSION" -This man page documents perltidy version 20180220. -.SH "BUG REPORTS" -.IX Header "BUG REPORTS" -A list of current bugs and issues can be found at the \s-1CPAN\s0 site -.PP -.Vb 1 -\& https://rt.cpan.org/Public/Dist/Display.html?Name=Perl\-Tidy -.Ve -.PP -To report a new bug or problem, use the link on this page. -.SH "COPYRIGHT" -.IX Header "COPYRIGHT" -Copyright (c) 2000\-2018 by Steve Hancock -.SH "LICENSE" -.IX Header "LICENSE" -This package is free software; you can redistribute it and/or modify it -under the terms of the \*(L"\s-1GNU\s0 General Public License\*(R". -.PP -Please refer to the file \*(L"\s-1COPYING\*(R"\s0 for details. -.SH "DISCLAIMER" -.IX Header "DISCLAIMER" -This package is distributed in the hope that it will be useful, -but \s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of -\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0 -.PP -See the \*(L"\s-1GNU\s0 General Public License\*(R" for more details. diff --git a/docs/perltidy.html b/docs/perltidy.html new file mode 100644 index 0000000..732e3d2 --- /dev/null +++ b/docs/perltidy.html @@ -0,0 +1,2907 @@ + + + + + + + + + + + + + + + +

NAME

+ +

perltidy - a perl script indenter and reformatter

+ +

SYNOPSIS

+ +
    perltidy [ options ] file1 file2 file3 ...
+            (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
+    perltidy [ options ] file1 -o outfile
+    perltidy [ options ] file1 -st >outfile
+    perltidy [ options ] <infile >outfile
+ +

DESCRIPTION

+ +

Perltidy reads a perl script and writes an indented, reformatted script.

+ +

Many users will find enough information in "EXAMPLES" to get started. New users may benefit from the short tutorial which can be found at http://perltidy.sourceforge.net/tutorial.html

+ +

A convenient aid to systematically defining a set of style parameters can be found at http://perltidy.sourceforge.net/stylekey.html

+ +

Perltidy can produce output on either of two modes, depending on the existence of an -html flag. Without this flag, the output is passed through a formatter. The default formatting tries to follow the recommendations in perlstyle(1), but it can be controlled in detail with numerous input parameters, which are described in "FORMATTING OPTIONS".

+ +

When the -html flag is given, the output is passed through an HTML formatter which is described in "HTML OPTIONS".

+ +

EXAMPLES

+ +
  perltidy somefile.pl
+ +

This will produce a file somefile.pl.tdy containing the script reformatted using the default options, which approximate the style suggested in perlstyle(1). The source file somefile.pl is unchanged.

+ +
  perltidy *.pl
+ +

Execute perltidy on all .pl files in the current directory with the default options. The output will be in files with an appended .tdy extension. For any file with an error, there will be a file with extension .ERR.

+ +
  perltidy -b file1.pl file2.pl
+ +

Modify file1.pl and file2.pl in place, and backup the originals to file1.pl.bak and file2.pl.bak. If file1.pl.bak and/or file2.pl.bak already exist, they will be overwritten.

+ +
  perltidy -b -bext='/' file1.pl file2.pl
+ +

Same as the previous example except that the backup files file1.pl.bak and file2.pl.bak will be deleted if there are no errors.

+ +
  perltidy -gnu somefile.pl
+ +

Execute perltidy on file somefile.pl with a style which approximates the GNU Coding Standards for C programs. The output will be somefile.pl.tdy.

+ +
  perltidy -i=3 somefile.pl
+ +

Execute perltidy on file somefile.pl, with 3 columns for each level of indentation (-i=3) instead of the default 4 columns. There will not be any tabs in the reformatted script, except for any which already exist in comments, pod documents, quotes, and here documents. Output will be somefile.pl.tdy.

+ +
  perltidy -i=3 -et=8 somefile.pl
+ +

Same as the previous example, except that leading whitespace will be entabbed with one tab character per 8 spaces.

+ +
  perltidy -ce -l=72 somefile.pl
+ +

Execute perltidy on file somefile.pl with all defaults except use "cuddled elses" (-ce) and a maximum line length of 72 columns (-l=72) instead of the default 80 columns.

+ +
  perltidy -g somefile.pl
+ +

Execute perltidy on file somefile.pl and save a log file somefile.pl.LOG which shows the nesting of braces, parentheses, and square brackets at the start of every line.

+ +
  perltidy -html somefile.pl
+ +

This will produce a file somefile.pl.html containing the script with html markup. The output file will contain an embedded style sheet in the <HEAD> section which may be edited to change the appearance.

+ +
  perltidy -html -css=mystyle.css somefile.pl
+ +

This will produce a file somefile.pl.html containing the script with html markup. This output file will contain a link to a separate style sheet file mystyle.css. If the file mystyle.css does not exist, it will be created. If it exists, it will not be overwritten.

+ +
  perltidy -html -pre somefile.pl
+ +

Write an html snippet with only the PRE section to somefile.pl.html. This is useful when code snippets are being formatted for inclusion in a larger web page. No style sheet will be written in this case.

+ +
  perltidy -html -ss >mystyle.css
+ +

Write a style sheet to mystyle.css and exit.

+ +
  perltidy -html -frm mymodule.pm
+ +

Write html with a frame holding a table of contents and the source code. The output files will be mymodule.pm.html (the frame), mymodule.pm.toc.html (the table of contents), and mymodule.pm.src.html (the source code).

+ +

OPTIONS - OVERVIEW

+ +

The entire command line is scanned for options, and they are processed before any files are processed. As a result, it does not matter whether flags are before or after any filenames. However, the relative order of parameters is important, with later parameters overriding the values of earlier parameters.

+ +

For each parameter, there is a long name and a short name. The short names are convenient for keyboard input, while the long names are self-documenting and therefore useful in scripts. It is customary to use two leading dashes for long names, but one may be used.

+ +

Most parameters which serve as on/off flags can be negated with a leading "n" (for the short name) or a leading "no" or "no-" (for the long name). For example, the flag to outdent long quotes is -olq or --outdent-long-quotes. The flag to skip this is -nolq or --nooutdent-long-quotes or --no-outdent-long-quotes.

+ +

Options may not be bundled together. In other words, options -q and -g may NOT be entered as -qg.

+ +

Option names may be terminated early as long as they are uniquely identified. For example, instead of --dump-token-types, it would be sufficient to enter --dump-tok, or even --dump-t, to uniquely identify this command.

+ +

I/O control

+ +

The following parameters concern the files which are read and written.

+ +
+ +
-h, --help
+
+ +

Show summary of usage and exit.

+ +
+
-o=filename, --outfile=filename
+
+ +

Name of the output file (only if a single input file is being processed). If no output file is specified, and output is not redirected to the standard output (see -st), the output will go to filename.tdy. [Note: - does not redirect to standard output. Use -st instead.]

+ +
+
-st, --standard-output
+
+ +

Perltidy must be able to operate on an arbitrarily large number of files in a single run, with each output being directed to a different output file. Obviously this would conflict with outputting to the single standard output device, so a special flag, -st, is required to request outputting to the standard output. For example,

+ +
  perltidy somefile.pl -st >somefile.new.pl
+ +

This option may only be used if there is just a single input file. The default is -nst or --nostandard-output.

+ +
+
-se, --standard-error-output
+
+ +

If perltidy detects an error when processing file somefile.pl, its default behavior is to write error messages to file somefile.pl.ERR. Use -se to cause all error messages to be sent to the standard error output stream instead. This directive may be negated with -nse. Thus, you may place -se in a .perltidyrc and override it when desired with -nse on the command line.

+ +
+
-oext=ext, --output-file-extension=ext
+
+ +

Change the extension of the output file to be ext instead of the default tdy (or html in case the --html option is used). See "Specifying File Extensions".

+ +
+
-opath=path, --output-path=path
+
+ +

When perltidy creates a filename for an output file, by default it merely appends an extension to the path and basename of the input file. This parameter causes the path to be changed to path instead.

+ +

The path should end in a valid path separator character, but perltidy will try to add one if it is missing.

+ +

For example

+ +
 perltidy somefile.pl -opath=/tmp/
+ +

will produce /tmp/somefile.pl.tdy. Otherwise, somefile.pl.tdy will appear in whatever directory contains somefile.pl.

+ +

If the path contains spaces, it should be placed in quotes.

+ +

This parameter will be ignored if output is being directed to standard output, or if it is being specified explicitly with the -o=s parameter.

+ +
+
-b, --backup-and-modify-in-place
+
+ +

Modify the input file or files in-place and save the original with the extension .bak. Any existing .bak file will be deleted. See next item for changing the default backup extension, and for eliminating the backup file altogether.

+ +

A -b flag will be ignored if input is from standard input or goes to standard output, or if the -html flag is set.

+ +

In particular, if you want to use both the -b flag and the -pbp (--perl-best-practices) flag, then you must put a -nst flag after the -pbp flag because it contains a -st flag as one of its components, which means that output will go to the standard output stream.

+ +
+
-bext=ext, --backup-file-extension=ext
+
+ +

This parameter serves two purposes: (1) to change the extension of the backup file to be something other than the default .bak, and (2) to indicate that no backup file should be saved.

+ +

To change the default extension to something other than .bak see "Specifying File Extensions".

+ +

A backup file of the source is always written, but you can request that it be deleted at the end of processing if there were no errors. This is risky unless the source code is being maintained with a source code control system.

+ +

To indicate that the backup should be deleted include one forward slash, /, in the extension. If any text remains after the slash is removed it will be used to define the backup file extension (which is always created and only deleted if there were no errors).

+ +

Here are some examples:

+ +
  Parameter           Extension          Backup File Treatment
+  <-bext=bak>         F<.bak>            Keep (same as the default behavior)
+  <-bext='/'>         F<.bak>            Delete if no errors
+  <-bext='/backup'>   F<.backup>         Delete if no errors
+  <-bext='original/'> F<.original>       Delete if no errors
+ +
+
-w, --warning-output
+
+ +

Setting -w causes any non-critical warning messages to be reported as errors. These include messages about possible pod problems, possibly bad starting indentation level, and cautions about indirect object usage. The default, -nw or --nowarning-output, is not to include these warnings.

+ +
+
-q, --quiet
+
+ +

Deactivate error messages and syntax checking (for running under an editor).

+ +

For example, if you use a vi-style editor, such as vim, you may execute perltidy as a filter from within the editor using something like

+ +
 :n1,n2!perltidy -q
+ +

where n1,n2 represents the selected text. Without the -q flag, any error message may mess up your screen, so be prepared to use your "undo" key.

+ +
+
-log, --logfile
+
+ +

Save the .LOG file, which has many useful diagnostics. Perltidy always creates a .LOG file, but by default it is deleted unless a program bug is suspected. Setting the -log flag forces the log file to be saved.

+ +
+
-g=n, --logfile-gap=n
+
+ +

Set maximum interval between input code lines in the logfile. This purpose of this flag is to assist in debugging nesting errors. The value of n is optional. If you set the flag -g without the value of n, it will be taken to be 1, meaning that every line will be written to the log file. This can be helpful if you are looking for a brace, paren, or bracket nesting error.

+ +

Setting -g also causes the logfile to be saved, so it is not necessary to also include -log.

+ +

If no -g flag is given, a value of 50 will be used, meaning that at least every 50th line will be recorded in the logfile. This helps prevent excessively long log files.

+ +

Setting a negative value of n is the same as not setting -g at all.

+ +
+
-npro --noprofile
+
+ +

Ignore any .perltidyrc command file. Normally, perltidy looks first in your current directory for a .perltidyrc file of parameters. (The format is described below). If it finds one, it applies those options to the initial default values, and then it applies any that have been defined on the command line. If no .perltidyrc file is found, it looks for one in your home directory.

+ +

If you set the -npro flag, perltidy will not look for this file.

+ +
+
-pro=filename or --profile=filename
+
+ +

To simplify testing and switching .perltidyrc files, this command may be used to specify a configuration file which will override the default name of .perltidyrc. There must not be a space on either side of the '=' sign. For example, the line

+ +
   perltidy -pro=testcfg
+ +

would cause file testcfg to be used instead of the default .perltidyrc.

+ +

A pathname begins with three dots, e.g. ".../.perltidyrc", indicates that the file should be searched for starting in the current directory and working upwards. This makes it easier to have multiple projects each with their own .perltidyrc in their root directories.

+ +
+
-opt, --show-options
+
+ +

Write a list of all options used to the .LOG file. Please see --dump-options for a simpler way to do this.

+ +
+
-f, --force-read-binary
+
+ +

Force perltidy to process binary files. To avoid producing excessive error messages, perltidy skips files identified by the system as non-text. However, valid perl scripts containing binary data may sometimes be identified as non-text, and this flag forces perltidy to process them.

+ +
+
+ +

FORMATTING OPTIONS

+ +

Basic Options

+ +
+ +
--notidy
+
+ +

This flag disables all formatting and causes the input to be copied unchanged to the output except for possible changes in line ending characters and any pre- and post-filters. This can be useful in conjunction with a hierarchical set of .perltidyrc files to avoid unwanted code tidying. See also "Skipping Selected Sections of Code" for a way to avoid tidying specific sections of code.

+ +
+
-i=n, --indent-columns=n
+
+ +

Use n columns per indentation level (default n=4).

+ +
+
-l=n, --maximum-line-length=n
+
+ +

The default maximum line length is n=80 characters. Perltidy will try to find line break points to keep lines below this length. However, long quotes and side comments may cause lines to exceed this length. Setting -l=0 is equivalent to setting -l=(a large number).

+ +
+
-vmll, --variable-maximum-line-length
+
+ +

A problem arises using a fixed maximum line length with very deeply nested code and data structures because eventually the amount of leading whitespace used for indicating indentation takes up most or all of the available line width, leaving little or no space for the actual code or data. One solution is to use a vary long line length. Another solution is to use the -vmll flag, which basically tells perltidy to ignore leading whitespace when measuring the line length.

+ +

To be precise, when the -vmll parameter is set, the maximum line length of a line of code will be M+L*I, where

+ +
      M is the value of --maximum-line-length=M (-l=M), default 80,
+      I is the value of --indent-columns=I (-i=I), default 4,
+      L is the indentation level of the line of code
+ +

When this flag is set, the choice of breakpoints for a block of code should be essentially independent of its nesting depth. However, the absolute line lengths, including leading whitespace, can still be arbitrarily large. This problem can be avoided by including the next parameter.

+ +

The default is not to do this (-nvmll).

+ +
+
-wc=n, --whitespace-cycle=n
+
+ +

This flag also addresses problems with very deeply nested code and data structures. When the nesting depth exceeds the value n the leading whitespace will be reduced and start at a depth of 1 again. The result is that blocks of code will shift back to the left rather than moving arbitrarily far to the right. This occurs cyclically to any depth.

+ +

For example if one level of indentation equals 4 spaces (-i=4, the default), and one uses -wc=15, then if the leading whitespace on a line exceeds about 4*15=60 spaces it will be reduced back to 4*1=4 spaces and continue increasing from there. If the whitespace never exceeds this limit the formatting remains unchanged.

+ +

The combination of -vmll and -wc=n provides a solution to the problem of displaying arbitrarily deep data structures and code in a finite window, although -wc=n may of course be used without -vmll.

+ +

The default is not to use this, which can also be indicated using -wc=0.

+ +
+
tabs
+
+ +

Using tab characters will almost certainly lead to future portability and maintenance problems, so the default and recommendation is not to use them. For those who prefer tabs, however, there are two different options.

+ +

Except for possibly introducing tab indentation characters, as outlined below, perltidy does not introduce any tab characters into your file, and it removes any tabs from the code (unless requested not to do so with -fws). If you have any tabs in your comments, quotes, or here-documents, they will remain.

+ +
+ +
-et=n, --entab-leading-whitespace
+
+ +

This flag causes each n initial space characters to be replaced by one tab character. Note that the integer n is completely independent of the integer specified for indentation parameter, -i=n.

+ +
+
-t, --tabs
+
+ +

This flag causes one leading tab character to be inserted for each level of indentation. Certain other features are incompatible with this option, and if these options are also given, then a warning message will be issued and this flag will be unset. One example is the -lp option.

+ +
+
-dt=n, --default-tabsize=n
+
+ +

If the first line of code passed to perltidy contains leading tabs but no tab scheme is specified for the output stream then perltidy must guess how many spaces correspond to each leading tab. This number of spaces n corresponding to each leading tab of the input stream may be specified with -dt=n. The default is n=8.

+ +

This flag has no effect if a tab scheme is specified for the output stream, because then the input stream is assumed to use the same tab scheme and indentation spaces as for the output stream (any other assumption would lead to unstable editing).

+ +
+
+ +
+
-syn, --check-syntax
+
+ +

This flag is now ignored for safety, but the following documentation has been retained for reference.

+ +

This flag causes perltidy to run perl -c -T to check syntax of input and output. (To change the flags passed to perl, see the next item, -pscf). The results are written to the .LOG file, which will be saved if an error is detected in the output script. The output script is not checked if the input script has a syntax error. Perltidy does its own checking, but this option employs perl to get a "second opinion".

+ +

If perl reports errors in the input file, they will not be reported in the error output unless the --warning-output flag is given.

+ +

The default is NOT to do this type of syntax checking (although perltidy will still do as much self-checking as possible). The reason is that it causes all code in BEGIN blocks to be executed, for all modules being used, and this opens the door to security issues and infinite loops when running perltidy.

+ +
+
-pscf=s, -perl-syntax-check-flags=s
+
+ +

When perl is invoked to check syntax, the normal flags are -c -T. In addition, if the -x flag is given to perltidy, then perl will also be passed a -x flag. It should not normally be necessary to change these flags, but it can be done with the -pscf=s flag. For example, if the taint flag, -T, is not wanted, the flag could be set to be just -pscf=-c.

+ +

Perltidy will pass your string to perl with the exception that it will add a -c and -x if appropriate. The .LOG file will show exactly what flags were passed to perl.

+ +
+
-xs, --extended-syntax
+
+ +

A problem with formatting Perl code is that some modules can introduce new syntax. This flag allows perltidy to handle certain common extensions to the standard syntax without complaint.

+ +

For example, without this flag a structure such as the following would generate a syntax error and the braces would not be balanced:

+ +
    method deposit( Num $amount) {
+        $self->balance( $self->balance + $amount );
+    }
+ +

This flag is enabled by default but it can be deactivated with -nxs. Probably the only reason to deactivate this flag is to generate more diagnostic messages when debugging a script.

+ +
+
-io, --indent-only
+
+ +

This flag is used to deactivate all whitespace and line break changes within non-blank lines of code. When it is in effect, the only change to the script will be to the indentation and to the number of blank lines. And any flags controlling whitespace and newlines will be ignored. You might want to use this if you are perfectly happy with your whitespace and line breaks, and merely want perltidy to handle the indentation. (This also speeds up perltidy by well over a factor of two, so it might be useful when perltidy is merely being used to help find a brace error in a large script).

+ +

Setting this flag is equivalent to setting --freeze-newlines and --freeze-whitespace.

+ +

If you also want to keep your existing blank lines exactly as they are, you can add --freeze-blank-lines.

+ +

With this option perltidy is still free to modify the indenting (and outdenting) of code and comments as it normally would. If you also want to prevent long comment lines from being outdented, you can add either -noll or -l=0.

+ +

Setting this flag will prevent perltidy from doing any special operations on closing side comments. You may still delete all side comments however when this flag is in effect.

+ +
+
-enc=s, --character-encoding=s
+
+ +

where s=none or utf8. This flag tells perltidy the character encoding of both the input and output character streams. The value utf8 causes the stream to be read and written as UTF-8. The value none causes the stream to be processed without special encoding assumptions. At present there is no automatic detection of character encoding (even if there is a 'use utf8' statement in your code) so this flag must be set for streams encoded in UTF-8. Incorrectly setting this parameter can cause data corruption, so please carefully check the output.

+ +

The default is none.

+ +

The abbreviations -utf8 or -UTF8 are equivalent to -enc=utf8. So to process a file named file.pl which is encoded in UTF-8 you can use:

+ +
   perltidy -utf8 file.pl
+ +
+
-ole=s, --output-line-ending=s
+
+ +

where s=win, dos, unix, or mac. This flag tells perltidy to output line endings for a specific system. Normally, perltidy writes files with the line separator character of the host system. The win and dos flags have an identical result.

+ +
+
-ple, --preserve-line-endings
+
+ +

This flag tells perltidy to write its output files with the same line endings as the input file, if possible. It should work for dos, unix, and mac line endings. It will only work if perltidy input comes from a filename (rather than stdin, for example). If perltidy has trouble determining the input file line ending, it will revert to the default behavior of using the line ending of the host system.

+ +
+
-it=n, --iterations=n
+
+ +

This flag causes perltidy to do n complete iterations. The reason for this flag is that code beautification is an iterative process and in some cases the output from perltidy can be different if it is applied a second time. For most purposes the default of n=1 should be satisfactory. However n=2 can be useful when a major style change is being made, or when code is being beautified on check-in to a source code control system. It has been found to be extremely rare for the output to change after 2 iterations. If a value n is greater than 2 is input then a convergence test will be used to stop the iterations as soon as possible, almost always after 2 iterations. See the next item for a simplified iteration control.

+ +

This flag has no effect when perltidy is used to generate html.

+ +
+
-conv, --converge
+
+ +

This flag is equivalent to -it=4 and is included to simplify iteration control. For all practical purposes one either does or does not want to be sure that the output is converged, and there is no penalty to using a large iteration limit since perltidy will check for convergence and stop iterating as soon as possible. The default is -nconv (no convergence check). Using -conv will approximately double run time since normally one extra iteration is required to verify convergence.

+ +
+
+ +

Code Indentation Control

+ +
+ +
-ci=n, --continuation-indentation=n
+
+ +

Continuation indentation is extra indentation spaces applied when a long line is broken. The default is n=2, illustrated here:

+ +
 my $level =   # -ci=2      
+   ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level;
+ +

The same example, with n=0, is a little harder to read:

+ +
 my $level =   # -ci=0    
+ ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level;
+ +

The value given to -ci is also used by some commands when a small space is required. Examples are commands for outdenting labels, -ola, and control keywords, -okw.

+ +

When default values are not used, it is suggested that the value n given with -ci=n be no more than about one-half of the number of spaces assigned to a full indentation level on the -i=n command.

+ +
+
-sil=n --starting-indentation-level=n
+
+ +

By default, perltidy examines the input file and tries to determine the starting indentation level. While it is often zero, it may not be zero for a code snippet being sent from an editing session.

+ +

To guess the starting indentation level perltidy simply assumes that indentation scheme used to create the code snippet is the same as is being used for the current perltidy process. This is the only sensible guess that can be made. It should be correct if this is true, but otherwise it probably won't. For example, if the input script was written with -i=2 and the current peltidy flags have -i=4, the wrong initial indentation will be guessed for a code snippet which has non-zero initial indentation. Likewise, if an entabbing scheme is used in the input script and not in the current process then the guessed indentation will be wrong.

+ +

If the default method does not work correctly, or you want to change the starting level, use -sil=n, to force the starting level to be n.

+ +
+
List indentation using -lp, --line-up-parentheses
+
+ +

By default, perltidy indents lists with 4 spaces, or whatever value is specified with -i=n. Here is a small list formatted in this way:

+ +
    # perltidy (default)
+    @month_of_year = (
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+ +

Use the -lp flag to add extra indentation to cause the data to begin past the opening parentheses of a sub call or list, or opening square bracket of an anonymous array, or opening curly brace of an anonymous hash. With this option, the above list would become:

+ +
    # perltidy -lp
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+ +

If the available line length (see -l=n ) does not permit this much space, perltidy will use less. For alternate placement of the closing paren, see the next section.

+ +

This option has no effect on code BLOCKS, such as if/then/else blocks, which always use whatever is specified with -i=n. Also, the existence of line breaks and/or block comments between the opening and closing parens may cause perltidy to temporarily revert to its default method.

+ +

Note: The -lp option may not be used together with the -t tabs option. It may, however, be used with the -et=n tab method.

+ +

In addition, any parameter which significantly restricts the ability of perltidy to choose newlines will conflict with -lp and will cause -lp to be deactivated. These include -io, -fnl, -nanl, and -ndnl. The reason is that the -lp indentation style can require the careful coordination of an arbitrary number of break points in hierarchical lists, and these flags may prevent that.

+ +
+
-cti=n, --closing-token-indentation
+
+ +

The -cti=n flag controls the indentation of a line beginning with a ), ], or a non-block }. Such a line receives:

+ +
 -cti = 0 no extra indentation (default)
+ -cti = 1 extra indentation such that the closing token
+        aligns with its opening token.
+ -cti = 2 one extra indentation level if the line looks like:
+        );  or  ];  or  };
+ -cti = 3 one extra indentation level always
+ +

The flags -cti=1 and -cti=2 work well with the -lp flag (previous section).

+ +
    # perltidy -lp -cti=1
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                     );
+
+    # perltidy -lp -cti=2
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                       );
+ +

These flags are merely hints to the formatter and they may not always be followed. In particular, if -lp is not being used, the indentation for cti=1 is constrained to be no more than one indentation level.

+ +

If desired, this control can be applied independently to each of the closing container token types. In fact, -cti=n is merely an abbreviation for -cpi=n -csbi=n -cbi=n, where: -cpi or --closing-paren-indentation controls )'s, -csbi or --closing-square-bracket-indentation controls ]'s, -cbi or --closing-brace-indentation controls non-block }'s.

+ +
+
-icp, --indent-closing-paren
+
+ +

The -icp flag is equivalent to -cti=2, described in the previous section. The -nicp flag is equivalent -cti=0. They are included for backwards compatibility.

+ +
+
-icb, --indent-closing-brace
+
+ +

The -icb option gives one extra level of indentation to a brace which terminates a code block . For example,

+ +
        if ($task) {
+            yyy();
+            }    # -icb
+        else {
+            zzz();
+            }
+ +

The default is not to do this, indicated by -nicb.

+ +
+
-olq, --outdent-long-quotes
+
+ +

When -olq is set, lines which is a quoted string longer than the value maximum-line-length will have their indentation removed to make them more readable. This is the default. To prevent such out-denting, use -nolq or --nooutdent-long-lines.

+ +
+
-oll, --outdent-long-lines
+
+ +

This command is equivalent to --outdent-long-quotes and --outdent-long-comments, and it is included for compatibility with previous versions of perltidy. The negation of this also works, -noll or --nooutdent-long-lines, and is equivalent to setting -nolq and -nolc.

+ +
+
Outdenting Labels: -ola, --outdent-labels
+
+ +

This command will cause labels to be outdented by 2 spaces (or whatever -ci has been set to), if possible. This is the default. For example:

+ +
        my $i;
+      LOOP: while ( $i = <FOTOS> ) {
+            chomp($i);
+            next unless $i;
+            fixit($i);
+        }
+ +

Use -nola to not outdent labels.

+ +
+
Outdenting Keywords
+
+ +
+ +
-okw, --outdent-keywords
+
+ +

The command -okw will cause certain leading control keywords to be outdented by 2 spaces (or whatever -ci has been set to), if possible. By default, these keywords are redo, next, last, goto, and return. The intention is to make these control keywords easier to see. To change this list of keywords being outdented, see the next section.

+ +

For example, using perltidy -okw on the previous example gives:

+ +
        my $i;
+      LOOP: while ( $i = <FOTOS> ) {
+            chomp($i);
+          next unless $i;
+            fixit($i);
+        }
+ +

The default is not to do this.

+ +
+
Specifying Outdented Keywords: -okwl=string, --outdent-keyword-list=string
+
+ +

This command can be used to change the keywords which are outdented with the -okw command. The parameter string is a required list of perl keywords, which should be placed in quotes if there are more than one. By itself, it does not cause any outdenting to occur, so the -okw command is still required.

+ +

For example, the commands -okwl="next last redo goto" -okw will cause those four keywords to be outdented. It is probably simplest to place any -okwl command in a .perltidyrc file.

+ +
+
+ +
+
+ +

Whitespace Control

+ +

Whitespace refers to the blank space between variables, operators, and other code tokens.

+ +
+ +
-fws, --freeze-whitespace
+
+ +

This flag causes your original whitespace to remain unchanged, and causes the rest of the whitespace commands in this section, the Code Indentation section, and the Comment Control section to be ignored.

+ +
+
Tightness of curly braces, parentheses, and square brackets.
+
+ +

Here the term "tightness" will mean the closeness with which pairs of enclosing tokens, such as parentheses, contain the quantities within. A numerical value of 0, 1, or 2 defines the tightness, with 0 being least tight and 2 being most tight. Spaces within containers are always symmetric, so if there is a space after a ( then there will be a space before the corresponding ).

+ +

The -pt=n or --paren-tightness=n parameter controls the space within parens. The example below shows the effect of the three possible values, 0, 1, and 2:

+ +
 if ( ( my $len_tab = length( $tabstr ) ) > 0 ) {  # -pt=0
+ if ( ( my $len_tab = length($tabstr) ) > 0 ) {    # -pt=1 (default)
+ if ((my $len_tab = length($tabstr)) > 0) {        # -pt=2
+ +

When n is 0, there is always a space to the right of a '(' and to the left of a ')'. For n=2 there is never a space. For n=1, the default, there is a space unless the quantity within the parens is a single token, such as an identifier or quoted string.

+ +

Likewise, the parameter -sbt=n or --square-bracket-tightness=n controls the space within square brackets, as illustrated below.

+ +
 $width = $col[ $j + $k ] - $col[ $j ];  # -sbt=0
+ $width = $col[ $j + $k ] - $col[$j];    # -sbt=1 (default)
+ $width = $col[$j + $k] - $col[$j];      # -sbt=2 
+ +

Curly braces which do not contain code blocks are controlled by the parameter -bt=n or --brace-tightness=n.

+ +
 $obj->{ $parsed_sql->{ 'table' }[0] };    # -bt=0
+ $obj->{ $parsed_sql->{'table'}[0] };      # -bt=1 (default)
+ $obj->{$parsed_sql->{'table'}[0]};        # -bt=2
+ +

And finally, curly braces which contain blocks of code are controlled by the parameter -bbt=n or --block-brace-tightness=n as illustrated in the example below.

+ +
 %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default)
+ %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.';   # -bbt=1
+ %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.';     # -bbt=2
+ +

To simplify input in the case that all of the tightness flags have the same value <n>, the parameter <-act=n> or --all-containers-tightness=n is an abbreviation for the combination <-pt=n -sbt=n -bt=n -bbt=n>.

+ +
+
-tso, --tight-secret-operators
+
+ +

The flag -tso causes certain perl token sequences (secret operators) which might be considered to be a single operator to be formatted "tightly" (without spaces). The operators currently modified by this flag are:

+ +
     0+  +0  ()x!! ~~<>  ,=>   =( )=  
+ +

For example the sequence 0 +, which converts a string to a number, would be formatted without a space: 0+ when the -tso flag is set. This flag is off by default.

+ +
+
-sts, --space-terminal-semicolon
+
+ +

Some programmers prefer a space before all terminal semicolons. The default is for no such space, and is indicated with -nsts or --nospace-terminal-semicolon.

+ +
        $i = 1 ;     #  -sts
+        $i = 1;      #  -nsts   (default)
+ +
+
-sfs, --space-for-semicolon
+
+ +

Semicolons within for loops may sometimes be hard to see, particularly when commas are also present. This option places spaces on both sides of these special semicolons, and is the default. Use -nsfs or --nospace-for-semicolon to deactivate it.

+ +
 for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) {  # -sfs (default)
+ for ( @a = @$ap, $u = shift @a; @a; $u = $v ) {    # -nsfs
+ +
+
-asc, --add-semicolons
+
+ +

Setting -asc allows perltidy to add any missing optional semicolon at the end of a line which is followed by a closing curly brace on the next line. This is the default, and may be deactivated with -nasc or --noadd-semicolons.

+ +
+
-dsm, --delete-semicolons
+
+ +

Setting -dsm allows perltidy to delete extra semicolons which are simply empty statements. This is the default, and may be deactivated with -ndsm or --nodelete-semicolons. (Such semicolons are not deleted, however, if they would promote a side comment to a block comment).

+ +
+
-aws, --add-whitespace
+
+ +

Setting this option allows perltidy to add certain whitespace improve code readability. This is the default. If you do not want any whitespace added, but are willing to have some whitespace deleted, use -naws. (Use -fws to leave whitespace completely unchanged).

+ +
+
-dws, --delete-old-whitespace
+
+ +

Setting this option allows perltidy to remove some old whitespace between characters, if necessary. This is the default. If you do not want any old whitespace removed, use -ndws or --nodelete-old-whitespace.

+ +
+
Detailed whitespace controls around tokens
+
+ +

For those who want more detailed control over the whitespace around tokens, there are four parameters which can directly modify the default whitespace rules built into perltidy for any token. They are:

+ +

-wls=s or --want-left-space=s,

+ +

-nwls=s or --nowant-left-space=s,

+ +

-wrs=s or --want-right-space=s,

+ +

-nwrs=s or --nowant-right-space=s.

+ +

These parameters are each followed by a quoted string, s, containing a list of token types. No more than one of each of these parameters should be specified, because repeating a command-line parameter always overwrites the previous one before perltidy ever sees it.

+ +

To illustrate how these are used, suppose it is desired that there be no space on either side of the token types = + - / *. The following two parameters would specify this desire:

+ +
  -nwls="= + - / *"    -nwrs="= + - / *"
+ +

(Note that the token types are in quotes, and that they are separated by spaces). With these modified whitespace rules, the following line of math:

+ +
  $root = -$b + sqrt( $b * $b - 4. * $a * $c ) / ( 2. * $a );
+ +

becomes this:

+ +
  $root=-$b+sqrt( $b*$b-4.*$a*$c )/( 2.*$a );
+ +

These parameters should be considered to be hints to perltidy rather than fixed rules, because perltidy must try to resolve conflicts that arise between them and all of the other rules that it uses. One conflict that can arise is if, between two tokens, the left token wants a space and the right one doesn't. In this case, the token not wanting a space takes priority.

+ +

It is necessary to have a list of all token types in order to create this type of input. Such a list can be obtained by the command --dump-token-types. Also try the -D flag on a short snippet of code and look at the .DEBUG file to see the tokenization.

+ +

WARNING Be sure to put these tokens in quotes to avoid having them misinterpreted by your command shell.

+ +
+
Space between specific keywords and opening paren
+
+ +

When an opening paren follows a Perl keyword, no space is introduced after the keyword, unless it is (by default) one of these:

+ +
   my local our and or eq ne if else elsif until unless 
+   while for foreach return switch case given when
+ +

These defaults can be modified with two commands:

+ +

-sak=s or --space-after-keyword=s adds keywords.

+ +

-nsak=s or --nospace-after-keyword=s removes keywords.

+ +

where s is a list of keywords (in quotes if necessary). For example,

+ +
  my ( $a, $b, $c ) = @_;    # default
+  my( $a, $b, $c ) = @_;     # -nsak="my local our"
+ +

The abbreviation -nsak='*' is equivalent to including all of the keywords in the above list.

+ +

When both -nsak=s and -sak=s commands are included, the -nsak=s command is executed first. For example, to have space after only the keywords (my, local, our) you could use -nsak="*" -sak="my local our".

+ +

To put a space after all keywords, see the next item.

+ +
+
Space between all keywords and opening parens
+
+ +

When an opening paren follows a function or keyword, no space is introduced after the keyword except for the keywords noted in the previous item. To always put a space between a function or keyword and its opening paren, use the command:

+ +

-skp or --space-keyword-paren

+ +

You will probably also want to use the flag -sfp (next item) too.

+ +
+
Space between all function names and opening parens
+
+ +

When an opening paren follows a function the default is not to introduce a space. To cause a space to be introduced use:

+ +

-sfp or --space-function-paren

+ +
  myfunc( $a, $b, $c );    # default 
+  myfunc ( $a, $b, $c );   # -sfp
+ +

You will probably also want to use the flag -skp (previous item) too.

+ +
+
Trimming whitespace around qw quotes
+
+ +

-tqw or --trim-qw provide the default behavior of trimming spaces around multi-line qw quotes and indenting them appropriately.

+ +

-ntqw or --notrim-qw cause leading and trailing whitespace around multi-line qw quotes to be left unchanged. This option will not normally be necessary, but was added for testing purposes, because in some versions of perl, trimming qw quotes changes the syntax tree.

+ +
+
-sbq=n or --space-backslash-quote=n
+
+ +

Lines like

+ +
       $str1=\"string1";
+       $str2=\'string2';
+ +

can confuse syntax highlighters unless a space is included between the backslash and the single or double quotation mark.

+ +

This can be controlled with the value of n as follows:

+ +
    -sbq=0 means no space between the backslash and quote
+    -sbq=1 means follow the example of the source code
+    -sbq=2 means always put a space between the backslash and quote
+ +

The default is -sbq=1, meaning that a space will be used 0if there is one in the source code.

+ +
+
Trimming trailing whitespace from lines of POD
+
+ +

-trp or --trim-pod will remove trailing whitespace from lines of POD. The default is not to do this.

+ +
+
+ +

Comment Controls

+ +

Perltidy has a number of ways to control the appearance of both block comments and side comments. The term block comment here refers to a full-line comment, whereas side comment will refer to a comment which appears on a line to the right of some code.

+ +
+ +
-ibc, --indent-block-comments
+
+ +

Block comments normally look best when they are indented to the same level as the code which follows them. This is the default behavior, but you may use -nibc to keep block comments left-justified. Here is an example:

+ +
             # this comment is indented      (-ibc, default)
+             if ($task) { yyy(); }
+ +

The alternative is -nibc:

+ +
 # this comment is not indented              (-nibc)
+             if ($task) { yyy(); }
+ +

See also the next item, -isbc, as well as -sbc, for other ways to have some indented and some outdented block comments.

+ +
+
-isbc, --indent-spaced-block-comments
+
+ +

If there is no leading space on the line, then the comment will not be indented, and otherwise it may be.

+ +

If both -ibc and -isbc are set, then -isbc takes priority.

+ +
+
-olc, --outdent-long-comments
+
+ +

When -olc is set, lines which are full-line (block) comments longer than the value maximum-line-length will have their indentation removed. This is the default; use -nolc to prevent outdenting.

+ +
+
-msc=n, --minimum-space-to-comment=n
+
+ +

Side comments look best when lined up several spaces to the right of code. Perltidy will try to keep comments at least n spaces to the right. The default is n=4 spaces.

+ +
+
-fpsc=n, --fixed-position-side-comment=n
+
+ +

This parameter tells perltidy to line up side comments in column number n whenever possible. The default, n=0, will not do this.

+ +
+
-iscl, --ignore-side-comment-lengths
+
+ +

This parameter causes perltidy to ignore the length of side comments when setting line breaks. The default, -niscl, is to include the length of side comments when breaking lines to stay within the length prescribed by the -l=n maximum line length parameter. For example, the following long single line would remain intact with -l=80 and -iscl:

+ +
     perltidy -l=80 -iscl
+        $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well
+ +

whereas without the -iscl flag the line will be broken:

+ +
     perltidy -l=80
+        $vmsfile =~ s/;[\d\-]*$//
+          ;    # Clip off version number; we can use a newer version as well
+   
+ +
+
-hsc, --hanging-side-comments
+
+ +

By default, perltidy tries to identify and align "hanging side comments", which are something like this:

+ +
        my $IGNORE = 0;    # This is a side comment
+                           # This is a hanging side comment
+                           # And so is this
+ +

A comment is considered to be a hanging side comment if (1) it immediately follows a line with a side comment, or another hanging side comment, and (2) there is some leading whitespace on the line. To deactivate this feature, use -nhsc or --nohanging-side-comments. If block comments are preceded by a blank line, or have no leading whitespace, they will not be mistaken as hanging side comments.

+ +
+
Closing Side Comments
+
+ +

A closing side comment is a special comment which perltidy can automatically create and place after the closing brace of a code block. They can be useful for code maintenance and debugging. The command -csc (or --closing-side-comments) adds or updates closing side comments. For example, here is a small code snippet

+ +
        sub message {
+            if ( !defined( $_[0] ) ) {
+                print("Hello, World\n");
+            }
+            else {
+                print( $_[0], "\n" );
+            }
+        }
+ +

And here is the result of processing with perltidy -csc:

+ +
        sub message {
+            if ( !defined( $_[0] ) ) {
+                print("Hello, World\n");
+            }
+            else {
+                print( $_[0], "\n" );
+            }
+        } ## end sub message
+ +

A closing side comment was added for sub message in this case, but not for the if and else blocks, because they were below the 6 line cutoff limit for adding closing side comments. This limit may be changed with the -csci command, described below.

+ +

The command -dcsc (or --delete-closing-side-comments) reverses this process and removes these comments.

+ +

Several commands are available to modify the behavior of these two basic commands, -csc and -dcsc:

+ +
+ +
-csci=n, or --closing-side-comment-interval=n
+
+ +

where n is the minimum number of lines that a block must have in order for a closing side comment to be added. The default value is n=6. To illustrate:

+ +
        # perltidy -csci=2 -csc
+        sub message {
+            if ( !defined( $_[0] ) ) {
+                print("Hello, World\n");
+            } ## end if ( !defined( $_[0] ))
+            else {
+                print( $_[0], "\n" );
+            } ## end else [ if ( !defined( $_[0] ))
+        } ## end sub message
+ +

Now the if and else blocks are commented. However, now this has become very cluttered.

+ +
+
-cscp=string, or --closing-side-comment-prefix=string
+
+ +

where string is the prefix used before the name of the block type. The default prefix, shown above, is ## end. This string will be added to closing side comments, and it will also be used to recognize them in order to update, delete, and format them. Any comment identified as a closing side comment will be placed just a single space to the right of its closing brace.

+ +
+
-cscl=string, or --closing-side-comment-list
+
+ +

where string is a list of block types to be tagged with closing side comments. By default, all code block types preceded by a keyword or label (such as if, sub, and so on) will be tagged. The -cscl command changes the default list to be any selected block types; see "Specifying Block Types". For example, the following command requests that only sub's, labels, BEGIN, and END blocks be affected by any -csc or -dcsc operation:

+ +
   -cscl="sub : BEGIN END"
+ +
+
-csct=n, or --closing-side-comment-maximum-text=n
+
+ +

The text appended to certain block types, such as an if block, is whatever lies between the keyword introducing the block, such as if, and the opening brace. Since this might be too much text for a side comment, there needs to be a limit, and that is the purpose of this parameter. The default value is n=20, meaning that no additional tokens will be appended to this text after its length reaches 20 characters. Omitted text is indicated with .... (Tokens, including sub names, are never truncated, however, so actual lengths may exceed this). To illustrate, in the above example, the appended text of the first block is ( !defined( $_[0] ).... The existing limit of n=20 caused this text to be truncated, as indicated by the .... See the next flag for additional control of the abbreviated text.

+ +
+
-cscb, or --closing-side-comments-balanced
+
+ +

As discussed in the previous item, when the closing-side-comment-maximum-text limit is exceeded the comment text must be truncated. Older versions of perltidy terminated with three dots, and this can still be achieved with -ncscb:

+ +
  perltidy -csc -ncscb
+  } ## end foreach my $foo (sort { $b cmp $a ...
+ +

However this causes a problem with editors which cannot recognize comments or are not configured to do so because they cannot "bounce" around in the text correctly. The -cscb flag has been added to help them by appending appropriate balancing structure:

+ +
  perltidy -csc -cscb
+  } ## end foreach my $foo (sort { $b cmp $a ... })
+ +

The default is -cscb.

+ +
+
-csce=n, or --closing-side-comment-else-flag=n
+
+ +

The default, n=0, places the text of the opening if statement after any terminal else.

+ +

If n=2 is used, then each elsif is also given the text of the opening if statement. Also, an else will include the text of a preceding elsif statement. Note that this may result some long closing side comments.

+ +

If n=1 is used, the results will be the same as n=2 whenever the resulting line length is less than the maximum allowed.

+ +
+
-cscb, or --closing-side-comments-balanced
+
+ +

When using closing-side-comments, and the closing-side-comment-maximum-text limit is exceeded, then the comment text must be abbreviated. It is terminated with three dots if the -cscb flag is negated:

+ +
  perltidy -csc -ncscb
+  } ## end foreach my $foo (sort { $b cmp $a ...
+ +

This causes a problem with older editors which do not recognize comments because they cannot "bounce" around in the text correctly. The -cscb flag tries to help them by appending appropriate terminal balancing structures:

+ +
  perltidy -csc -cscb
+  } ## end foreach my $foo (sort { $b cmp $a ... })
+ +

The default is -cscb.

+ +
+
-cscw, or --closing-side-comment-warnings
+
+ +

This parameter is intended to help make the initial transition to the use of closing side comments. It causes two things to happen if a closing side comment replaces an existing, different closing side comment: first, an error message will be issued, and second, the original side comment will be placed alone on a new specially marked comment line for later attention.

+ +

The intent is to avoid clobbering existing hand-written side comments which happen to match the pattern of closing side comments. This flag should only be needed on the first run with -csc.

+ +
+
+ +

Important Notes on Closing Side Comments:

+ +
    + +
  • Closing side comments are only placed on lines terminated with a closing brace. Certain closing styles, such as the use of cuddled elses (-ce), preclude the generation of some closing side comments.

    + +
  • +
  • Please note that adding or deleting of closing side comments takes place only through the commands -csc or -dcsc. The other commands, if used, merely modify the behavior of these two commands.

    + +
  • +
  • It is recommended that the -cscw flag be used along with -csc on the first use of perltidy on a given file. This will prevent loss of any existing side comment data which happens to have the csc prefix.

    + +
  • +
  • Once you use -csc, you should continue to use it so that any closing side comments remain correct as code changes. Otherwise, these comments will become incorrect as the code is updated.

    + +
  • +
  • If you edit the closing side comments generated by perltidy, you must also change the prefix to be different from the closing side comment prefix. Otherwise, your edits will be lost when you rerun perltidy with -csc. For example, you could simply change ## end to be ## End, since the test is case sensitive. You may also want to use the -ssc flag to keep these modified closing side comments spaced the same as actual closing side comments.

    + +
  • +
  • Temporarily generating closing side comments is a useful technique for exploring and/or debugging a perl script, especially one written by someone else. You can always remove them with -dcsc.

    + +
  • +
+ +
+
Static Block Comments
+
+ +

Static block comments are block comments with a special leading pattern, ## by default, which will be treated slightly differently from other block comments. They effectively behave as if they had glue along their left and top edges, because they stick to the left edge and previous line when there is no blank spaces in those places. This option is particularly useful for controlling how commented code is displayed.

+ +
+ +
-sbc, --static-block-comments
+
+ +

When -sbc is used, a block comment with a special leading pattern, ## by default, will be treated specially.

+ +

Comments so identified are treated as follows:

+ +
    + +
  • If there is no leading space on the line, then the comment will not be indented, and otherwise it may be,

    + +
  • +
  • no new blank line will be inserted before such a comment, and

    + +
  • +
  • such a comment will never become a hanging side comment.

    + +
  • +
+ +

For example, assuming @month_of_year is left-adjusted:

+ +
    @month_of_year = (    # -sbc (default)
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+    ##  'Dec', 'Nov'
+        'Nov', 'Dec');
+ +

Without this convention, the above code would become

+ +
    @month_of_year = (   # -nsbc
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+  
+        ##  'Dec', 'Nov'
+        'Nov', 'Dec'
+    );
+ +

which is not as clear. The default is to use -sbc. This may be deactivated with -nsbc.

+ +
+
-sbcp=string, --static-block-comment-prefix=string
+
+ +

This parameter defines the prefix used to identify static block comments when the -sbc parameter is set. The default prefix is ##, corresponding to -sbcp=##. The prefix is actually part of a perl pattern used to match lines and it must either begin with # or ^#. In the first case a prefix ^\s* will be added to match any leading whitespace, while in the second case the pattern will match only comments with no leading whitespace. For example, to identify all comments as static block comments, one would use -sbcp=#. To identify all left-adjusted comments as static block comments, use -sbcp='^#'.

+ +

Please note that -sbcp merely defines the pattern used to identify static block comments; it will not be used unless the switch -sbc is set. Also, please be aware that since this string is used in a perl regular expression which identifies these comments, it must enable a valid regular expression to be formed.

+ +

A pattern which can be useful is:

+ +
    -sbcp=^#{2,}[^\s#] 
+ +

This pattern requires a static block comment to have at least one character which is neither a # nor a space. It allows a line containing only '#' characters to be rejected as a static block comment. Such lines are often used at the start and end of header information in subroutines and should not be separated from the intervening comments, which typically begin with just a single '#'.

+ +
+
-osbc, --outdent-static-block-comments
+
+ +

The command -osbc will cause static block comments to be outdented by 2 spaces (or whatever -ci=n has been set to), if possible.

+ +
+
+ +
+
Static Side Comments
+
+ +

Static side comments are side comments with a special leading pattern. This option can be useful for controlling how commented code is displayed when it is a side comment.

+ +
+ +
-ssc, --static-side-comments
+
+ +

When -ssc is used, a side comment with a static leading pattern, which is ## by default, will be spaced only a single space from previous character, and it will not be vertically aligned with other side comments.

+ +

The default is -nssc.

+ +
+
-sscp=string, --static-side-comment-prefix=string
+
+ +

This parameter defines the prefix used to identify static side comments when the -ssc parameter is set. The default prefix is ##, corresponding to -sscp=##.

+ +

Please note that -sscp merely defines the pattern used to identify static side comments; it will not be used unless the switch -ssc is set. Also, note that this string is used in a perl regular expression which identifies these comments, so it must enable a valid regular expression to be formed.

+ +
+
+ +
+
+ +

Skipping Selected Sections of Code

+ +

Selected lines of code may be passed verbatim to the output without any formatting. This feature is enabled by default but can be disabled with the --noformat-skipping or -nfs flag. It should be used sparingly to avoid littering code with markers, but it might be helpful for working around occasional problems. For example it might be useful for keeping the indentation of old commented code unchanged, keeping indentation of long blocks of aligned comments unchanged, keeping certain list formatting unchanged, or working around a glitch in perltidy.

+ +
+ +
-fs, --format-skipping
+
+ +

This flag, which is enabled by default, causes any code between special beginning and ending comment markers to be passed to the output without formatting. The default beginning marker is #<<< and the default ending marker is #>>> but they may be changed (see next items below). Additional text may appear on these special comment lines provided that it is separated from the marker by at least one space. For example

+ +
 #<<<  do not let perltidy touch this
+    my @list = (1,
+                1, 1,
+                1, 2, 1,
+                1, 3, 3, 1,
+                1, 4, 6, 4, 1,);
+ #>>>
+ +

The comment markers may be placed at any location that a block comment may appear. If they do not appear to be working, use the -log flag and examine the .LOG file. Use -nfs to disable this feature.

+ +
+
-fsb=string, --format-skipping-begin=string
+
+ +

The -fsb=string parameter may be used to change the beginning marker for format skipping. The default is equivalent to -fsb='#<<<'. The string that you enter must begin with a # and should be in quotes as necessary to get past the command shell of your system. It is actually the leading text of a pattern that is constructed by appending a '\s', so you must also include backslashes for characters to be taken literally rather than as patterns.

+ +

Some examples show how example strings become patterns:

+ +
 -fsb='#\{\{\{' becomes /^#\{\{\{\s/  which matches  #{{{ but not #{{{{
+ -fsb='#\*\*'   becomes /^#\*\*\s/    which matches  #** but not #***
+ -fsb='#\*{2,}' becomes /^#\*{2,}\s/  which matches  #** and #***** 
+ +
+
-fse=string, --format-skipping-end=string
+
+ +

The -fsb=string is the corresponding parameter used to change the ending marker for format skipping. The default is equivalent to -fse='#<<<'.

+ +
+
+ +

Line Break Control

+ +

The parameters in this section control breaks after non-blank lines of code. Blank lines are controlled separately by parameters in the section "Blank Line Control".

+ +
+ +
-fnl, --freeze-newlines
+
+ +

If you do not want any changes to the line breaks within lines of code in your script, set -fnl, and they will remain fixed, and the rest of the commands in this section and sections "Controlling List Formatting", "Retaining or Ignoring Existing Line Breaks". You may want to use -noll with this.

+ +

Note: If you also want to keep your blank lines exactly as they are, you can use the -fbl flag which is described in the section "Blank Line Control".

+ +
+
-ce, --cuddled-else
+
+ +

Enable the "cuddled else" style, in which else and elsif are follow immediately after the curly brace closing the previous block. The default is not to use cuddled elses, and is indicated with the flag -nce or --nocuddled-else. Here is a comparison of the alternatives:

+ +
  # -ce
+  if ($task) {
+      yyy();
+  } else {    
+      zzz();
+  }
+
+  # -nce (default)
+  if ($task) {
+        yyy();
+  }
+  else {    
+        zzz();
+  }
+ +

In this example the keyword else is placed on the same line which begins with the preceding closing block brace and is followed by its own opening block brace on the same line. Other keywords and function names which are formatted with this "cuddled" style are elsif, continue, catch, finally.

+ +

Other block types can be formatted by specifying their names on a separate parameter -cbl, described in a later section.

+ +

Cuddling between a pair of code blocks requires that the closing brace of the first block start a new line. If this block is entirely on one line in the input file, it is necessary to decide if it should be broken to allow cuddling. This decision is controlled by the flag -cbo=n discussed below. The default and recommended value of -cbo=1 bases this decision on the first block in the chain. If it spans multiple lines then cuddling is made and continues along the chain, regardless of the sizes of subsequent blocks. Otherwise, short lines remain intact.

+ +

So for example, the -ce flag would not have any effect if the above snippet is rewritten as

+ +
  if ($task) { yyy() }
+  else {    zzz() }
+ +

If the first block spans multiple lines, then cuddling can be done and will continue for the subsequent blocks in the chain, as illustrated in the previous snippet.

+ +

If there are blank lines between cuddled blocks they will be eliminated. If there are comments after the closing brace where cuddling would occur then cuddling will be prevented. If this occurs, cuddling will restart later in the chain if possible.

+ +
+
-cb, --cuddled-blocks
+
+ +

This flag is equivalent to -ce.

+ +
+
-cbl, --cuddled-block-list
+
+ +

The built-in default cuddled block types are else, elsif, continue, catch, finally.

+ +

Additional block types to which the -cuddled-blocks style applies can be defined by this parameter. This parameter is a character string, giving a list of block types separated by commas or spaces. For example, to cuddle code blocks of type sort, map and grep, in addition to the default types, the string could be set to

+ +
  -cbl="sort map grep"
+ +

or equivalently

+ +
  -cbl=sort,map,grep 
+ +

Note however that these particular block types are typically short so there might not be much opportunity for the cuddled format style.

+ +

Using commas avoids the need to protect spaces with quotes.

+ +

As a diagnostic check, the flag --dump-cuddled-block-list or -dcbl can be used to view the hash of values that are generated by this flag.

+ +

Finally, note that the -cbl flag by itself merely specifies which blocks are formatted with the cuddled format. It has no effect unless this formatting style is activated with -ce.

+ +
+
-cblx, --cuddled-block-list-exclusive
+
+ +

When cuddled else formatting is selected with -ce, setting this flag causes perltidy to ignore its built-in defaults and rely exclusively on the block types specified on the -cbl flag described in the previous section. For example, to avoid using cuddled catch and finally, which among in the defaults, the following set of parameters could be used:

+ +
  perltidy -ce -cbl='else elsif continue' -cblx
+ +
+
-cbo=n, --cuddled-break-option=n
+
+ +

Cuddled formatting is only possible between a pair of code blocks if the closing brace of the first block starts a new line. If a block is encountered which is entirely on a single line, and cuddled formatting is selected, it is necessary to make a decision as to whether or not to "break" the block, meaning to cause it to span multiple lines. This parameter controls that decision. The options are:

+ +
   cbo=0  Never force a short block to break.
+   cbo=1  If the first of a pair of blocks is broken in the input file, 
+          then break the second [DEFAULT].
+   cbo=2  Break open all blocks for maximal cuddled formatting.
+ +

The default and recommended value is cbo=1. With this value, if the starting block of a chain spans multiple lines, then a cascade of breaks will occur for remaining blocks causing the entire chain to be cuddled.

+ +

The option cbo=0 can produce erratic cuddling if there are numerous one-line blocks.

+ +

The option cbo=2 produces maximal cuddling but will not allow any short blocks.

+ +
+
-bl, --opening-brace-on-new-line
+
+ +

Use the flag -bl to place the opening brace on a new line:

+ +
  if ( $input_file eq '-' )    # -bl 
+  {                          
+      important_function();
+  }
+ +

This flag applies to all structural blocks, including named sub's (unless the -sbl flag is set -- see next item).

+ +

The default style, -nbl, places an opening brace on the same line as the keyword introducing it. For example,

+ +
  if ( $input_file eq '-' ) {   # -nbl (default)
+ +
+
-sbl, --opening-sub-brace-on-new-line
+
+ +

The flag -sbl can be used to override the value of -bl for the opening braces of named sub's. For example,

+ +
 perltidy -sbl
+ +

produces this result:

+ +
 sub message
+ {
+    if (!defined($_[0])) {
+        print("Hello, World\n");
+    }
+    else {
+        print($_[0], "\n");
+    }
+ }
+ +

This flag is negated with -nsbl. If -sbl is not specified, the value of -bl is used.

+ +
+
-asbl, --opening-anonymous-sub-brace-on-new-line
+
+ +

The flag -asbl is like the -sbl flag except that it applies to anonymous sub's instead of named subs. For example

+ +
 perltidy -asbl
+ +

produces this result:

+ +
 $a = sub
+ {
+     if ( !defined( $_[0] ) ) {
+         print("Hello, World\n");
+     }
+     else {
+         print( $_[0], "\n" );
+     }
+ };
+ +

This flag is negated with -nasbl, and the default is -nasbl.

+ +
+
-bli, --brace-left-and-indent
+
+ +

The flag -bli is the same as -bl but in addition it causes one unit of continuation indentation ( see -ci ) to be placed before an opening and closing block braces.

+ +

For example,

+ +
        if ( $input_file eq '-' )    # -bli
+          {
+            important_function();
+          }
+ +

By default, this extra indentation occurs for blocks of type: if, elsif, else, unless, for, foreach, sub, while, until, and also with a preceding label. The next item shows how to change this.

+ +
+
-blil=s, --brace-left-and-indent-list=s
+
+ +

Use this parameter to change the types of block braces for which the -bli flag applies; see "Specifying Block Types". For example, -blil='if elsif else' would apply it to only if/elsif/else blocks.

+ +
+
-bar, --opening-brace-always-on-right
+
+ +

The default style, -nbl places the opening code block brace on a new line if it does not fit on the same line as the opening keyword, like this:

+ +
        if ( $bigwasteofspace1 && $bigwasteofspace2
+          || $bigwasteofspace3 && $bigwasteofspace4 )
+        {
+            big_waste_of_time();
+        }
+ +

To force the opening brace to always be on the right, use the -bar flag. In this case, the above example becomes

+ +
        if ( $bigwasteofspace1 && $bigwasteofspace2
+          || $bigwasteofspace3 && $bigwasteofspace4 ) {
+            big_waste_of_time();
+        }
+ +

A conflict occurs if both -bl and -bar are specified.

+ +
+ +
+ +

The -otr flag is a hint that perltidy should not place a break between a comma and an opening token. For example:

+ +
    # default formatting
+    push @{ $self->{$module}{$key} },
+      {
+        accno       => $ref->{accno},
+        description => $ref->{description}
+      };
+
+    # perltidy -otr
+    push @{ $self->{$module}{$key} }, {
+        accno       => $ref->{accno},
+        description => $ref->{description}
+      };
+ +

The flag -otr is actually an abbreviation for three other flags which can be used to control parens, hash braces, and square brackets separately if desired:

+ +
  -opr  or --opening-paren-right
+  -ohbr or --opening-hash-brace-right
+  -osbr or --opening-square-bracket-right
+ +
+
-wn, --weld-nested-containers
+
+ +

The -wn flag causes closely nested pairs of opening and closing container symbols (curly braces, brackets, or parens) to be "welded" together, meaning that they are treated as if combined into a single unit, with the indentation of the innermost code reduced to be as if there were just a single container symbol.

+ +

For example:

+ +
        # default formatting
+        do {
+            {
+                next if $x == $y;    
+            }
+        } until $x++ > $z;
+
+        # perltidy -wn
+        do { {
+            next if $x == $y;
+        } } until $x++ > $z;
+ +

When this flag is set perltidy makes a preliminary pass through the file and identifies all nested pairs of containers. To qualify as a nested pair, the closing container symbols must be immediately adjacent. The opening symbols must either be adjacent, or, if the outer opening symbol is an opening paren, they may be separated by any single non-container symbol or something that looks like a function evaluation.

+ +

Any container symbol may serve as both the inner container of one pair and as the outer container of an adjacent pair. Consequently, any number of adjacent opening or closing symbols may join together in weld. For example, here are three levels of wrapped function calls:

+ +
        # default formatting
+        my (@date_time) = Localtime(
+            Date_to_Time(
+                Add_Delta_DHMS(
+                    $year, $month,  $day, $hour, $minute, $second,
+                    '0',   $offset, '0',  '0'
+                )
+            )
+        );
+
+        # perltidy -wn
+        my (@date_time) = Localtime( Date_to_Time( Add_Delta_DHMS(
+            $year, $month,  $day, $hour, $minute, $second,
+            '0',   $offset, '0',  '0'
+        ) ) );
+ +

Notice how the indentation of the inner lines are reduced by two levels in this case. This example also shows the typical result of this formatting, namely it is a sandwich consisting of an initial opening layer, a central section of any complexity forming the "meat" of the sandwich, and a final closing layer. This predictable structure helps keep the compacted structure readable.

+ +

The inner sandwich layer is required to be at least one line thick. If this cannot be achieved, welding does not occur. This constraint can cause formatting to take a couple of iterations to stabilize when it is first applied to a script. The -conv flag can be used to insure that the final format is achieved in a single run.

+ +

Here is an example illustrating a welded container within a welded containers:

+ +
        # default formatting
+        $x->badd(
+            bmul(
+                $class->new(
+                    abs(
+                        $sx * int( $xr->numify() ) & $sy * int( $yr->numify() )
+                    )
+                ),
+                $m
+            )
+        );
+
+        # perltidy -wn
+        $x->badd( bmul(
+            $class->new( abs(
+                $sx * int( $xr->numify() ) & $sy * int( $yr->numify() )
+            ) ),
+            $m
+        ) );
+ +

This format option is quite general but there are some limitations.

+ +

One limitation is that any line length limit still applies and can cause long welded sections to be broken into multiple lines.

+ +

Another limitation is that an opening symbol which delimits quoted text cannot be included in a welded pair. This is because quote delimiters are treated specially in perltidy.

+ +

Finally, the stacking of containers defined by this flag have priority over any other container stacking flags. This is because any welding is done first.

+ +
+
Vertical tightness of non-block curly braces, parentheses, and square brackets.
+
+ +

These parameters control what shall be called vertical tightness. Here are the main points:

+ +
    + +
  • Opening tokens (except for block braces) are controlled by -vt=n, or --vertical-tightness=n, where

    + +
     -vt=0 always break a line after opening token (default). 
    + -vt=1 do not break unless this would produce more than one 
    +         step in indentation in a line.
    + -vt=2 never break a line after opening token
    + +
  • +
  • You must also use the -lp flag when you use the -vt flag; the reason is explained below.

    + +
  • +
  • Closing tokens (except for block braces) are controlled by -vtc=n, or --vertical-tightness-closing=n, where

    + +
     -vtc=0 always break a line before a closing token (default), 
    + -vtc=1 do not break before a closing token which is followed 
    +        by a semicolon or another closing token, and is not in 
    +        a list environment.
    + -vtc=2 never break before a closing token.
    + +

    The rules for -vtc=1 are designed to maintain a reasonable balance between tightness and readability in complex lists.

    + +
  • +
  • Different controls may be applied to different token types, and it is also possible to control block braces; see below.

    + +
  • +
  • Finally, please note that these vertical tightness flags are merely hints to the formatter, and it cannot always follow them. Things which make it difficult or impossible include comments, blank lines, blocks of code within a list, and possibly the lack of the -lp parameter. Also, these flags may be ignored for very small lists (2 or 3 lines in length).

    + +
  • +
+ +

Here are some examples:

+ +
    # perltidy -lp -vt=0 -vtc=0
+    %romanNumerals = (
+                       one   => 'I',
+                       two   => 'II',
+                       three => 'III',
+                       four  => 'IV',
+    );
+
+    # perltidy -lp -vt=1 -vtc=0
+    %romanNumerals = ( one   => 'I',
+                       two   => 'II',
+                       three => 'III',
+                       four  => 'IV',
+    );
+
+    # perltidy -lp -vt=1 -vtc=1
+    %romanNumerals = ( one   => 'I',
+                       two   => 'II',
+                       three => 'III',
+                       four  => 'IV', );
+ +

The difference between -vt=1 and -vt=2 is shown here:

+ +
    # perltidy -lp -vt=1 
+    $init->add(
+                mysprintf( "(void)find_threadsv(%s);",
+                           cstring( $threadsv_names[ $op->targ ] )
+                )
+    );
+
+    # perltidy -lp -vt=2 
+    $init->add( mysprintf( "(void)find_threadsv(%s);",
+                           cstring( $threadsv_names[ $op->targ ] )
+                )
+    );
+ +

With -vt=1, the line ending in add( does not combine with the next line because the next line is not balanced. This can help with readability, but -vt=2 can be used to ignore this rule.

+ +

The tightest, and least readable, code is produced with both -vt=2 and -vtc=2:

+ +
    # perltidy -lp -vt=2 -vtc=2
+    $init->add( mysprintf( "(void)find_threadsv(%s);",
+                           cstring( $threadsv_names[ $op->targ ] ) ) );
+ +

Notice how the code in all of these examples collapses vertically as -vt increases, but the indentation remains unchanged. This is because perltidy implements the -vt parameter by first formatting as if -vt=0, and then simply overwriting one output line on top of the next, if possible, to achieve the desired vertical tightness. The -lp indentation style has been designed to allow this vertical collapse to occur, which is why it is required for the -vt parameter.

+ +

The -vt=n and -vtc=n parameters apply to each type of container token. If desired, vertical tightness controls can be applied independently to each of the closing container token types.

+ +

The parameters for controlling parentheses are -pvt=n or --paren-vertical-tightness=n, and -pcvt=n or --paren-vertical-tightness-closing=n.

+ +

Likewise, the parameters for square brackets are -sbvt=n or --square-bracket-vertical-tightness=n, and -sbcvt=n or --square-bracket-vertical-tightness-closing=n.

+ +

Finally, the parameters for controlling non-code block braces are -bvt=n or --brace-vertical-tightness=n, and -bcvt=n or --brace-vertical-tightness-closing=n.

+ +

In fact, the parameter -vt=n is actually just an abbreviation for -pvt=n -bvt=n sbvt=n, and likewise -vtc=n is an abbreviation for -pvtc=n -bvtc=n sbvtc=n.

+ +
+
-bbvt=n or --block-brace-vertical-tightness=n
+
+ +

The -bbvt=n flag is just like the -vt=n flag but applies to opening code block braces.

+ +
 -bbvt=0 break after opening block brace (default). 
+ -bbvt=1 do not break unless this would produce more than one 
+         step in indentation in a line.
+ -bbvt=2 do not break after opening block brace.
+ +

It is necessary to also use either -bl or -bli for this to work, because, as with other vertical tightness controls, it is implemented by simply overwriting a line ending with an opening block brace with the subsequent line. For example:

+ +
    # perltidy -bli -bbvt=0
+    if ( open( FILE, "< $File" ) )
+      {
+        while ( $File = <FILE> )
+          {
+            $In .= $File;
+            $count++;
+          }
+        close(FILE);
+      }
+
+    # perltidy -bli -bbvt=1
+    if ( open( FILE, "< $File" ) )
+      { while ( $File = <FILE> )
+          { $In .= $File;
+            $count++;
+          }
+        close(FILE);
+      }
+ +

By default this applies to blocks associated with keywords if, elsif, else, unless, for, foreach, sub, while, until, and also with a preceding label. This can be changed with the parameter -bbvtl=string, or --block-brace-vertical-tightness-list=string, where string is a space-separated list of block types. For more information on the possible values of this string, see "Specifying Block Types"

+ +

For example, if we want to just apply this style to if, elsif, and else blocks, we could use perltidy -bli -bbvt=1 -bbvtl='if elsif else'.

+ +

There is no vertical tightness control for closing block braces; with one exception they will be placed on separate lines. The exception is that a cascade of closing block braces may be stacked on a single line. See -scbb.

+ +
+ +
+ +

The -sot flag tells perltidy to "stack" opening tokens when possible to avoid lines with isolated opening tokens.

+ +

For example:

+ +
    # default
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+    # -sot
+    $opt_c = Text::CSV_XS->new( {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+ +

For detailed control of individual closing tokens the following controls can be used:

+ +
  -sop  or --stack-opening-paren
+  -sohb or --stack-opening-hash-brace
+  -sosb or --stack-opening-square-bracket
+  -sobb or --stack-opening-block-brace
+ +

The flag -sot is an abbreviation for -sop -sohb -sosb.

+ +

The flag -sobb is an abbreviation for -bbvt=2 -bbvtl='*'. This will case a cascade of opening block braces to appear on a single line, although this an uncommon occurrence except in test scripts.

+ +
+ +
+ +

The -sct flag tells perltidy to "stack" closing tokens when possible to avoid lines with isolated closing tokens.

+ +

For example:

+ +
    # default
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+    # -sct
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        } );
+ +

The -sct flag is somewhat similar to the -vtc flags, and in some cases it can give a similar result. The difference is that the -vtc flags try to avoid lines with leading opening tokens by "hiding" them at the end of a previous line, whereas the -sct flag merely tries to reduce the number of lines with isolated closing tokens by stacking them but does not try to hide them. For example:

+ +
    # -vtc=2
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1, } );
+ +

For detailed control of the stacking of individual closing tokens the following controls can be used:

+ +
  -scp  or --stack-closing-paren
+  -schb or --stack-closing-hash-brace
+  -scsb or --stack-closing-square-bracket
+  -scbb or --stack-closing-block-brace
+ +

The flag -sct is an abbreviation for stacking the non-block closing tokens, -scp -schb -scsb.

+ +

Stacking of closing block braces, -scbb, causes a cascade of isolated closing block braces to be combined into a single line as in the following example:

+ +
    # -scbb:
+    for $w1 (@w1) {
+        for $w2 (@w2) {
+            for $w3 (@w3) {
+                for $w4 (@w4) {
+                    push( @lines, "$w1 $w2 $w3 $w4\n" );
+                } } } }
+ +

To simplify input even further for the case in which both opening and closing non-block containers are stacked, the flag -sac or --stack-all-containers is an abbreviation for -sot -sot.

+ +
+
-dnl, --delete-old-newlines
+
+ +

By default, perltidy first deletes all old line break locations, and then it looks for good break points to match the desired line length. Use -ndnl or --nodelete-old-newlines to force perltidy to retain all old line break points.

+ +
+
-anl, --add-newlines
+
+ +

By default, perltidy will add line breaks when necessary to create continuations of long lines and to improve the script appearance. Use -nanl or --noadd-newlines to prevent any new line breaks.

+ +

This flag does not prevent perltidy from eliminating existing line breaks; see --freeze-newlines to completely prevent changes to line break points.

+ +
+
Controlling whether perltidy breaks before or after operators
+
+ +

Four command line parameters provide some control over whether a line break should be before or after specific token types. Two parameters give detailed control:

+ +

-wba=s or --want-break-after=s, and

+ +

-wbb=s or --want-break-before=s.

+ +

These parameters are each followed by a quoted string, s, containing a list of token types (separated only by spaces). No more than one of each of these parameters should be specified, because repeating a command-line parameter always overwrites the previous one before perltidy ever sees it.

+ +

By default, perltidy breaks after these token types: % + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=

+ +

And perltidy breaks before these token types by default: . << >> -> && || //

+ +

To illustrate, to cause a break after a concatenation operator, '.', rather than before it, the command line would be

+ +
  -wba="."
+ +

As another example, the following command would cause a break before math operators '+', '-', '/', and '*':

+ +
  -wbb="+ - / *"
+ +

These commands should work well for most of the token types that perltidy uses (use --dump-token-types for a list). Also try the -D flag on a short snippet of code and look at the .DEBUG file to see the tokenization. However, for a few token types there may be conflicts with hardwired logic which cause unexpected results. One example is curly braces, which should be controlled with the parameter bl provided for that purpose.

+ +

WARNING Be sure to put these tokens in quotes to avoid having them misinterpreted by your command shell.

+ +

Two additional parameters are available which, though they provide no further capability, can simplify input are:

+ +

-baao or --break-after-all-operators,

+ +

-bbao or --break-before-all-operators.

+ +

The -baao sets the default to be to break after all of the following operators:

+ +
    % + - * / x != == >= <= =~ !~ < > | & 
+    = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+    . : ? && || and or err xor
+ +

and the -bbao flag sets the default to break before all of these operators. These can be used to define an initial break preference which can be fine-tuned with the -wba and -wbb flags. For example, to break before all operators except an = one could use --bbao -wba='=' rather than listing every single perl operator except = on a -wbb flag.

+ +
+
+ +

Controlling List Formatting

+ +

Perltidy attempts to place comma-separated arrays of values in tables which look good. Its default algorithms usually work well, and they have been improving with each release, but several parameters are available to control list formatting.

+ +
+ +
-boc, --break-at-old-comma-breakpoints
+
+ +

This flag tells perltidy to try to break at all old commas. This is not the default. Normally, perltidy makes a best guess at list formatting, and seldom uses old comma breakpoints. Usually this works well, but consider:

+ +
    my @list = (1,
+                1, 1,
+                1, 2, 1,
+                1, 3, 3, 1,
+                1, 4, 6, 4, 1,);
+ +

The default formatting will flatten this down to one line:

+ +
    # perltidy (default)
+    my @list = ( 1, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 4, 6, 4, 1, );
+ +

which hides the structure. Using -boc, plus additional flags to retain the original style, yields

+ +
    # perltidy -boc -lp -pt=2 -vt=1 -vtc=1
+    my @list = (1,
+                1, 1,
+                1, 2, 1,
+                1, 3, 3, 1,
+                1, 4, 6, 4, 1,);
+ +

A disadvantage of this flag is that all tables in the file must already be nicely formatted. For another possibility see the -fs flag in "Skipping Selected Sections of Code".

+ +
+
-mft=n, --maximum-fields-per-table=n
+
+ +

If the computed number of fields for any table exceeds n, then it will be reduced to n. The default value for n is a large number, 40. While this value should probably be left unchanged as a general rule, it might be used on a small section of code to force a list to have a particular number of fields per line, and then either the -boc flag could be used to retain this formatting, or a single comment could be introduced somewhere to freeze the formatting in future applications of perltidy.

+ +
    # perltidy -mft=2
+    @month_of_year = (    
+        'Jan', 'Feb',
+        'Mar', 'Apr',
+        'May', 'Jun',
+        'Jul', 'Aug',
+        'Sep', 'Oct',
+        'Nov', 'Dec'
+    );
+ +
+
-cab=n, --comma-arrow-breakpoints=n
+
+ +

A comma which follows a comma arrow, '=>', is given special consideration. In a long list, it is common to break at all such commas. This parameter can be used to control how perltidy breaks at these commas. (However, it will have no effect if old comma breaks are being forced because -boc is used). The possible values of n are:

+ +
 n=0 break at all commas after =>  
+ n=1 stable: break at all commas after => if container is open,
+     EXCEPT FOR one-line containers
+ n=2 break at all commas after =>, BUT try to form the maximum
+     maximum one-line container lengths
+ n=3 do not treat commas after => specially at all 
+ n=4 break everything: like n=0 but ALSO break a short container with
+     a => not followed by a comma when -vt=0 is used
+ n=5 stable: like n=1 but ALSO break at open one-line containers when
+     -vt=0 is used (default)
+ +

For example, given the following single line, perltidy by default will not add any line breaks because it would break the existing one-line container:

+ +
    bless { B => $B, Root => $Root } => $package;
+ +

Using -cab=0 will force a break after each comma-arrow item:

+ +
    # perltidy -cab=0:
+    bless {
+        B    => $B,
+        Root => $Root
+    } => $package;
+ +

If perltidy is subsequently run with this container broken, then by default it will break after each '=>' because the container is now broken. To reform a one-line container, the parameter -cab=2 could be used.

+ +

The flag -cab=3 can be used to prevent these commas from being treated specially. In this case, an item such as "01" => 31 is treated as a single item in a table. The number of fields in this table will be determined by the same rules that are used for any other table. Here is an example.

+ +
    # perltidy -cab=3
+    my %last_day = (
+        "01" => 31, "02" => 29, "03" => 31, "04" => 30,
+        "05" => 31, "06" => 30, "07" => 31, "08" => 31,
+        "09" => 30, "10" => 31, "11" => 30, "12" => 31
+    );
+ +
+
+ +

Retaining or Ignoring Existing Line Breaks

+ +

Several additional parameters are available for controlling the extent to which line breaks in the input script influence the output script. In most cases, the default parameter values are set so that, if a choice is possible, the output style follows the input style. For example, if a short logical container is broken in the input script, then the default behavior is for it to remain broken in the output script.

+ +

Most of the parameters in this section would only be required for a one-time conversion of a script from short container lengths to longer container lengths. The opposite effect, of converting long container lengths to shorter lengths, can be obtained by temporarily using a short maximum line length.

+ +
+ +
-bol, --break-at-old-logical-breakpoints
+
+ +

By default, if a logical expression is broken at a &&, ||, and, or or, then the container will remain broken. Also, breaks at internal keywords if and unless will normally be retained. To prevent this, and thus form longer lines, use -nbol.

+ +
+
-bom, --break-at-old-method-breakpoints
+
+ +

By default, a method call arrow -> is considered a candidate for a breakpoint, but method chains will fill to the line width before a break is considered. With -bom, breaks before the arrow are preserved, so if you have preformatted a method chain:

+ +
  my $q = $rs
+    ->related_resultset('CDs')
+    ->related_resultset('Tracks')
+    ->search({
+      'track.id' => {-ident => 'none_search.id'},
+    })->as_query;
+ +

It will keep these breaks, rather than become this:

+ +
  my $q = $rs->related_resultset('CDs')->related_resultset('Tracks')->search({
+      'track.id' => {-ident => 'none_search.id'},
+    })->as_query;
+ +

This flag will also look for and keep a 'cuddled' style of calls, in which lines begin with a closing paren followed by a call arrow, as in this example:

+ +
  my $q = $rs->related_resultset(
+      'CDs'
+  )->related_resultset(
+      'Tracks'
+  )->search( {
+      'track.id' => { -ident => 'none_search.id' },
+  } )->as_query;
+ +

You may want to include the -weld-nested-containers flag in this case to keep nested braces and parens together, as in the last line.

+ +
+
-bok, --break-at-old-keyword-breakpoints
+
+ +

By default, perltidy will retain a breakpoint before keywords which may return lists, such as sort and <map>. This allows chains of these operators to be displayed one per line. Use -nbok to prevent retaining these breakpoints.

+ +
+
-bot, --break-at-old-ternary-breakpoints
+
+ +

By default, if a conditional (ternary) operator is broken at a :, then it will remain broken. To prevent this, and thereby form longer lines, use -nbot.

+ +
+
-boa, --break-at-old-attribute-breakpoints
+
+ +

By default, if an attribute list is broken at a : in the source file, then it will remain broken. For example, given the following code, the line breaks at the ':'s will be retained:

+ +
                    my @field
+                      : field
+                      : Default(1)
+                      : Get('Name' => 'foo') : Set('Name');
+ +

If the attributes are on a single line in the source code then they will remain on a single line if possible.

+ +

To prevent this, and thereby always form longer lines, use -nboa.

+ +
+
-iob, --ignore-old-breakpoints
+
+ +

Use this flag to tell perltidy to ignore existing line breaks to the maximum extent possible. This will tend to produce the longest possible containers, regardless of type, which do not exceed the line length limit.

+ +
+
-kis, --keep-interior-semicolons
+
+ +

Use the -kis flag to prevent breaking at a semicolon if there was no break there in the input file. Normally perltidy places a newline after each semicolon which terminates a statement unless several statements are contained within a one-line brace block. To illustrate, consider the following input lines:

+ +
    dbmclose(%verb_delim); undef %verb_delim;
+    dbmclose(%expanded); undef %expanded;
+ +

The default is to break after each statement, giving

+ +
    dbmclose(%verb_delim);
+    undef %verb_delim;
+    dbmclose(%expanded);
+    undef %expanded;
+ +

With perltidy -kis the multiple statements are retained:

+ +
    dbmclose(%verb_delim); undef %verb_delim;
+    dbmclose(%expanded);   undef %expanded;
+ +

The statements are still subject to the specified value of maximum-line-length and will be broken if this maximum is exceeded.

+ +
+
+ +

Blank Line Control

+ +

Blank lines can improve the readability of a script if they are carefully placed. Perltidy has several commands for controlling the insertion, retention, and removal of blank lines.

+ +
+ +
-fbl, --freeze-blank-lines
+
+ +

Set -fbl if you want to the blank lines in your script to remain exactly as they are. The rest of the parameters in this section may then be ignored. (Note: setting the -fbl flag is equivalent to setting -mbl=0 and -kbl=2).

+ +
+
-bbc, --blanks-before-comments
+
+ +

A blank line will be introduced before a full-line comment. This is the default. Use -nbbc or --noblanks-before-comments to prevent such blank lines from being introduced.

+ +
+
-blbs=n, --blank-lines-before-subs=n
+
+ +

The parameter -blbs=n requests that least n blank lines precede a sub definition which does not follow a comment and which is more than one-line long. The default is <-blbs=1>. BEGIN and END blocks are included.

+ +

The requested number of blanks statement will be inserted regardless of the value of --maximum-consecutive-blank-lines=n (-mbl=n) with the exception that if -mbl=0 then no blanks will be output.

+ +

This parameter interacts with the value k of the parameter --maximum-consecutive-blank-lines=k (-mbl=k) as follows:

+ +

1. If -mbl=0 then no blanks will be output. This allows all blanks to be suppressed with a single parameter. Otherwise,

+ +

2. If the number of old blank lines in the script is less than n then additional blanks will be inserted to make the total n regardless of the value of -mbl=k.

+ +

3. If the number of old blank lines in the script equals or exceeds n then this parameter has no effect, however the total will not exceed value specified on the -mbl=k flag.

+ +
+
-blbp=n, --blank-lines-before-packages=n
+
+ +

The parameter -blbp=n requests that least n blank lines precede a package which does not follow a comment. The default is -blbp=1.

+ +

This parameter interacts with the value k of the parameter --maximum-consecutive-blank-lines=k (-mbl=k) in the same way as described for the previous item -blbs=n.

+ +
+
-bbs, --blanks-before-subs
+
+ +

For compatibility with previous versions, -bbs or --blanks-before-subs is equivalent to -blbp=1 and -blbs=1.

+ +

Likewise, -nbbs or --noblanks-before-subs is equivalent to -blbp=0 and -blbs=0.

+ +
+
-bbb, --blanks-before-blocks
+
+ +

A blank line will be introduced before blocks of coding delimited by for, foreach, while, until, and if, unless, in the following circumstances:

+ +
    + +
  • The block is not preceded by a comment.

    + +
  • +
  • The block is not a one-line block.

    + +
  • +
  • The number of consecutive non-blank lines at the current indentation depth is at least -lbl (see next section).

    + +
  • +
+ +

This is the default. The intention of this option is to introduce some space within dense coding. This is negated with -nbbb or --noblanks-before-blocks.

+ +
+
-lbl=n --long-block-line-count=n
+
+ +

This controls how often perltidy is allowed to add blank lines before certain block types (see previous section). The default is 8. Entering a value of 0 is equivalent to entering a very large number.

+ +
+
-blao=i or --blank-lines-after-opening-block=i
+
+ +

This control places a minimum of i blank lines after a line which ends with an opening block brace of a specified type. By default, this only applies to the block of a named sub, but this can be changed (see -blaol below). The default is not to do this (i=0).

+ +

Please see the note below on using the -blao and -blbc options.

+ +
+
-blbc=i or --blank-lines-before-closing-block=i
+
+ +

This control places a minimum of i blank lines before a line which begins with a closing block brace of a specified type. By default, this only applies to the block of a named sub, but this can be changed (see -blbcl below). The default is not to do this (i=0).

+ +
+
-blaol=s or --blank-lines-after-opening-block-list=s
+
+ +

The parameter s is a list of block type keywords to which the flag -blao should apply. The section "Specifying Block Types" explains how to list block types.

+ +
+
-blbcl=s or --blank-lines-before-closing-block-list=s
+
+ +

This parameter is a list of block type keywords to which the flag -blbc should apply. The section "Specifying Block Types" explains how to list block types.

+ +
+
Note on using the -blao and -blbc options.
+
+ +

These blank line controls introduce a certain minimum number of blank lines in the text, but the final number of blank lines may be greater, depending on values of the other blank line controls and the number of old blank lines. A consequence is that introducing blank lines with these and other controls cannot be exactly undone, so some experimentation with these controls is recommended before using them.

+ +

For example, suppose that for some reason we decide to introduce one blank space at the beginning and ending of all blocks. We could do this using

+ +
  perltidy -blao=2 -blbc=2 -blaol='*' -blbcl='*' filename
+ +

Now suppose the script continues to be developed, but at some later date we decide we don't want these spaces after all. we might expect that running with the flags -blao=0 and -blbc=0 will undo them. However, by default perltidy retains single blank lines, so the blank lines remain.

+ +

We can easily fix this by telling perltidy to ignore old blank lines by including the added parameter -kbl=0 and rerunning. Then the unwanted blank lines will be gone. However, this will cause all old blank lines to be ignored, perhaps even some that were added by hand to improve formatting. So please be cautious when using these parameters.

+ +
+
-mbl=n --maximum-consecutive-blank-lines=n
+
+ +

This parameter specifies the maximum number of consecutive blank lines which will be output within code sections of a script. The default is n=1. If the input file has more than n consecutive blank lines, the number will be reduced to n except as noted above for the -blbp and -blbs parameters. If n=0 then no blank lines will be output (unless all old blank lines are retained with the -kbl=2 flag of the next section).

+ +

This flag obviously does not apply to pod sections, here-documents, and quotes.

+ +
+
-kbl=n, --keep-old-blank-lines=n
+
+ +

The -kbl=n flag gives you control over how your existing blank lines are treated.

+ +

The possible values of n are:

+ +
 n=0 ignore all old blank lines
+ n=1 stable: keep old blanks, but limited by the value of the B<-mbl=n> flag
+ n=2 keep all old blank lines, regardless of the value of the B<-mbl=n> flag
+ +

The default is n=1.

+ +
+
-sob, --swallow-optional-blank-lines
+
+ +

This is equivalent to kbl=0 and is included for compatibility with previous versions.

+ +
+
-nsob, --noswallow-optional-blank-lines
+
+ +

This is equivalent to kbl=1 and is included for compatibility with previous versions.

+ +
+
+ +

Controls for blank lines around lines of consecutive keywords

+ +

The parameters in this section provide some control over the placement of blank lines within and around groups of statements beginning with selected keywords. These blank lines are called here keyword group blanks, and all of the parameters begin with --keyword-group-blanks*, or -kgb* for short. The default settings do not employ these controls but they can be enabled with the following parameters:

+ +

-kgbl=s or --keyword-group-blanks-list=s; s is a quoted string of keywords

+ +

-kgbs=s or --keyword-group-blanks-size=s; s gives the number of keywords required to form a group.

+ +

-kgbb=n or --keyword-group-blanks-before=n; n = (0, 1, or 2) controls a leading blank

+ +

-kgba=n or --keyword-group-blanks-after=n; n = (0, 1, or 2) controls a trailing blank

+ +

-kgbi or --keyword-group-blanks-inside is a switch for adding blanks between subgroups

+ +

-kgbd or --keyword-group-blanks-delete is a switch for removing initial blank lines between keywords

+ +

-kgbr=n or --keyword-group-blanks-repeat-count=n can limit the number of times this logic is applied

+ +

In addition, the following abbreviations are available to for simplified usage:

+ +

-kgb or --keyword-group-blanks is short for -kgbb=2 -kgba=2 kgbi

+ +

-nkgb or --nokeyword-group-blanks, is short for -kgbb=1 -kgba=1 nkgbi

+ +

Before describing the meaning of the parameters in detail let us look at an example which is formatted with default parameter settings.

+ +
        print "Entering test 2\n";
+        use Test;
+        use Encode qw(from_to encode decode
+          encode_utf8 decode_utf8
+          find_encoding is_utf8);
+        use charnames qw(greek);
+        my @encodings     = grep( /iso-?8859/, Encode::encodings() );
+        my @character_set = ( '0' .. '9', 'A' .. 'Z', 'a' .. 'z' );
+        my @source        = qw(ascii iso8859-1 cp1250);
+        my @destiny       = qw(cp1047 cp37 posix-bc);
+        my @ebcdic_sets   = qw(cp1047 cp37 posix-bc);
+        my $str           = join( '', map( chr($_), 0x20 .. 0x7E ) );
+        return unless ($str);
+ +

using perltidy -kgb gives:

+ +
        print "Entering test 2\n";
+                                      <----------this blank controlled by -kgbb
+        use Test;
+        use Encode qw(from_to encode decode
+          encode_utf8 decode_utf8
+          find_encoding is_utf8);
+        use charnames qw(greek);
+                                      <---------this blank controlled by -kgbi
+        my @encodings     = grep( /iso-?8859/, Encode::encodings() );
+        my @character_set = ( '0' .. '9', 'A' .. 'Z', 'a' .. 'z' );
+        my @source        = qw(ascii iso8859-1 cp1250);
+        my @destiny       = qw(cp1047 cp37 posix-bc);
+        my @ebcdic_sets   = qw(cp1047 cp37 posix-bc);
+        my $str           = join( '', map( chr($_), 0x20 .. 0x7E ) );
+                                      <----------this blank controlled by -kgba
+        return unless ($str);
+ +

Blank lines have been introduced around the my and use sequences. What happened is that the default keyword list includes my and use but not print and return. So a continuous sequence of nine my and use statements was located. This number exceeds the default threshold of five, so blanks were placed before and after the entire group. Then, since there was also a subsequence of six my lines, a blank line was introduced to separate them.

+ +

Finer control over blank placement can be achieved by using the individual parameters rather than the -kgb flag. The individual controls are as follows.

+ +

-kgbl=s or --keyword-group-blanks-list=s, where s is a quoted string, defines the set of keywords which will be formed into groups. The string is a space separated list of keywords. The default set is s="use require local our my", but any list of keywords may be used. Comment lines may also be included in a keyword group, even though they are not keywords. To include ordinary block comments, include the symbol BC. To include static block comments (which normally begin with '##'), include the symbol SBC.

+ +

-kgbs=s or --keyword-group-blanks-size=s, where s is a string describing the number of consecutive keyword statements forming a group. If s is an integer then it is the minimum number required for a group. A maximum value may also be given with the format s=min.max, where min is the minimum number and max is the maximum number, and the min and max values are separated by one or more dots. No groups will be found if the maximum is less than the minimum. The maximum is unlimited if not given. The default is s=5. Some examples:

+ +
    s      min   max         number for group
+    3      3     unlimited   3 or more
+    1.1    1     1           1
+    1..3   1     3           1 to 3
+    1.0    1     0           (no match)
+    
+ +

-kgbb=n or --keyword-group-blanks-before=n specifies whether a blank should appear before the first line of the group, as follows:

+ +
   n=0 => (delete) an existing blank line will be removed
+   n=1 => (stable) no change to the input file is made  [DEFAULT]
+   n=2 => (insert) a blank line is introduced if possible
+ +

-kgba=n or --keyword-group-blanks-after=n likewise specifies whether a blank should appear after the last line of the group, using the same scheme (0=delete, 1=stable, 2=insert).

+ +

-kgbi or --keyword-group-blanks-inside controls the insertion of blank lines between the first and last statement of the entire group. If there is a continuous run of a single statement type with more than the minimum threshold number (as specified with -kgbs=s) then this switch causes a blank line be inserted between this subgroup and the others. In the example above this happened between the use and my statements.

+ +

-kgbd or --keyword-group-blanks-delete controls the deletion of any blank lines that exist in the the group when it is first scanned. When statements are initially scanned, any existing blank lines are included in the collection. Any such orignial blank lines will be deleted before any other insertions are made when the parameter -kgbd is set. The default is not to do this, -nkgbd.

+ +

-kgbr=n or --keyword-group-blanks-repeat-count=n specifies n, the maximum number of times this logic will be applied to any file. The special value n=0 is the same as n=infinity which means it will be applied to an entire script [Default]. A value n=1 could be used to make it apply just one time for example. This might be useful for adjusting just the use statements in the top part of a module for example.

+ +

-kgb or --keyword-group-blanks is an abbreviation equivalent to setting -kgbb=1 -kgba=1 -kgbi. This turns on keyword group formatting with a set of default values.

+ +

-nkgb or --nokeyword-group-blanks is equivalent to -kgbb=0 -kgba nkgbi. This flag turns off keyword group blank lines and is the default setting.

+ +

Here are a few notes about the functioning of this technique.

+ +
    + +
  • These parameters are probably more useful as part of a major code reformatting operation rather than as a routine formatting operation.

    + +

    In particular, note that deleting old blank lines with -kgbd is an irreversible operation so it should be applied with care. Existing blank lines may be serving an important role in controlling vertical alignment.

    + +
  • +
  • Conflicts which arise among these kgb* parameters and other blank line controls are generally resolved by producing the maximum number of blank lines implied by any parameter.

    + +

    For example, if the flags --freeze-blank-lines, or --keep-old-blank-lines=2, are set, then they have priority over any blank line deletion implied by the -kgb flags of this section, so no blank lines will be deleted.

    + +

    For another example, if a keyword group ends at a sub and the flag kgba=0 requests no blank line there, but we also have --blank-lines-before-subs=2, then two blank lines will still be introduced before the sub.

    + +
  • +
  • The introduction of blank lines does not occur if it would conflict with other input controls or code validity. For example, a blank line will not be placed within a here-doc or within a section of code marked with format skipping comments. And in general, a blank line will only be introduced at the end of a group if the next statement is a line of code.

    + +
  • +
  • The count which is used to determine the group size is not the number of lines but rather the total number of keywords which are found. Individual statements with a certain leading keyword may continue on multiple lines, but if any of these lines is nested more than one level deep then that group will be ended.

    + +
  • +
  • The search for groups of lines with similar leading keywords is based on the input source, not the final formatted source. Consequently, if the source code is badly formatted, it would be best to make a first formatting pass without these options.

    + +
  • +
+ +

Styles

+ +

A style refers to a convenient collection of existing parameters.

+ +
+ +
-gnu, --gnu-style
+
+ +

-gnu gives an approximation to the GNU Coding Standards (which do not apply to perl) as they are sometimes implemented. At present, this style overrides the default style with the following parameters:

+ +
    -lp -bl -noll -pt=2 -bt=2 -sbt=2 -icp
+ +
+
-pbp, --perl-best-practices
+
+ +

-pbp is an abbreviation for the parameters in the book Perl Best Practices by Damian Conway:

+ +
    -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+    -wbb="% + - * / x != == >= <= =~ !~ < > | & = 
+          **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x="
+ +

Please note that this parameter set includes -st and -se flags, which make perltidy act as a filter on one file only. These can be overridden by placing -nst and/or -nse after the -pbp parameter.

+ +

Also note that the value of continuation indentation, -ci=4, is equal to the value of the full indentation, -i=4. In some complex statements perltidy will produce nicer results with -ci=2. This can be implemented by including -ci=2 after the -pbp parameter. For example,

+ +
    # perltidy -pbp
+    $self->{_text} = (
+         !$section        ? ''
+        : $type eq 'item' ? "the $section entry"
+        :                   "the section on $section"
+        )
+        . (
+        $page
+        ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+        : ' elsewhere in this document'
+        );
+
+    # perltidy -pbp -ci=2
+    $self->{_text} = (
+         !$section        ? ''
+        : $type eq 'item' ? "the $section entry"
+        :                   "the section on $section"
+      )
+      . (
+        $page
+        ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+        : ' elsewhere in this document'
+      );
+ +
+
One-line blocks
+
+ +

There are a few points to note regarding one-line blocks. A one-line block is something like this,

+ +
        if ($x > 0) { $y = 1 / $x }  
+ +

where the contents within the curly braces is short enough to fit on a single line.

+ +

With few exceptions, perltidy retains existing one-line blocks, if it is possible within the line-length constraint, but it does not attempt to form new ones. In other words, perltidy will try to follow the one-line block style of the input file.

+ +

If an existing one-line block is longer than the maximum line length, however, it will be broken into multiple lines. When this happens, perltidy checks for and adds any optional terminating semicolon (unless the -nasc option is used) if the block is a code block.

+ +

The main exception is that perltidy will attempt to form new one-line blocks following the keywords map, eval, and sort, because these code blocks are often small and most clearly displayed in a single line.

+ +

One-line block rules can conflict with the cuddled-else option. When the cuddled-else option is used, perltidy retains existing one-line blocks, even if they do not obey cuddled-else formatting.

+ +

Occasionally, when one-line blocks get broken because they exceed the available line length, the formatting will violate the requested brace style. If this happens, reformatting the script a second time should correct the problem.

+ +

Sometimes it might be desirable to convert a script to have one-line blocks whenever possible. Although there is currently no flag for this, a simple workaround is to execute perltidy twice, once with the flag -noadd-newlines and then once again with normal parameters, like this:

+ +
     cat infile | perltidy -nanl | perltidy >outfile
+ +

When executed on this snippet

+ +
    if ( $? == -1 ) {
+        die "failed to execute: $!\n";
+    }
+    if ( $? == -1 ) {
+        print "Had enough.\n";
+        die "failed to execute: $!\n";
+    }
+ +

the result is

+ +
    if ( $? == -1 ) { die "failed to execute: $!\n"; }
+    if ( $? == -1 ) {
+        print "Had enough.\n";
+        die "failed to execute: $!\n";
+    }
+ +

This shows that blocks with a single statement become one-line blocks.

+ +
+
-olbs=n, --one-line-block-semicolons=n
+
+ +

This flag controls the placement of semicolons at the end of one-line blocks. Semicolons are optional before a closing block brace, and frequently they are omitted at the end of a one-line block containing just a single statement. By default, perltidy follows the input file regarding these semicolons, but this behavior can be controlled by this flag. The values of n are:

+ +
  n=0 remove terminal semicolons in one-line blocks having a single statement
+  n=1 stable; keep input file placement of terminal semicolons [DEFAULT ]
+  n=2 add terminal semicolons in all one-line blocks
+ +

Note that the n=2 option has no effect if adding semicolons is prohibited with the -nasc flag. Also not that while n=2 adds missing semicolons to all one-line blocks, regardless of complexity, the n=0 option only removes ending semicolons which terminate one-line blocks containing just one semicolon. So these two options are not exact inverses.

+ +
+
+ +

Controlling Vertical Alignment

+ +

Vertical alignment refers to lining up certain symbols in list of consecutive similar lines to improve readability. For example, the "fat commas" are aligned in the following statement:

+ +
        $data = $pkg->new(
+            PeerAddr => join( ".", @port[ 0 .. 3 ] ),   
+            PeerPort => $port[4] * 256 + $port[5],
+            Proto    => 'tcp'
+        );
+ +

The only explicit control on vertical alignment is to turn it off using -novalign, a flag mainly intended for debugging. However, vertical alignment can be forced to stop and restart by selectively introducing blank lines. For example, a blank has been inserted in the following code to keep somewhat similar things aligned.

+ +
    %option_range = (
+        'format'             => [ 'tidy', 'html', 'user' ],
+        'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
+        'character-encoding' => [ 'none', 'utf8' ],
+
+        'block-brace-tightness'    => [ 0, 2 ],
+        'brace-tightness'          => [ 0, 2 ],
+        'paren-tightness'          => [ 0, 2 ],
+        'square-bracket-tightness' => [ 0, 2 ],
+    );
+ +

Other Controls

+ +
+ +
Deleting selected text
+
+ +

Perltidy can selectively delete comments and/or pod documentation. The command -dac or --delete-all-comments will delete all comments and all pod documentation, leaving just code and any leading system control lines.

+ +

The command -dp or --delete-pod will remove all pod documentation (but not comments).

+ +

Two commands which remove comments (but not pod) are: -dbc or --delete-block-comments and -dsc or --delete-side-comments. (Hanging side comments will be deleted with block comments here.)

+ +

The negatives of these commands also work, and are the defaults. When block comments are deleted, any leading 'hash-bang' will be retained. Also, if the -x flag is used, any system commands before a leading hash-bang will be retained (even if they are in the form of comments).

+ +
+
Writing selected text to a file
+
+ +

When perltidy writes a formatted text file, it has the ability to also send selected text to a file with a .TEE extension. This text can include comments and pod documentation.

+ +

The command -tac or --tee-all-comments will write all comments and all pod documentation.

+ +

The command -tp or --tee-pod will write all pod documentation (but not comments).

+ +

The commands which write comments (but not pod) are: -tbc or --tee-block-comments and -tsc or --tee-side-comments. (Hanging side comments will be written with block comments here.)

+ +

The negatives of these commands also work, and are the defaults.

+ +
+
Using a .perltidyrc command file
+
+ +

If you use perltidy frequently, you probably won't be happy until you create a .perltidyrc file to avoid typing commonly-used parameters. Perltidy will first look in your current directory for a command file named .perltidyrc. If it does not find one, it will continue looking for one in other standard locations.

+ +

These other locations are system-dependent, and may be displayed with the command perltidy -dpro. Under Unix systems, it will first look for an environment variable PERLTIDY. Then it will look for a .perltidyrc file in the home directory, and then for a system-wide file /usr/local/etc/perltidyrc, and then it will look for /etc/perltidyrc. Note that these last two system-wide files do not have a leading dot. Further system-dependent information will be found in the INSTALL file distributed with perltidy.

+ +

Under Windows, perltidy will also search for a configuration file named perltidy.ini since Windows does not allow files with a leading period (.). Use perltidy -dpro to see the possible locations for your system. An example might be C:\Documents and Settings\All Users\perltidy.ini.

+ +

Another option is the use of the PERLTIDY environment variable. The method for setting environment variables depends upon the version of Windows that you are using. Instructions for Windows 95 and later versions can be found here:

+ +

http://www.netmanage.com/000/20021101_005_tcm21-6336.pdf

+ +

Under Windows NT / 2000 / XP the PERLTIDY environment variable can be placed in either the user section or the system section. The later makes the configuration file common to all users on the machine. Be sure to enter the full path of the configuration file in the value of the environment variable. Ex. PERLTIDY=C:\Documents and Settings\perltidy.ini

+ +

The configuration file is free format, and simply a list of parameters, just as they would be entered on a command line. Any number of lines may be used, with any number of parameters per line, although it may be easiest to read with one parameter per line. Comment text begins with a #, and there must also be a space before the # for side comments. It is a good idea to put complex parameters in either single or double quotes.

+ +

Here is an example of a .perltidyrc file:

+ +
  # This is a simple of a .perltidyrc configuration file
+  # This implements a highly spaced style
+  -se    # errors to standard error output
+  -w     # show all warnings
+  -bl    # braces on new lines
+  -pt=0  # parens not tight at all
+  -bt=0  # braces not tight
+  -sbt=0 # square brackets not tight
+ +

The parameters in the .perltidyrc file are installed first, so any parameters given on the command line will have priority over them.

+ +

To avoid confusion, perltidy ignores any command in the .perltidyrc file which would cause some kind of dump and an exit. These are:

+ +
 -h -v -ddf -dln -dop -dsn -dtt -dwls -dwrs -ss
+ +

There are several options may be helpful in debugging a .perltidyrc file:

+ +
    + +
  • A very helpful command is --dump-profile or -dpro. It writes a list of all configuration filenames tested to standard output, and if a file is found, it dumps the content to standard output before exiting. So, to find out where perltidy looks for its configuration files, and which one if any it selects, just enter

    + +
      perltidy -dpro
    + +
  • +
  • It may be simplest to develop and test configuration files with alternative names, and invoke them with -pro=filename on the command line. Then rename the desired file to .perltidyrc when finished.

    + +
  • +
  • The parameters in the .perltidyrc file can be switched off with the -npro option.

    + +
  • +
  • The commands --dump-options, --dump-defaults, --dump-long-names, and --dump-short-names, all described below, may all be helpful.

    + +
  • +
+ +
+
Creating a new abbreviation
+
+ +

A special notation is available for use in a .perltidyrc file for creating an abbreviation for a group of options. This can be used to create a shorthand for one or more styles which are frequently, but not always, used. The notation is to group the options within curly braces which are preceded by the name of the alias (without leading dashes), like this:

+ +
        newword {
+        -opt1
+        -opt2
+        }
+ +

where newword is the abbreviation, and opt1, etc, are existing parameters or other abbreviations. The main syntax requirement is that the new abbreviation along with its opening curly brace must begin on a new line. Space before and after the curly braces is optional. For a specific example, the following line

+ +
        airy {-bl -pt=0 -bt=0 -sbt=0}
+ +

could be placed in a .perltidyrc file, and then invoked at will with

+ +
        perltidy -airy somefile.pl
+ +

(Either -airy or --airy may be used).

+ +
+
Skipping leading non-perl commands with -x or --look-for-hash-bang
+
+ +

If your script has leading lines of system commands or other text which are not valid perl code, and which are separated from the start of the perl code by a "hash-bang" line, ( a line of the form #!...perl ), you must use the -x flag to tell perltidy not to parse and format any lines before the "hash-bang" line. This option also invokes perl with a -x flag when checking the syntax. This option was originally added to allow perltidy to parse interactive VMS scripts, but it should be used for any script which is normally invoked with perl -x.

+ +
+
Making a file unreadable
+
+ +

The goal of perltidy is to improve the readability of files, but there are two commands which have the opposite effect, --mangle and --extrude. They are actually merely aliases for combinations of other parameters. Both of these strip all possible whitespace, but leave comments and pod documents, so that they are essentially reversible. The difference between these is that --mangle puts the fewest possible line breaks in a script while --extrude puts the maximum possible. Note that these options do not provided any meaningful obfuscation, because perltidy can be used to reformat the files. They were originally developed to help test the tokenization logic of perltidy, but they have other uses. One use for --mangle is the following:

+ +
  perltidy --mangle myfile.pl -st | perltidy -o myfile.pl.new
+ +

This will form the maximum possible number of one-line blocks (see next section), and can sometimes help clean up a badly formatted script.

+ +

A similar technique can be used with --extrude instead of --mangle to make the minimum number of one-line blocks.

+ +

Another use for --mangle is to combine it with -dac to reduce the file size of a perl script.

+ +
+
Debugging
+
+ +

The following flags are available for debugging:

+ +

--dump-cuddled-block-list or -dcbl will dump to standard output the internal hash of cuddled block types created by a -cuddled-block-list input string.

+ +

--dump-defaults or -ddf will write the default option set to standard output and quit

+ +

--dump-profile or -dpro will write the name of the current configuration file and its contents to standard output and quit.

+ +

--dump-options or -dop will write current option set to standard output and quit.

+ +

--dump-long-names or -dln will write all command line long names (passed to Get_options) to standard output and quit.

+ +

--dump-short-names or -dsn will write all command line short names to standard output and quit.

+ +

--dump-token-types or -dtt will write a list of all token types to standard output and quit.

+ +

--dump-want-left-space or -dwls will write the hash %want_left_space to standard output and quit. See the section on controlling whitespace around tokens.

+ +

--dump-want-right-space or -dwrs will write the hash %want_right_space to standard output and quit. See the section on controlling whitespace around tokens.

+ +

--no-memoize or -nmem will turn of memoizing. Memoization can reduce run time when running perltidy repeatedly in a single process. It is on by default but can be deactivated for testing with -nmem.

+ +

--no-timestamp or -nts will eliminate any time stamps in output files to prevent differences in dates from causing test installation scripts to fail. There are just a couple of places where timestamps normally occur. One is in the headers of html files, and another is when the -cscw option is selected. The default is to allow timestamps (--timestamp or -ts).

+ +

--file-size-order or -fso will cause files to be processed in order of increasing size, when multiple files are being processed. This is useful during program development, when large numbers of files with varying sizes are processed, because it can reduce virtual memory usage.

+ +

-DEBUG will write a file with extension .DEBUG for each input file showing the tokenization of all lines of code.

+ +
+
Working with MakeMaker, AutoLoader and SelfLoader
+
+ +

The first $VERSION line of a file which might be eval'd by MakeMaker is passed through unchanged except for indentation. Use --nopass-version-line, or -npvl, to deactivate this feature.

+ +

If the AutoLoader module is used, perltidy will continue formatting code after seeing an __END__ line. Use --nolook-for-autoloader, or -nlal, to deactivate this feature.

+ +

Likewise, if the SelfLoader module is used, perltidy will continue formatting code after seeing a __DATA__ line. Use --nolook-for-selfloader, or -nlsl, to deactivate this feature.

+ +
+
Working around problems with older version of Perl
+
+ +

Perltidy contains a number of rules which help avoid known subtleties and problems with older versions of perl, and these rules always take priority over whatever formatting flags have been set. For example, perltidy will usually avoid starting a new line with a bareword, because this might cause problems if use strict is active.

+ +

There is no way to override these rules.

+ +
+
+ +

HTML OPTIONS

+ +
+ +
The -html master switch
+
+ +

The flag -html causes perltidy to write an html file with extension .html. So, for example, the following command

+ +
        perltidy -html somefile.pl
+ +

will produce a syntax-colored html file named somefile.pl.html which may be viewed with a browser.

+ +

Please Note: In this case, perltidy does not do any formatting to the input file, and it does not write a formatted file with extension .tdy. This means that two perltidy runs are required to create a fully reformatted, html copy of a script.

+ +
+
The -pre flag for code snippets
+
+ +

When the -pre flag is given, only the pre-formatted section, within the <PRE> and </PRE> tags, will be output. This simplifies inclusion of the output in other files. The default is to output a complete web page.

+ +
+
The -nnn flag for line numbering
+
+ +

When the -nnn flag is given, the output lines will be numbered.

+ +
+
The -toc, or --html-table-of-contents flag
+
+ +

By default, a table of contents to packages and subroutines will be written at the start of html output. Use -ntoc to prevent this. This might be useful, for example, for a pod document which contains a number of unrelated code snippets. This flag only influences the code table of contents; it has no effect on any table of contents produced by pod2html (see next item).

+ +
+
The -pod, or --pod2html flag
+
+ +

There are two options for formatting pod documentation. The default is to pass the pod through the Pod::Html module (which forms the basis of the pod2html utility). Any code sections are formatted by perltidy, and the results then merged. Note: perltidy creates a temporary file when Pod::Html is used; see "FILES". Also, Pod::Html creates temporary files for its cache.

+ +

NOTE: Perltidy counts the number of =cut lines, and either moves the pod text to the top of the html file if there is one =cut, or leaves the pod text in its original order (interleaved with code) otherwise.

+ +

Most of the flags accepted by pod2html may be included in the perltidy command line, and they will be passed to pod2html. In some cases, the flags have a prefix pod to emphasize that they are for the pod2html, and this prefix will be removed before they are passed to pod2html. The flags which have the additional pod prefix are:

+ +
   --[no]podheader --[no]podindex --[no]podrecurse --[no]podquiet 
+   --[no]podverbose --podflush
+ +

The flags which are unchanged from their use in pod2html are:

+ +
   --backlink=s --cachedir=s --htmlroot=s --libpods=s --title=s
+   --podpath=s --podroot=s 
+ +

where 's' is an appropriate character string. Not all of these flags are available in older versions of Pod::Html. See your Pod::Html documentation for more information.

+ +

The alternative, indicated with -npod, is not to use Pod::Html, but rather to format pod text in italics (or whatever the stylesheet indicates), without special html markup. This is useful, for example, if pod is being used as an alternative way to write comments.

+ +
+
The -frm, or --frames flag
+
+ +

By default, a single html output file is produced. This can be changed with the -frm option, which creates a frame holding a table of contents in the left panel and the source code in the right side. This simplifies code browsing. Assume, for example, that the input file is MyModule.pm. Then, for default file extension choices, these three files will be created:

+ +
 MyModule.pm.html      - the frame
+ MyModule.pm.toc.html  - the table of contents
+ MyModule.pm.src.html  - the formatted source code
+ +

Obviously this file naming scheme requires that output be directed to a real file (as opposed to, say, standard output). If this is not the case, or if the file extension is unknown, the -frm option will be ignored.

+ +
+
The -text=s, or --html-toc-extension flag
+
+ +

Use this flag to specify the extra file extension of the table of contents file when html frames are used. The default is "toc". See "Specifying File Extensions".

+ +
+
The -sext=s, or --html-src-extension flag
+
+ +

Use this flag to specify the extra file extension of the content file when html frames are used. The default is "src". See "Specifying File Extensions".

+ +
+
The -hent, or --html-entities flag
+
+ +

This flag controls the use of Html::Entities for html formatting. By default, the module Html::Entities is used to encode special symbols. This may not be the right thing for some browser/language combinations. Use --nohtml-entities or -nhent to prevent this.

+ +
+
Style Sheets
+
+ +

Style sheets make it very convenient to control and adjust the appearance of html pages. The default behavior is to write a page of html with an embedded style sheet.

+ +

An alternative to an embedded style sheet is to create a page with a link to an external style sheet. This is indicated with the -css=filename, where the external style sheet is filename. The external style sheet filename will be created if and only if it does not exist. This option is useful for controlling multiple pages from a single style sheet.

+ +

To cause perltidy to write a style sheet to standard output and exit, use the -ss, or --stylesheet, flag. This is useful if the style sheet could not be written for some reason, such as if the -pre flag was used. Thus, for example,

+ +
  perltidy -html -ss >mystyle.css
+ +

will write a style sheet with the default properties to file mystyle.css.

+ +

The use of style sheets is encouraged, but a web page without a style sheets can be created with the flag -nss. Use this option if you must to be sure that older browsers (roughly speaking, versions prior to 4.0 of Netscape Navigator and Internet Explorer) can display the syntax-coloring of the html files.

+ +
+
Controlling HTML properties
+
+ +

Note: It is usually more convenient to accept the default properties and then edit the stylesheet which is produced. However, this section shows how to control the properties with flags to perltidy.

+ +

Syntax colors may be changed from their default values by flags of the either the long form, -html-color-xxxxxx=n, or more conveniently the short form, -hcx=n, where xxxxxx is one of the following words, and x is the corresponding abbreviation:

+ +
      Token Type             xxxxxx           x 
+      ----------             --------         --
+      comment                comment          c
+      number                 numeric          n
+      identifier             identifier       i
+      bareword, function     bareword         w
+      keyword                keyword          k
+      quite, pattern         quote            q
+      here doc text          here-doc-text    h
+      here doc target        here-doc-target  hh
+      punctuation            punctuation      pu
+      parentheses            paren            p
+      structural braces      structure        s
+      semicolon              semicolon        sc
+      colon                  colon            co
+      comma                  comma            cm
+      label                  label            j
+      sub definition name    subroutine       m
+      pod text               pod-text         pd
+ +

A default set of colors has been defined, but they may be changed by providing values to any of the following parameters, where n is either a 6 digit hex RGB color value or an ascii name for a color, such as 'red'.

+ +

To illustrate, the following command will produce an html file somefile.pl.html with "aqua" keywords:

+ +
        perltidy -html -hck=00ffff somefile.pl
+ +

and this should be equivalent for most browsers:

+ +
        perltidy -html -hck=aqua somefile.pl
+ +

Perltidy merely writes any non-hex names that it sees in the html file. The following 16 color names are defined in the HTML 3.2 standard:

+ +
        black   => 000000,
+        silver  => c0c0c0,
+        gray    => 808080,
+        white   => ffffff,
+        maroon  => 800000,
+        red     => ff0000,
+        purple  => 800080,
+        fuchsia => ff00ff,
+        green   => 008000,
+        lime    => 00ff00,
+        olive   => 808000,
+        yellow  => ffff00
+        navy    => 000080,
+        blue    => 0000ff,
+        teal    => 008080,
+        aqua    => 00ffff,
+ +

Many more names are supported in specific browsers, but it is safest to use the hex codes for other colors. Helpful color tables can be located with an internet search for "HTML color tables".

+ +

Besides color, two other character attributes may be set: bold, and italics. To set a token type to use bold, use the flag --html-bold-xxxxxx or -hbx, where xxxxxx or x are the long or short names from the above table. Conversely, to set a token type to NOT use bold, use --nohtml-bold-xxxxxx or -nhbx.

+ +

Likewise, to set a token type to use an italic font, use the flag --html-italic-xxxxxx or -hix, where again xxxxxx or x are the long or short names from the above table. And to set a token type to NOT use italics, use --nohtml-italic-xxxxxx or -nhix.

+ +

For example, to use bold braces and lime color, non-bold, italics keywords the following command would be used:

+ +
        perltidy -html -hbs -hck=00FF00 -nhbk -hik somefile.pl
+ +

The background color can be specified with --html-color-background=n, or -hcbg=n for short, where n is a 6 character hex RGB value. The default color of text is the value given to punctuation, which is black as a default.

+ +

Here are some notes and hints:

+ +

1. If you find a preferred set of these parameters, you may want to create a .perltidyrc file containing them. See the perltidy man page for an explanation.

+ +

2. Rather than specifying values for these parameters, it is probably easier to accept the defaults and then edit a style sheet. The style sheet contains comments which should make this easy.

+ +

3. The syntax-colored html files can be very large, so it may be best to split large files into smaller pieces to improve download times.

+ +
+
+ +

SOME COMMON INPUT CONVENTIONS

+ +

Specifying Block Types

+ +

Several parameters which refer to code block types may be customized by also specifying an associated list of block types. The type of a block is the name of the keyword which introduces that block, such as if, else, or sub. An exception is a labeled block, which has no keyword, and should be specified with just a colon. To specify all blocks use '*'.

+ +

The keyword sub indicates a named sub. For anonymous subs, use the special keyword asub.

+ +

For example, the following parameter specifies sub, labels, BEGIN, and END blocks:

+ +
   -cscl="sub : BEGIN END"
+ +

(the meaning of the -cscl parameter is described above.) Note that quotes are required around the list of block types because of the spaces. For another example, the following list specifies all block types for vertical tightness:

+ +
   -bbvtl='*'
+ +

Specifying File Extensions

+ +

Several parameters allow default file extensions to be overridden. For example, a backup file extension may be specified with -bext=ext, where ext is some new extension. In order to provides the user some flexibility, the following convention is used in all cases to decide if a leading '.' should be used. If the extension ext begins with A-Z, a-z, or 0-9, then it will be appended to the filename with an intermediate '.' (or perhaps an '_' on VMS systems). Otherwise, it will be appended directly.

+ +

For example, suppose the file is somefile.pl. For -bext=old, a '.' is added to give somefile.pl.old. For -bext=.old, no additional '.' is added, so again the backup file is somefile.pl.old. For -bext=~, then no dot is added, and the backup file will be somefile.pl~ .

+ +

SWITCHES WHICH MAY BE NEGATED

+ +

The following list shows all short parameter names which allow a prefix 'n' to produce the negated form:

+ +
 D    anl asc  aws  b    bbb bbc bbs  bl   bli  boc bok  bol  bot  ce
+ csc  dac dbc  dcsc ddf  dln dnl dop  dp   dpro dsc dsm  dsn  dtt  dwls
+ dwrs dws f    fll  frm  fs  hsc html ibc  icb  icp iob  isbc lal  log
+ lp   lsl ohbr okw  ola  oll opr opt  osbr otr  ple  pod  pvl  q
+ sbc  sbl schb scp  scsb sct se  sfp  sfs  skp  sob sohb sop  sosb sot
+ ssc  st  sts  syn  t    tac tbc toc  tp   tqw  tsc w    x    bar  kis
+ +

Equivalently, the prefix 'no' or 'no-' on the corresponding long names may be used.

+ +

LIMITATIONS

+ +
+ +
Parsing Limitations
+
+ +

Perltidy should work properly on most perl scripts. It does a lot of self-checking, but still, it is possible that an error could be introduced and go undetected. Therefore, it is essential to make careful backups and to test reformatted scripts.

+ +

The main current limitation is that perltidy does not scan modules included with 'use' statements. This makes it necessary to guess the context of any bare words introduced by such modules. Perltidy has good guessing algorithms, but they are not infallible. When it must guess, it leaves a message in the log file.

+ +

If you encounter a bug, please report it.

+ +
+
What perltidy does not parse and format
+
+ +

Perltidy indents but does not reformat comments and qw quotes. Perltidy does not in any way modify the contents of here documents or quoted text, even if they contain source code. (You could, however, reformat them separately). Perltidy does not format 'format' sections in any way. And, of course, it does not modify pod documents.

+ +
+
+ +

FILES

+ +
+ +
Temporary files
+
+ +

Under the -html option with the default --pod2html flag, a temporary file is required to pass text to Pod::Html. Unix systems will try to use the POSIX tmpnam() function. Otherwise the file perltidy.TMP will be temporarily created in the current working directory.

+ +
+
Special files when standard input is used
+
+ +

When standard input is used, the log file, if saved, is perltidy.LOG, and any errors are written to perltidy.ERR unless the -se flag is set. These are saved in the current working directory.

+ +
+
Files overwritten
+
+ +

The following file extensions are used by perltidy, and files with these extensions may be overwritten or deleted: .ERR, .LOG, .TEE, and/or .tdy, .html, and .bak, depending on the run type and settings.

+ +
+
Files extensions limitations
+
+ +

Perltidy does not operate on files for which the run could produce a file with a duplicated file extension. These extensions include .LOG, .ERR, .TEE, and perhaps .tdy and .bak, depending on the run type. The purpose of this rule is to prevent generating confusing filenames such as somefile.tdy.tdy.tdy.

+ +
+
+ +

SEE ALSO

+ +

perlstyle(1), Perl::Tidy(3)

+ +

VERSION

+ +

This man page documents perltidy version 20190601

+ +

BUG REPORTS

+ +

A list of current bugs and issues can be found at the CPAN site https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy

+ +

To report a new bug or problem, use the link on this page.

+ +

The source code repository is at https://github.com/perltidy/perltidy.

+ +

COPYRIGHT

+ +

Copyright (c) 2000-2018 by Steve Hancock

+ +

LICENSE

+ +

This package is free software; you can redistribute it and/or modify it under the terms of the "GNU General Public License".

+ +

Please refer to the file "COPYING" for details.

+ +

DISCLAIMER

+ +

This package is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

+ +

See the "GNU General Public License" for more details.

+ + + + + + + diff --git a/docs/stylekey.html b/docs/stylekey.html new file mode 100644 index 0000000..a34f319 --- /dev/null +++ b/docs/stylekey.html @@ -0,0 +1,682 @@ + + + + + + + + + + + + + + + +

Perltidy Style Key

+ +

When perltidy was first developed, the main parameter choices were the number of indentation spaces and if the user liked cuddled else's. As the number of users has grown so has the number of parameters. Now there are so many that it can be difficult for a new user to find a good initial set. This document is one attempt to help with this problem, and some other suggestions are given at the end.

+ +

Use this document to methodically find a starting set of perltidy parameters to approximate your style. We will be working on just one aspect of formatting at a time. Just read each question and select the best answer. Enter your parameters in a file named .perltidyrc (examples are listed at the end). Then move it to one of the places where perltidy will find it. You can run perltidy with the parameter -dpro to see where these places are for your system.

+ +

Before You Start

+ +

Before you begin, experiment using just perltidy filename.pl on some of your files. From the results (which you will find in files with a .tdy extension), you will get a sense of what formatting changes, if any, you'd like to make. If the default formatting is acceptable, you do not need a .perltidyrc file.

+ +

Use as Filter?

+ +

Do you almost always want to run perltidy as a standard filter on just one input file? If yes, use -st and -se.

+ +

Line Length Setting

+ +

Perltidy will set line breaks to prevent lines from exceeding the maximum line length.

+ +

Do you want the maximum line length to be 80 columns? If no, use -l=n, where n is the number of columns you prefer.

+ +

Indentation in Code Blocks

+ +

In the block below, the variable $anchor is one indentation level deep and is indented by 4 spaces as shown here:

+ +
    if ( $flag eq "a" ) {
+        $anchor = $header;
+    }  
+ +

If you want to change this to be a different number n of spaces per indentation level, use -i=n.

+ +

Continuation Indentation

+ +

Look at the statement beginning with $anchor:

+ +
    if ( $flag eq "a" ) {
+        $anchor =
+          substr( $header, 0, 6 )
+          . substr( $char_list, $place_1, 1 )
+          . substr( $char_list, $place_2, 1 );
+    }
+ +

The statement is too long for the line length (80 characters by default), so it has been broken into 4 lines. The second and later lines have some extra "continuation indentation" to help make the start of the statement easy to find. The default number of extra spaces is 2. If you prefer a number n different from 2, you may specify this with -ci=n. It is probably best if it does not exceed the value of the primary indentation.

+ +

Tabs

+ +

The default, and recommendation, is to represent leading whitespace with actual space characters. However, if you prefer to entab leading whitespace with one tab character for each n spaces, use -et=n. Typically, n would be 8.

+ +

Opening Block Brace Right or Left?

+ +

Opening and closing curly braces, parentheses, and square brackets are divided into two separate categories and controlled separately in most cases. The two categories are (1) code block curly braces, which contain perl code, and (2) everything else. Basically, a code block brace is one which could contain semicolon-terminated lines of perl code. We will first work on the scheme for code block curly braces.

+ +

Decide which of the following opening brace styles you prefer for most blocks of code (with the possible exception of a sub block brace which will be covered later):

+ +

If you like opening braces on the right, like this, go to "Opening Braces Right".

+ +
    if ( $flag eq "h" ) {
+        $headers = 0;
+    }  
+ +

If you like opening braces on the left, like this, go to "Opening Braces Left".

+ +
    if ( $flag eq "h" )
+    {
+        $headers = 0;
+    }
+ +

Opening Braces Right

+ +

In a multi-line if test expression, the default is to place the opening brace on the left, like this:

+ +
    if ( $bigwasteofspace1 && $bigwasteofspace2
+        || $bigwasteofspace3 && $bigwasteofspace4 )
+    {
+        big_waste_of_time();
+    }
+ +

This helps to visually separate the block contents from the test expression.

+ +

An alternative is to keep the brace on the right even for multiple-line test expressions, like this:

+ +
    if ( $bigwasteofspace1 && $bigwasteofspace2
+        || $bigwasteofspace3 && $bigwasteofspace4 ) {
+        big_waste_of_time();
+    }
+ +

If you prefer this alternative, use -bar.

+ +

Cuddled Else?

+ +

Do you prefer this Cuddled Else style

+ +
    if ( $flag eq "h" ) {
+        $headers = 0;
+    } elsif ( $flag eq "f" ) {
+        $sectiontype = 3;
+    } else {
+        print "invalid option: " . substr( $arg, $i, 1 ) . "\n";
+        dohelp();
+    }
+ +

instead of this default style?

+ +
    if ( $flag eq "h" ) {
+        $headers = 0;
+    }  
+    elsif ( $flag eq "f" ) {
+        $sectiontype = 3;
+    } 
+    else {    
+        print "invalid option: " . substr( $arg, $i, 1 ) . "\n";
+        dohelp();
+    }
+ +

If yes, you should use -ce. Now skip ahead to "Opening Sub Braces".

+ +

Opening Braces Left

+ +

Use -bl if you prefer this style:

+ +
    if ( $flag eq "h" )
+    {
+        $headers = 0;
+    }
+ +

Use -bli if you prefer this indented-brace style:

+ +
    if ( $flag eq "h" )
+      {
+        $headers = 0;
+      }
+ +

The number of spaces of extra indentation will be the value specified for continuation indentation with the -ci=n parameter (2 by default).

+ +

Opening Sub Braces

+ +

By default, the opening brace of a sub block will be treated the same as other code blocks. If this is okay, skip ahead to "Block Brace Vertical Tightness".

+ +

If you prefer an opening sub brace to be on a new line, like this:

+ +
    sub message
+    {
+        # -sbl
+    }
+ +

use -sbl. If you prefer the sub brace on the right like this

+ +
    sub message {
+
+        # -nsbl
+    }
+ +

use -nsbl.

+ +

If you wish to give this opening sub brace some indentation you can do that with the parameters -bli and -blil which are described in the manual.

+ +

Block Brace Vertical Tightness

+ +

If you chose to put opening block braces of all types to the right, skip ahead to "Closing Block Brace Indentation".

+ +

If you chose to put braces of any type on the left, the default is to leave the opening brace on a line by itself, like this (shown for -bli, but also true for -bl):

+ +
    if ( $flag eq "h" )
+      {
+        $headers = 0;
+      }
+ +

But you may also use this more compressed style if you wish:

+ +
    if ( $flag eq "h" )
+      { $headers = 0;
+      }
+ +

If you do not prefer this more compressed form, go to "Opening Sub Braces".

+ +

Otherwise use parameter -bbvt=n, where n=1 or n=2. To decide, look at this snippet:

+ +
    # -bli -bbvt=1
+    sub _directives
+      {
+        {
+            'ENDIF' => \&_endif,
+               'IF' => \&_if,
+        };
+      }
+
+    # -bli -bbvt=2
+    sub _directives
+    {   {
+            'ENDIF' => \&_endif,
+            'IF'    => \&_if,
+        };
+    }
+ +

The difference is that -bbvt=1 breaks after an opening brace if the next line is unbalanced, whereas -bbvt=2 never breaks.

+ +

If you were expecting the 'ENDIF' word to move up vertically here, note that the second opening brace in the above example is not a code block brace (it is a hash brace), so the -bbvt does not apply to it (another parameter will).

+ +

Closing Block Brace Indentation

+ +

The default is to place closing braces at the same indentation as the opening keyword or brace of that code block, as shown here:

+ +
        if ($task) {
+            yyy();
+        }            # default
+ +

If you chose the -bli style, however, the default closing braces will be indented one continuation indentation like the opening brace:

+ +
        if ($task)
+          {
+            yyy();
+          }    # -bli
+ +

If you prefer to give closing block braces one full level of indentation, independently of how the opening brace is treated, for example like this:

+ +
        if ($task) {
+            yyy();
+            }          # -icb
+ +

use -icb.

+ +

This completes the definition of the placement of code block braces.

+ +

Indentation Style for Other Containers

+ +

You have a choice of two basic indentation schemes for non-block containers. The default is to use a fixed number of spaces per indentation level (the same number of spaces used for code blocks, which is 4 by default). Here is an example of the default:

+ +
    $dbh = DBI->connect(
+        undef, undef, undef,
+        {
+            PrintError => 0,
+            RaiseError => 1
+        }
+    );
+ +

In this default indentation scheme, a simple formula is used to find the indentation of every line. Notice how the first 'undef' is indented 4 spaces (one level) to the right, and how 'PrintError' is indented 4 more speces (one more level) to the right.

+ +

The alternate is to let the location of the opening paren (or square bracket, or curly brace) define the indentation, like this:

+ +
    $dbh = DBI->connect(
+                         undef, undef, undef,
+                         {
+                           PrintError => 0,
+                           RaiseError => 1
+                         }
+    );
+ +

The first scheme is completely robust. The second scheme often looks a little nicer, but be aware that deeply nested structures it can be spoiled if the line length limit is exceeded. Also, if there are comments or blank lines within a complex structure perltidy will temporarily fall back on the default indentation scheme. You may want to try both on large sections of code to see which works best.

+ +

If you prefer the first (default) scheme, no parameter is needed.

+ +

If you prefer the latter scheme, use -lp.

+ +

Opening Vertical Tightness

+ +

The information in this section applies mainly to the -lp style but it also applies in some cases to the default style. It will be illustrated for the -lp indentation style.

+ +

The default -lp indentation style ends a line at the opening tokens, like this:

+ +
    $dbh = DBI->connect(
+                         undef, undef, undef,
+                         {
+                           PrintError => 0,
+                           RaiseError => 1
+                         }
+    );
+ +

Here is a tighter alternative, which does not end a line with the opening tokens:

+ +
    $dbh = DBI->connect( undef, undef, undef,
+                         { PrintError => 0,
+                           RaiseError => 1
+                         }
+    );
+ +

The difference is that the lines have been compressed vertically without any changes to the indentation. This can almost always be done with the -lp indentation style, but only in limited cases for the default indentation style.

+ +

If you prefer the default, skip ahead to "Closing Token Placement".

+ +

Otherwise, use -vt=n, where n should be either 1 or 2. To help decide, observe the first three opening parens in the following snippet and choose the value of n you prefer. Here it is with -lp -vt=1:

+ +
    if (
+         !defined(
+                   start_slip( $DEVICE, $PHONE,  $ACCOUNT, $PASSWORD,
+                               $LOCAL,  $REMOTE, $NETMASK, $MTU
+                   )
+         )
+         && $continuation_flag
+      )
+    {
+        do_something_about_it();
+    }
+ +

And here it is again formatted with -lp -vt=2:

+ +
    if ( !defined( start_slip( $DEVICE, $PHONE,  $ACCOUNT, $PASSWORD,
+                               $LOCAL,  $REMOTE, $NETMASK, $MTU
+                   )
+         )
+         && $continuation_flag
+      )
+    {
+        do_something_about_it();
+    }
+ +

The -vt=1 style tries to display the structure by preventing more than one step in indentation per line. In this example, the first two opening parens were not followed by balanced lines, so -vt=1 broke after them.

+ +

The -vt=2 style does not limit itself to a single indentation step per line.

+ +

Note that in the above example the function 'do_sumething_about_it' started on a new line. This is because it follows an opening code block brace and is governed by the flag previously set in "Block Brace Vertical Tightness".

+ +

Closing Token Placement

+ +

You have several options for dealing with the terminal closing tokens of non-blocks. In the following examples, a closing parenthesis is shown, but these parameters apply to closing square brackets and non-block curly braces as well.

+ +

The default behavior for parenthesized relatively large lists is to place the closing paren on a separate new line. The flag -cti=n controls the amount of indentation of such a closing paren.

+ +

The default, -cti=0, for a line beginning with a closing paren, is to use the indentation defined by the next (lower) indentation level. This works well for the default indentation scheme:

+ +
    # perltidy
+    @month_of_year = (
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+ +

but it may not look very good with the -lp indentation scheme:

+ +
    # perltidy -lp
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+ +

An alternative which works well with -lp indentation is -cti=1, which aligns the closing paren vertically with its opening paren, if possible:

+ +
    # perltidy -lp -cti=1
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                     );
+ +

Another alternative, -cti=3, indents a line with leading closing paren one full indentation level:

+ +
    # perltidy -lp -cti=3
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                       );
+ +

If you prefer the closing paren on a separate line like this, note the value of -cti=n that you prefer and skip ahead to "Define Horizontal Tightness".

+ +

Finally, the question of paren indentation can be avoided by placing it at the end of the previous line, like this:

+ +
    @month_of_year = (
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
+ +

Perltidy will automatically do this to save space for very short lists but not for longer lists.

+ +

Use -vtc=n if you prefer to usually do this, where n is either 1 or 2. To determine n, we have to look at something more complex. Observe the behavior of the closing tokens in the following snippet:

+ +

Here is -lp -vtc=1:

+ +
    $srec->{'ACTION'} = [
+                          $self->read_value(
+                                             $lookup->{'VFMT'},
+                                             $loc, $lookup, $fh
+                          ),
+                          $self->read_value(
+                                             $lookup->{'VFMT2'},
+                                             $loc, $lookup, $fh
+                          ) ];
+ +

Here is -lp -vtc=2:

+ +
    $srec->{'ACTION'} = [
+                          $self->read_value(
+                                             $lookup->{'VFMT'},
+                                             $loc, $lookup, $fh ),
+                          $self->read_value(
+                                             $lookup->{'VFMT2'},
+                                             $loc, $lookup, $fh ) ];
+ +

Choose the one that you prefer. The difference is that -vtc=1 leaves closing tokens at the start of a line within a list, which can assist in keeping hierarchical lists readable. The -vtc=2 style always tries to move closing tokens to the end of a line.

+ +

If you choose -vtc=1, you may also want to specify a value of -cti=n (previous section) to handle cases where a line begins with a closing paren.

+ +

Stack Opening Tokens

+ +

In the following snippet the opening hash brace has been placed alone on a new line.

+ +
    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+ +

If you prefer to avoid isolated opening tokens by "stacking" them together with other opening tokens like this:

+ +
    $opt_c = Text::CSV_XS->new( {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+ +

use -sot.

+ +

Stack Closing Tokens

+ +

Likewise, in the same snippet the default formatting leaves the closing paren on a line by itself here:

+ +
    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+ +

If you would like to avoid leaving isolated closing tokens by stacking them with other closing tokens, like this:

+ +
    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        } );
+ +

use -sct.

+ +

The -sct flag is somewhat similar to the -vtc flags, and in some cases it can give a similar result. The difference is that the -vtc flags try to avoid lines with leading opening tokens by "hiding" them at the end of a previous line, whereas the -sct flag merely tries to reduce the number of lines with isolated closing tokens by stacking multiple closing tokens together, but it does not try to hide them.

+ +

The manual shows how all of these vertical tightness controls may be applied independently to each type of non-block opening and opening token.

+ +

Define Horizontal Tightness

+ +

Horizontal tightness parameters define how much space is included within a set of container tokens.

+ +

For parentheses, decide which of the following values of -pt=n you prefer:

+ +
 if ( ( my $len_tab = length( $tabstr ) ) > 0 ) {  # -pt=0
+ if ( ( my $len_tab = length($tabstr) ) > 0 ) {    # -pt=1 (default)
+ if ((my $len_tab = length($tabstr)) > 0) {        # -pt=2
+ +

For n=0, space is always used, and for n=2, space is never used. For the default n=1, space is used if the parentheses contain more than one token.

+ +

For square brackets, decide which of the following values of -sbt=n you prefer:

+ +
 $width = $col[ $j + $k ] - $col[ $j ];  # -sbt=0
+ $width = $col[ $j + $k ] - $col[$j];    # -sbt=1 (default)
+ $width = $col[$j + $k] - $col[$j];      # -sbt=2 
+ +

For curly braces, decide which of the following values of -bt=n you prefer:

+ +
 $obj->{ $parsed_sql->{ 'table' }[0] };    # -bt=0
+ $obj->{ $parsed_sql->{'table'}[0] };      # -bt=1 (default)
+ $obj->{$parsed_sql->{'table'}[0]};        # -bt=2
+ +

For code block curly braces, decide which of the following values of -bbt=n you prefer:

+ +
 %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default)
+ %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.';   # -bbt=1
+ %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.';     # -bbt=2
+ +

Spaces between function names and opening parens

+ +

The default is not to place a space after a function call:

+ +
  myfunc( $a, $b, $c );    # default 
+ +

If you prefer a space:

+ +
  myfunc ( $a, $b, $c );   # -sfp
+ +

use -sfp.

+ +

Spaces between Perl keywords and parens

+ +

The default is to place a space between only these keywords and an opening paren:

+ +
   my local our and or eq ne if else elsif until unless 
+   while for foreach return switch case given when
+ +

but no others. For example, the default is:

+ +
    $aa = pop(@bb);
+ +

If you want a space between all Perl keywords and an opening paren,

+ +
    $aa = pop (@bb);
+ +

use -skp. For detailed control of individual keywords, see the manual.

+ +

Statement Termination Semicolon Spaces

+ +

The default is not to put a space before a statement termination semicolon, like this:

+ +
    $i = 1;
+ +

If you prefer a space, like this:

+ +
    $i = 1 ; 
+ +

enter -sts.

+ +

For Loop Semicolon Spaces

+ +

The default is to place a space before a semicolon in a for statement, like this:

+ +
 for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) {  # -sfs (default)
+ +

If you prefer no such space, like this:

+ +
 for ( @a = @$ap, $u = shift @a; @a; $u = $v ) {    # -nsfs
+ +

enter -nsfs.

+ +

Block Comment Indentation

+ +

Block comments are comments which occupy a full line, as opposed to side comments. The default is to indent block comments with the same indentation as the code block that contains them (even though this will allow long comments to exceed the maximum line length).

+ +

If you would like block comments indented except when this would cause the maximum line length to be exceeded, use -olc. This will cause a group of consecutive block comments to be outdented by the amount needed to prevent any one from exceeding the maximum line length.

+ +

If you never want block comments indented, use -nibc.

+ +

If block comments may only be indented if they have some space characters before the leading # character in the input file, use -isbc.

+ +

The manual shows many other options for controlling comments.

+ +

Outdenting Long Quotes

+ +

Long quoted strings may exceed the specified line length limit. The default, when this happens, is to outdent them to the first column. Here is an example of an outdented long quote:

+ +
        if ($source_stream) {
+            if ( @ARGV > 0 ) {
+                die
+ "You may not specify any filenames when a source array is given\n";
+            }
+        }
+ +

The effect is not too different from using a here document to represent the quote. If you prefer to leave the quote indented, like this:

+ +
        if ($source_stream) {
+            if ( @ARGV > 0 ) {
+                die
+                  "You may not specify any filenames when a source array is given\n";
+            }
+        }
+ +

use -nolq.

+ +

Many Other Parameters

+ +

This document has only covered the most popular parameters. The manual contains many more and should be consulted if you did not find what you need here.

+ +

Example .perltidyrc files

+ +

Now gather together all of the parameters you prefer and enter them in a file called .perltidyrc.

+ +

Here are some example .perltidyrc files and the corresponding style.

+ +

Here is a little test snippet, shown the way it would appear with the default style.

+ +
    for (@methods) {
+        push (
+            @results,
+            {
+                name => $_->name,
+                help => $_->help,
+            }
+        );
+    }
+ +

You do not need a .perltidyrc file for this style.

+ +

Here is the same snippet

+ +
    for (@methods)
+    {
+        push(@results,
+             {  name => $_->name,
+                help => $_->help,
+             }
+            );
+    }
+ +

for a .perltidyrc file containing these parameters:

+ +
 -bl
+ -lp
+ -cti=1
+ -vt=1
+ -pt=2
+ +

You do not need to place just one parameter per line, but this may be convenient for long lists. You may then hide any parameter by placing a # symbol before it.

+ +

And here is the snippet

+ +
    for (@methods) {
+        push ( @results,
+               { name => $_->name,
+                 help => $_->help,
+               } );
+    }
+ +

for a .perltidyrc file containing these parameters:

+ +
 -lp
+ -vt=1
+ -vtc=1
+ +

Tidyview

+ +

There is a graphical program called tidyview which you can use to read a preliminary .perltidyrc file, make trial adjustments and immediately see their effect on a test file, and then write a new .perltidyrc. You can download a copy at

+ +

http://sourceforge.net/projects/tidyview

+ +

Additional Information

+ +

This document has covered the main parameters. Many more parameters are available for special purposes and for fine-tuning a style. For complete information see the perltidy manual http://perltidy.sourceforge.net/perltidy.html

+ +

For an introduction to using perltidy, see the tutorial http://perltidy.sourceforge.net/tutorial.html

+ +

Suggestions for improving this document are welcome and may be sent to perltidy at users.sourceforge.net

+ + + + + + + diff --git a/docs/stylekey.pod b/docs/stylekey.pod deleted file mode 100644 index ca9a7e0..0000000 --- a/docs/stylekey.pod +++ /dev/null @@ -1,781 +0,0 @@ -=head1 Perltidy Style Key - -When perltidy was first developed, the main parameter choices were the number -of indentation spaces and if the user liked cuddled else's. As the number of -users has grown so has the number of parameters. Now there are so many that it -can be difficult for a new user to find a good initial set. This document is -one attempt to help with this problem, and some other suggestions are given at -the end. - -Use this document to methodically find a starting set of perltidy parameters to -approximate your style. We will be working on just one aspect of formatting at -a time. Just read each question and select the best answer. Enter your -parameters in a file named F<.perltidyrc> (examples are listed at the end). -Then move it to one of the places where perltidy will find it. You can run -perltidy with the parameter B<-dpro> to see where these places are for your -system. - -=head2 Before You Start - -Before you begin, experiment using just C on some -of your files. From the results (which you will find in files with a -F<.tdy> extension), you will get a sense of what formatting changes, if -any, you'd like to make. If the default formatting is acceptable, you -do not need a F<.perltidyrc> file. - -=head2 Use as Filter? - -Do you almost always want to run perltidy as a standard filter on just -one input file? If yes, use B<-st> and B<-se>. - -=head2 Line Length Setting - -Perltidy will set line breaks to prevent lines from exceeding the -maximum line length. - -Do you want the maximum line length to be 80 columns? If no, use -B<-l=n>, where B is the number of columns you prefer. - -=head2 Indentation in Code Blocks - -In the block below, the variable C<$anchor> is one indentation level deep -and is indented by 4 spaces as shown here: - - if ( $flag eq "a" ) { - $anchor = $header; - } - -If you want to change this to be a different number B of spaces -per indentation level, use B<-i=n>. - -=head2 Continuation Indentation - -Look at the statement beginning with C<$anchor>: - - if ( $flag eq "a" ) { - $anchor = - substr( $header, 0, 6 ) - . substr( $char_list, $place_1, 1 ) - . substr( $char_list, $place_2, 1 ); - } - -The statement is too long for the line length (80 characters by default), so it -has been broken into 4 lines. The second and later lines have some extra -"continuation indentation" to help make the start of the statement easy to -find. The default number of extra spaces is 2. If you prefer a number n -different from 2, you may specify this with B<-ci=n>. It is probably best if -it does not exceed the value of the primary indentation. - -=head2 Tabs - -The default, and recommendation, is to represent leading whitespace -with actual space characters. However, if you prefer to entab -leading whitespace with one tab character for each B spaces, -use B<-et=n>. Typically, B would be 8. - -=head2 Opening Block Brace Right or Left? - -Opening and closing curly braces, parentheses, and square brackets are divided -into two separate categories and controlled separately in most cases. The two -categories are (1) code block curly braces, which contain perl code, and (2) -everything else. Basically, a code block brace is one which could contain -semicolon-terminated lines of perl code. We will first work on the scheme for -code block curly braces. - -Decide which of the following opening brace styles you prefer for most blocks -of code (with the possible exception of a B which will -be covered later): - -If you like opening braces on the right, like this, go to -L. - - if ( $flag eq "h" ) { - $headers = 0; - } - -If you like opening braces on the left, like this, go to -L. - - if ( $flag eq "h" ) - { - $headers = 0; - } - -=head2 Opening Braces Right - -In a multi-line B test expression, the default is to place -the opening brace on the left, like this: - - if ( $bigwasteofspace1 && $bigwasteofspace2 - || $bigwasteofspace3 && $bigwasteofspace4 ) - { - big_waste_of_time(); - } - -This helps to visually separate the block contents from the test -expression. - -An alternative is to keep the brace on the right even for -multiple-line test expressions, like this: - - if ( $bigwasteofspace1 && $bigwasteofspace2 - || $bigwasteofspace3 && $bigwasteofspace4 ) { - big_waste_of_time(); - } - -If you prefer this alternative, use B<-bar>. - -=head2 Cuddled Else? - -Do you prefer this B style - - if ( $flag eq "h" ) { - $headers = 0; - } elsif ( $flag eq "f" ) { - $sectiontype = 3; - } else { - print "invalid option: " . substr( $arg, $i, 1 ) . "\n"; - dohelp(); - } - -instead of this default style? - - if ( $flag eq "h" ) { - $headers = 0; - } - elsif ( $flag eq "f" ) { - $sectiontype = 3; - } - else { - print "invalid option: " . substr( $arg, $i, 1 ) . "\n"; - dohelp(); - } - -If yes, you should use B<-ce>. -Now skip ahead to L. - -=head2 Opening Braces Left - -Use B<-bl> if you prefer this style: - - if ( $flag eq "h" ) - { - $headers = 0; - } - -Use B<-bli> if you prefer this indented-brace style: - - if ( $flag eq "h" ) - { - $headers = 0; - } - -The number of spaces of extra indentation will be the value specified -for continuation indentation with the B<-ci=n> parameter (2 by default). - -=head2 Opening Sub Braces - -By default, the opening brace of a sub block will be treated -the same as other code blocks. If this is okay, skip ahead -to L. - -If you prefer an opening sub brace to be on a new line, -like this: - - sub message - { - # -sbl - } - -use B<-sbl>. If you prefer the sub brace on the right like this - - sub message { - - # -nsbl - } - -use B<-nsbl>. - -If you wish to give this opening sub brace some indentation you can do -that with the parameters B<-bli> and B<-blil> which are described in the -manual. - -=head2 Block Brace Vertical Tightness - -If you chose to put opening block braces of all types to the right, skip -ahead to L. - -If you chose to put braces of any type on the left, the default is to leave the -opening brace on a line by itself, like this (shown for B<-bli>, but also true -for B<-bl>): - - if ( $flag eq "h" ) - { - $headers = 0; - } - -But you may also use this more compressed style if you wish: - - if ( $flag eq "h" ) - { $headers = 0; - } - -If you do not prefer this more compressed form, go to -L. - -Otherwise use parameter B<-bbvt=n>, where n=1 or n=2. To decide, -look at this snippet: - - # -bli -bbvt=1 - sub _directives - { - { - 'ENDIF' => \&_endif, - 'IF' => \&_if, - }; - } - - # -bli -bbvt=2 - sub _directives - { { - 'ENDIF' => \&_endif, - 'IF' => \&_if, - }; - } - -The difference is that B<-bbvt=1> breaks after an opening brace if -the next line is unbalanced, whereas B<-bbvt=2> never breaks. - -If you were expecting the 'ENDIF' word to move up vertically here, note that -the second opening brace in the above example is not a code block brace (it is -a hash brace), so the B<-bbvt> does not apply to it (another parameter will). - -=head2 Closing Block Brace Indentation - -The default is to place closing braces at the same indentation as the -opening keyword or brace of that code block, as shown here: - - if ($task) { - yyy(); - } # default - -If you chose the B<-bli> style, however, the default closing braces will be -indented one continuation indentation like the opening brace: - - if ($task) - { - yyy(); - } # -bli - -If you prefer to give closing block braces one full level of -indentation, independently of how the opening brace is treated, -for example like this: - - if ($task) { - yyy(); - } # -icb - -use B<-icb>. - -This completes the definition of the placement of code block braces. - -=head2 Indentation Style for Other Containers - -You have a choice of two basic indentation schemes for non-block containers. -The default is to use a fixed number of spaces per indentation level (the same -number of spaces used for code blocks, which is 4 by default). Here is an -example of the default: - - $dbh = DBI->connect( - undef, undef, undef, - { - PrintError => 0, - RaiseError => 1 - } - ); - -In this default indentation scheme, a simple formula is used to find the -indentation of every line. Notice how the first 'undef' is indented 4 -spaces (one level) to the right, and how 'PrintError' is indented 4 more -speces (one more level) to the right. - -The alternate is to let the location of the opening paren (or square -bracket, or curly brace) define the indentation, like this: - - $dbh = DBI->connect( - undef, undef, undef, - { - PrintError => 0, - RaiseError => 1 - } - ); - -The first scheme is completely robust. The second scheme often looks a little -nicer, but be aware that deeply nested structures it can be spoiled if the line -length limit is exceeded. Also, if there are comments or blank lines within a -complex structure perltidy will temporarily fall back on the default -indentation scheme. You may want to try both on large sections of code to see -which works best. - -If you prefer the first (default) scheme, no parameter is needed. - -If you prefer the latter scheme, use B<-lp>. - -=head2 Opening Vertical Tightness - -The information in this section applies mainly to the B<-lp> -style but it also applies in some cases to the default style. -It will be illustrated for the B<-lp> indentation style. - -The default B<-lp> indentation style ends a line at the -opening tokens, like this: - - $dbh = DBI->connect( - undef, undef, undef, - { - PrintError => 0, - RaiseError => 1 - } - ); - -Here is a tighter alternative, which does not end a line -with the opening tokens: - - $dbh = DBI->connect( undef, undef, undef, - { PrintError => 0, - RaiseError => 1 - } - ); - -The difference is that the lines have been compressed vertically without -any changes to the indentation. This can almost always be done with the -B<-lp> indentation style, but only in limited cases for the default -indentation style. - -If you prefer the default, skip ahead to L. - -Otherwise, use B<-vt=n>, where B should be either 1 or 2. To help -decide, observe the first three opening parens in the following snippet -and choose the value of n you prefer. Here it is with B<-lp -vt=1>: - - if ( - !defined( - start_slip( $DEVICE, $PHONE, $ACCOUNT, $PASSWORD, - $LOCAL, $REMOTE, $NETMASK, $MTU - ) - ) - && $continuation_flag - ) - { - do_something_about_it(); - } - -And here it is again formatted with B<-lp -vt=2>: - - if ( !defined( start_slip( $DEVICE, $PHONE, $ACCOUNT, $PASSWORD, - $LOCAL, $REMOTE, $NETMASK, $MTU - ) - ) - && $continuation_flag - ) - { - do_something_about_it(); - } - -The B<-vt=1> style tries to display the structure by preventing more -than one step in indentation per line. In this example, the first two -opening parens were not followed by balanced lines, so B<-vt=1> broke -after them. - -The B<-vt=2> style does not limit itself to a single indentation step -per line. - -Note that in the above example the function 'do_sumething_about_it' -started on a new line. This is because it follows an opening code -block brace and is governed by the flag previously set in -L. - -=head2 Closing Token Placement - -You have several options for dealing with the terminal closing tokens of -non-blocks. In the following examples, a closing parenthesis is shown, but -these parameters apply to closing square brackets and non-block curly braces as -well. - -The default behavior for parenthesized relatively large lists is to place the -closing paren on a separate new line. The flag B<-cti=n> controls the amount -of indentation of such a closing paren. - -The default, B<-cti=0>, for a line beginning with a closing paren, is to use -the indentation defined by the next (lower) indentation level. This works -well for the default indentation scheme: - - # perltidy - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - -but it may not look very good with the B<-lp> indentation scheme: - - # perltidy -lp - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - -An alternative which works well with B<-lp> indentation is B<-cti=1>, -which aligns the closing paren vertically with its -opening paren, if possible: - - # perltidy -lp -cti=1 - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - -Another alternative, B<-cti=3>, indents a line with leading closing -paren one full indentation level: - - # perltidy -lp -cti=3 - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' - ); - -If you prefer the closing paren on a separate line like this, -note the value of B<-cti=n> that you prefer and skip ahead to -L. - -Finally, the question of paren indentation can be avoided by placing it -at the end of the previous line, like this: - - @month_of_year = ( - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ); - -Perltidy will automatically do this to save space for very short lists but not -for longer lists. - -Use B<-vtc=n> if you prefer to usually do this, where B is either 1 or 2. To -determine B, we have to look at something more complex. Observe the -behavior of the closing tokens in the following snippet: - -Here is B<-lp -vtc=1>: - - $srec->{'ACTION'} = [ - $self->read_value( - $lookup->{'VFMT'}, - $loc, $lookup, $fh - ), - $self->read_value( - $lookup->{'VFMT2'}, - $loc, $lookup, $fh - ) ]; - -Here is B<-lp -vtc=2>: - - $srec->{'ACTION'} = [ - $self->read_value( - $lookup->{'VFMT'}, - $loc, $lookup, $fh ), - $self->read_value( - $lookup->{'VFMT2'}, - $loc, $lookup, $fh ) ]; - -Choose the one that you prefer. The difference is that B<-vtc=1> leaves -closing tokens at the start of a line within a list, which can assist in -keeping hierarchical lists readable. The B<-vtc=2> style always tries -to move closing tokens to the end of a line. - -If you choose B<-vtc=1>, -you may also want to specify a value of B<-cti=n> (previous section) to -handle cases where a line begins with a closing paren. - -=head2 Stack Opening Tokens - -In the following snippet the opening hash brace has been placed -alone on a new line. - - $opt_c = Text::CSV_XS->new( - { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } - ); - -If you prefer to avoid isolated opening tokens by -"stacking" them together with other opening tokens like this: - - $opt_c = Text::CSV_XS->new( { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } - ); - -use B<-sot>. - -=head2 Stack Closing Tokens - -Likewise, in the same snippet the default formatting leaves -the closing paren on a line by itself here: - - $opt_c = Text::CSV_XS->new( - { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } - ); - -If you would like to avoid leaving isolated closing tokens by -stacking them with other closing tokens, like this: - - $opt_c = Text::CSV_XS->new( - { - binary => 1, - sep_char => $opt_c, - always_quote => 1, - } ); - -use B<-sct>. - -The B<-sct> flag is somewhat similar to the B<-vtc> flags, and in some cases it -can give a similar result. The difference is that the B<-vtc> flags try to -avoid lines with leading opening tokens by "hiding" them at the end of a -previous line, whereas the B<-sct> flag merely tries to reduce the number of -lines with isolated closing tokens by stacking multiple closing tokens -together, but it does not try to hide them. - -The manual shows how all of these vertical tightness controls may be applied -independently to each type of non-block opening and opening token. - -=head2 Define Horizontal Tightness - -Horizontal tightness parameters define how much space is included -within a set of container tokens. - -For parentheses, decide which of the following values of B<-pt=n> -you prefer: - - if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # -pt=0 - if ( ( my $len_tab = length($tabstr) ) > 0 ) { # -pt=1 (default) - if ((my $len_tab = length($tabstr)) > 0) { # -pt=2 - -For n=0, space is always used, and for n=2, space is never used. For -the default n=1, space is used if the parentheses contain more than -one token. - -For square brackets, decide which of the following values of B<-sbt=n> -you prefer: - - $width = $col[ $j + $k ] - $col[ $j ]; # -sbt=0 - $width = $col[ $j + $k ] - $col[$j]; # -sbt=1 (default) - $width = $col[$j + $k] - $col[$j]; # -sbt=2 - -For curly braces, decide which of the following values of B<-bt=n> -you prefer: - - $obj->{ $parsed_sql->{ 'table' }[0] }; # -bt=0 - $obj->{ $parsed_sql->{'table'}[0] }; # -bt=1 (default) - $obj->{$parsed_sql->{'table'}[0]}; # -bt=2 - -For code block curly braces, decide which of the following values of -B<-bbt=n> you prefer: - - %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default) - %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.'; # -bbt=1 - %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.'; # -bbt=2 - -=head2 Spaces between function names and opening parens - -The default is not to place a space after a function call: - - myfunc( $a, $b, $c ); # default - -If you prefer a space: - - myfunc ( $a, $b, $c ); # -sfp - -use B<-sfp>. - -=head2 Spaces between Perl keywords and parens - -The default is to place a space between only these keywords -and an opening paren: - - my local our and or eq ne if else elsif until unless - while for foreach return switch case given when - -but no others. For example, the default is: - - $aa = pop(@bb); - -If you want a space between all Perl keywords and an opening paren, - - $aa = pop (@bb); - -use B<-skp>. For detailed control of individual keywords, see the manual. - -=head2 Statement Termination Semicolon Spaces - -The default is not to put a space before a statement termination -semicolon, like this: - - $i = 1; - -If you prefer a space, like this: - - $i = 1 ; - -enter B<-sts>. - -=head2 For Loop Semicolon Spaces - -The default is to place a space before a semicolon in a for statement, -like this: - - for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) { # -sfs (default) - -If you prefer no such space, like this: - - for ( @a = @$ap, $u = shift @a; @a; $u = $v ) { # -nsfs - -enter B<-nsfs>. - -=head2 Block Comment Indentation - -Block comments are comments which occupy a full line, as opposed to side -comments. The default is to indent block comments with the same -indentation as the code block that contains them (even though this -will allow long comments to exceed the maximum line length). - -If you would like block comments indented except when this would cause -the maximum line length to be exceeded, use B<-olc>. This will cause a -group of consecutive block comments to be outdented by the amount needed -to prevent any one from exceeding the maximum line length. - -If you never want block comments indented, use B<-nibc>. - -If block comments may only be indented if they have some space -characters before the leading C<#> character in the input file, use -B<-isbc>. - -The manual shows many other options for controlling comments. - -=head2 Outdenting Long Quotes - -Long quoted strings may exceed the specified line length limit. The -default, when this happens, is to outdent them to the first column. -Here is an example of an outdented long quote: - - if ($source_stream) { - if ( @ARGV > 0 ) { - die - "You may not specify any filenames when a source array is given\n"; - } - } - -The effect is not too different from using a here document to represent -the quote. If you prefer to leave the quote indented, like this: - - if ($source_stream) { - if ( @ARGV > 0 ) { - die - "You may not specify any filenames when a source array is given\n"; - } - } - -use B<-nolq>. - -=head2 Many Other Parameters - -This document has only covered the most popular parameters. The manual -contains many more and should be consulted if you did not find what you need -here. - -=head2 Example F<.perltidyrc> files - -Now gather together all of the parameters you prefer and enter them -in a file called F<.perltidyrc>. - -Here are some example F<.perltidyrc> files and the corresponding style. - -Here is a little test snippet, shown the way it would appear with -the default style. - - for (@methods) { - push ( - @results, - { - name => $_->name, - help => $_->help, - } - ); - } - -You do not need a F<.perltidyrc> file for this style. - -Here is the same snippet - - for (@methods) - { - push(@results, - { name => $_->name, - help => $_->help, - } - ); - } - -for a F<.perltidyrc> file containing these parameters: - - -bl - -lp - -cti=1 - -vt=1 - -pt=2 - -You do not need to place just one parameter per line, but this may be -convenient for long lists. You may then hide any parameter by placing -a C<#> symbol before it. - -And here is the snippet - - for (@methods) { - push ( @results, - { name => $_->name, - help => $_->help, - } ); - } - -for a F<.perltidyrc> file containing these parameters: - - -lp - -vt=1 - -vtc=1 - -=head2 Tidyview - -There is a graphical program called B which you can use to read a -preliminary F<.perltidyrc> file, make trial adjustments and immediately see -their effect on a test file, and then write a new F<.perltidyrc>. You can -download a copy at - -http://sourceforge.net/projects/tidyview - -=head2 Additional Information - -This document has covered the main parameters. Many more parameters are -available for special purposes and for fine-tuning a style. For complete -information see the perltidy manual -http://perltidy.sourceforge.net/perltidy.html - -For an introduction to using perltidy, see the tutorial -http://perltidy.sourceforge.net/tutorial.html - -Suggestions for improving this document are welcome and may be sent to -perltidy at users.sourceforge.net - -=cut diff --git a/docs/testfile.pl b/docs/testfile.pl deleted file mode 100644 index 182f2be..0000000 --- a/docs/testfile.pl +++ /dev/null @@ -1,10 +0,0 @@ -print "Help Desk -- What Editor do you use? "; -chomp($editor = ); -if ($editor =~ /emacs/i) { - print "Why aren't you using vi?\n"; -} elsif ($editor =~ /vi/i) { - print "Why aren't you using emacs?\n"; -} else { - print "I think that's the problem\n"; -} - diff --git a/docs/tutorial.html b/docs/tutorial.html new file mode 100644 index 0000000..7031763 --- /dev/null +++ b/docs/tutorial.html @@ -0,0 +1,381 @@ + + + + + + + + + + + + + + + +

A Brief Perltidy Tutorial

+ +

Perltidy can save you a lot of tedious editing if you spend a few minutes learning to use it effectively. Perltidy is highly configurable, but for many programmers the default parameter set will be satisfactory, with perhaps a few additional parameters to account for style preferences.

+ +

This tutorial assumes that perltidy has been installed on your system. Installation instructions accompany the package. To follow along with this tutorial, please find a small Perl script and place a copy in a temporary directory. For example, here is a small (and silly) script:

+ +
 print "Help Desk -- What Editor do you use?";
+ chomp($editor = <STDIN>);
+ if ($editor =~ /emacs/i) {
+   print "Why aren't you using vi?\n";
+ } elsif ($editor =~ /vi/i) {
+   print "Why aren't you using emacs?\n";
+ } else {
+   print "I think that's the problem\n";
+ }
+ +

It is included in the docs section of the distribution.

+ +

A First Test

+ +

Assume that the name of your script is testfile.pl. You can reformat it with the default options to use the style recommended in the perlstyle man pages with the command:

+ +
 perltidy testfile.pl
+ +

For safety, perltidy never overwrites your original file. In this case, its output will go to a file named testfile.pl.tdy, which you should examine now with your editor. Here is what the above file looks like with the default options:

+ +
 print "Help Desk -- What Editor do you use?";
+ chomp( $editor = <STDIN> );
+ if ( $editor =~ /emacs/i ) {
+     print "Why aren't you using vi?\n";
+ }
+ elsif ( $editor =~ /vi/i ) {
+     print "Why aren't you using emacs?\n";
+ }
+ else {
+     print "I think that's the problem\n";
+ }
+ +

You'll notice an immediate style change from the "cuddled-else" style of the original to the default "non-cuddled-else" style. This is because perltidy has to make some kind of default selection of formatting options, and this default tries to follow the suggestions in the perlstyle man pages.

+ +

If you prefer the original "cuddled-else" style, don't worry, you can indicate that with a -ce flag. So if you rerun with that flag

+ +
 perltidy -ce testfile.pl
+ +

you will see a return to the original "cuddled-else" style. There are many more parameters for controlling style, and some of the most useful of these are discussed below.

+ +

Indentation

+ +

Another noticeable difference between the original and the reformatted file is that the indentation has been changed from 2 spaces to 4 spaces. That's because 4 spaces is the default. You may change this to be a different number with -i=n.

+ +

To get some practice, try these examples, and examine the resulting testfile.pl.tdy file:

+ +
 perltidy -i=8 testfile.pl
+ +

This changes the default of 4 spaces per indentation level to be 8. Now just to emphasize the point, try this and examine the result:

+ +
 perltidy -i=0 testfile.pl
+ +

There will be no indentation at all in this case.

+ +

Input Flags

+ +

This is a good place to mention a few points regarding the input flags. First, for each option, there are two forms, a long form and a short form, and either may be used.

+ +

For example, if you want to change the number of columns corresponding to one indentation level to 3 (from the default of 4) you may use either

+ +
 -i=3   or  --indent-columns=3
+ +

The short forms are convenient for entering parameters by hand, whereas the long forms, though often ridiculously long, are self-documenting and therefore useful in configuration scripts. You may use either one or two dashes ahead of the parameters. Also, the '=' sign is optional, and may be a single space instead. However, the value of a parameter must NOT be adjacent to the flag, like this -i3 (WRONG). Also, flags must be input separately, never bundled together.

+ +

Line Length and Continuation Indentation.

+ +

If you change the indentation spaces you will probably also need to change the continuation indentation spaces with the parameter -ci=n. The continuation indentation is the extra indentation -- 2 spaces by default -- given to that portion of a long line which has been placed below the start of a statement. For example:

+ +
 croak "Couldn't pop genome file"
+   unless sysread( $impl->{file}, $element, $impl->{group} )
+   and truncate( $impl->{file}, $new_end );
+ +

There is no fixed rule for setting the value for -ci=n, but it should probably not exceed one-half of the number of spaces of a full indentation level.

+ +

In the above snippet, the statement was broken into three lines. The actual number is governed by a parameter, the maximum line length, as well as by what perltidy considers to be good break points. The maximum line length is 80 characters by default. You can change this to be any number n with the -l=n flag. Perltidy tries to produce lines which do not exceed this length, and it does this by finding good break points. For example, the above snippet would look like this with perltidy -l=40:

+ +
 croak "Couldn't pop genome file"
+   unless
+   sysread( $impl->{file}, $element,
+     $impl->{group} )
+   and
+   truncate( $impl->{file}, $new_end );
+ +

You may be wondering what would happen with, say, -l=1. Go ahead and try it.

+ +

Tabs or Spaces?

+ +

With indentation, there is always a tab issue to resolve. By default, perltidy will use leading ascii space characters instead of tabs. The reason is that this will be displayed correctly by virtually all editors, and in the long run, will avoid maintenance problems.

+ +

However, if you prefer, you may have perltidy entab the leading whitespace of a line with the command -et=n, where n is the number of spaces which will be represented by one tab. But note that your text will not be displayed properly unless viewed with software that is configured to display n spaces per tab.

+ +

Input/Output Control

+ +

In the first example, we saw that if we pass perltidy the name of a file on the command line, it reformats it and creates a new filename by appending an extension, .tdy. This is the default behavior, but there are several other options.

+ +

On most systems, you may use wildcards to reformat a whole batch of files at once, like this for example:

+ +
 perltidy *.pl
+ +

and in this case, each of the output files will be have a name equal to the input file with the extension .tdy appended. If you decide that the formatting is acceptable, you will want to backup your originals and then remove the .tdy extensions from the reformatted files. There is an powerful perl script called rename that can be used for this purpose; if you don't have it, you can find it for example in The Perl Cookbook.

+ +

If you find that the formatting done by perltidy is usually acceptable, you may want to save some effort by letting perltidy do a simple backup of the original files and then reformat them in place. You specify this with a -b flag. For example, the command

+ +
 perltidy -b *.pl
+ +

will rename the original files by appending a .bak extension, and then create reformatted files with the same names as the originals. (If you don't like the default backup extension choice .bak, the manual tells how to change it). Each time you run perltidy with the -b option, the previous .bak files will be overwritten, so please make regular separate backups.

+ +

If there is no input filename specified on the command line, then input is assumed to come from standard input and output will go to standard output. On systems with a Unix-like interface, you can use perltidy as a filter, like this:

+ +
 perltidy <somefile.pl >newfile.pl
+ +

What happens in this case is that the shell takes care of the redirected input files, '<somefile.pl', and so perltidy never sees the filename. Therefore, it knows to use the standard input and standard output channels.

+ +

If you are executing perltidy on a file and want to force the output to standard output, rather than create a .tdy file, you can indicate this with the flag -st, like this:

+ +
 perltidy somefile.pl -st >otherfile.pl
+ +

You can also control the name of the output file with the -o flag, like this:

+ +
 perltidy testfile.pl -o=testfile.new.pl
+ +

Style Variations

+ +

Perltidy has to make some kind of default selection of formatting options, and its choice is to try to follow the suggestions in the perlstyle man pages. Many programmers more or less follow these suggestions with a few exceptions. In this section we will look at just a few of the most commonly used style parameters. Later, you may want to systematically develop a set of style parameters with the help of the perltidy stylekey web page at http://perltidy.sourceforge.net/stylekey.html

+ +
+ +
-ce, cuddled elses
+
+ +

If you prefer cuddled elses, use the -ce flag.

+ +
+
-bl, braces left
+
+ +

Here is what the if block in the above script looks like with -bl:

+ +
 if ( $editor =~ /emacs/i )
+ {
+     print "Why aren't you using vi?\n";
+ }
+ elsif ( $editor =~ /vi/i )
+ {
+     print "Why aren't you using emacs?\n";
+ }
+ else
+ {
+     print "I think that's the problem\n";
+ }
+ +
+
-lp, Lining up with parentheses
+
+ +

The -lp parameter can enhance the readability of lists by adding extra indentation. Consider:

+ +
        %romanNumerals = (
+            one   => 'I',
+            two   => 'II',
+            three => 'III',
+            four  => 'IV',
+            five  => 'V',
+            six   => 'VI',
+            seven => 'VII',
+            eight => 'VIII',
+            nine  => 'IX',
+            ten   => 'X'
+        );
+ +

With the -lp flag, this is formatted as:

+ +
        %romanNumerals = (
+                           one   => 'I',
+                           two   => 'II',
+                           three => 'III',
+                           four  => 'IV',
+                           five  => 'V',
+                           six   => 'VI',
+                           seven => 'VII',
+                           eight => 'VIII',
+                           nine  => 'IX',
+                           ten   => 'X'
+                         );
+ +

which is preferred by some. (I've actually used -lp and -cti=1 to format this block. The -cti=1 flag causes the closing paren to align vertically with the opening paren, which works well with the -lp indentation style). An advantage of -lp indentation are that it displays lists nicely. A disadvantage is that deeply nested lists can require a long line length.

+ +
+
-bt,-pt,-sbt: Container tightness
+
+ +

These are parameters for controlling the amount of space within containing parentheses, braces, and square brackets. The example below shows the effect of the three possible values, 0, 1, and 2, for the case of parentheses:

+ +
 if ( ( my $len_tab = length( $tabstr ) ) > 0 ) {  # -pt=0
+ if ( ( my $len_tab = length($tabstr) ) > 0 ) {    # -pt=1 (default)
+ if ((my $len_tab = length($tabstr)) > 0) {        # -pt=2
+ +

A value of 0 causes all parens to be padded on the inside with a space, and a value of 2 causes this never to happen. With a value of 1, spaces will be introduced if the item within is more than a single token.

+ +
+
+ +

Configuration Files

+ +

While style preferences vary, most people would agree that it is important to maintain a uniform style within a script, and this is a major benefit provided by perltidy. Once you have decided on which, if any, special options you prefer, you may want to avoid having to enter them each time you run it. You can do this by creating a special file named .perltidyrc in either your home directory, your current directory, or certain system-dependent locations. (Note the leading "." in the file name).

+ +

A handy command to know when you start using a configuration file is

+ +
  perltidy -dpro
+ +

which will dump to standard output the search that perltidy makes when looking for a configuration file, and the contents of the one that it selects, if any. This is one of a number of useful "dump and die" commands, in which perltidy will dump some information to standard output and then immediately exit. Others include -h, which dumps help information, and -v, which dumps the version number.

+ +

Another useful command when working with configuration files is

+ +
 perltidy -pro=file
+ +

which causes the contents of file to be used as the configuration file instead of a .perltidyrc file. With this command, you can easily switch among several different candidate configuration files during testing.

+ +

This .perltidyrc file is free format. It is simply a list of parameters, just as they would be entered on a command line. Any number of lines may be used, with any number of parameters per line, although it may be easiest to read with one parameter per line. Blank lines are ignored, and text after a '#' is ignored to the end of a line.

+ +

Here is an example of a .perltidyrc file:

+ +
  # This is a simple of a .perltidyrc configuration file
+  # This implements a highly spaced style
+  -bl    # braces on new lines
+  -pt=0  # parens not tight at all
+  -bt=0  # braces not tight
+  -sbt=0 # square brackets not tight
+ +

If you experiment with this file, remember that it is in your directory, since if you are running on a Unix system, files beginning with a "." are normally hidden.

+ +

If you have a .perltidyrc file, and want perltidy to ignore it, use the -npro flag on the command line.

+ +

Error Reporting

+ +

Let's run through a 'fire drill' to see how perltidy reports errors. Try introducing an extra opening brace somewhere in a test file. For example, introducing an extra brace in the file listed above produces the following message on the terminal (or standard error output):

+ +
 ## Please see file testfile.pl.ERR!
+ +

Here is what testfile.pl.ERR contains:

+ +
 10:    final indentation level: 1
+ 
+ Final nesting depth of '{'s is 1
+ The most recent un-matched '{' is on line 6
+ 6: } elsif ($temperature < 68) {{
+                                ^
+ +

This shows how perltidy will, by default, write error messages to a file with the extension .ERR, and it will write a note that it did so to the standard error device. If you would prefer to have the error messages sent to standard output, instead of to a .ERR file, use the -se flag.

+ +

Almost every programmer would want to see error messages of this type, but there are a number of messages which, if reported, would be annoying. To manage this problem, perltidy puts its messages into two categories: errors and warnings. The default is to just report the errors, but you can control this with input flags, as follows:

+ +
 flag  what this does
+ ----  --------------
+       default: report errors but not warnings
+ -w    report all errors and warnings
+ -q    quiet! do not report either errors or warnings
+ +

The default is generally a good choice, but it's not a bad idea to check programs with -w occasionally, especially if your are looking for a bug. For example, it will ask if you really want '=' instead of '=~' in this line:

+ +
    $line = s/^\s*//;
+ +

This kind of error can otherwise be hard to find.

+ +

The Log File

+ +

One last topic that needs to be touched upon concerns the .LOG file. This is where perltidy records messages that are not normally of any interest, but which just might occasionally be useful. This file is not saved, though, unless perltidy detects that it has made a mistake or you ask for it to be saved.

+ +

There are a couple of ways to ask perltidy to save a log file. To create a relatively sparse log file, use

+ +
 perltidy -log testfile.pl
+ +

and for a verbose log file, use

+ +
 perltidy -g testfile.pl
+ +

The difference is that the first form only saves detailed information at least every 50th line, while the second form saves detailed information about every line.

+ +

So returning to our example, lets force perltidy to save a verbose log file by issuing the following command

+ +
 perltidy -g testfile.pl
+ +

You will find that a file named testfile.pl.LOG has been created in your directory.

+ +

If you open this file, you will see that it is a text file with a combination of warning messages and informative messages. All you need to know for now is that it exists; someday it may be useful.

+ +

Using Perltidy as a Filter on Selected Text from an Editor

+ +

Most programmer's editors allow a selected group of lines to be passed through an external filter. Perltidy has been designed to work well as a filter, and it is well worthwhile learning the appropriate commands to do this with your editor. This means that you can enter a few keystrokes and watch a block of text get reformatted. If you are not doing this, you are missing out of a lot of fun! You may want to supply the -q flag to prevent error messages regarding incorrect syntax, since errors may be obvious in the indentation of the reformatted text. This is entirely optional, but if you do not use the -q flag, you will need to use the undo keys in case an error message appears on the screen.

+ +

For example, within the vim editor it is only necessary to select the text by any of the text selection methods, and then issue the command !perltidy in command mode. Thus, an entire file can be formatted using

+ +
 :%!perltidy -q
+ +

or, without the -q flag, just

+ +
 :%!perltidy
+ +

It isn't necessary to format an entire file, however. Perltidy will probably work well as long as you select blocks of text whose braces, parentheses, and square brackets are properly balanced. You can even format an elsif block without the leading if block, as long as the text you select has all braces balanced.

+ +

For the emacs editor, first mark a region and then pipe it through perltidy. For example, to format an entire file, select it with C-x h and then pipe it with M-1 M-| and then perltidy. The numeric argument, M-1 causes the output from perltidy to replace the marked text. See "GNU Emacs Manual" for more information, http://www.gnu.org/manual/emacs-20.3/html_node/emacs_toc.html

+ +

If you have difficulty with an editor, try the -st flag, which will force perltidy to send output to standard output. This might be needed, for example, if the editor passes text to perltidy as temporary filename instead of through the standard input. If this works, you might put the -st flag in your .perltidyrc file.

+ +

If you have some tips for making perltidy work with your editor, and are willing to share them, please email me (see below) and I'll try to incorporate them in this document or put up a link to them.

+ +

After you get your editor and perltidy successfully talking to each other, try formatting a snippet of code with a brace error to see what happens. (Do not use the quiet flag, -q, for this test). Perltidy will send one line starting with ## to standard error output. Your editor may either display it at the top of the reformatted text or at the bottom (or even midstream!). You probably cannot control this, and perltidy can't, but you need to know where to look when an actual error is detected.

+ +

Writing an HTML File

+ +

Perltidy can switch between two different output modes. We have been discussing what might be called its "beautifier" mode, but it can also output in HTML. To do this, use the -html flag, like this:

+ +
 perltidy -html testfile.pl
+ +

which will produce a file testfile.pl.html. There are many parameters available for adjusting the appearance of an HTML file, but a very easy way is to just write the HTML file with this simple command and then edit the stylesheet which is embedded at its top.

+ +

One important thing to know about the -html flag is that perltidy can either send its output to its beautifier or to its HTML writer, but (unfortunately) not both in a single run. So the situation can be represented like this:

+ +
                  ------------
+                  |          |     --->beautifier--> testfile.pl.tdy
+ testfile.pl -->  | perltidy | -->
+                  |          |     --->HTML -------> testfile.pl.html
+                  ------------
+ +

And in the future, there may be more output filters. So if you would like to both beautify a script and write it to HTML, you need to do it in two steps.

+ +

Summary

+ +

That's enough to get started using perltidy. When you are ready to create a .perltidyrc file, you may find it helpful to use the stylekey page as a guide at http://perltidy.sourceforge.net/stylekey.html

+ +

Many additional special features and capabilities can be found in the manual pages for perltidy at http://perltidy.sourceforge.net/perltidy.html

+ +

We hope that perltidy makes perl programming a little more fun. Please check the perltidy web site http://perltidy.sourceforge.net occasionally for updates.

+ +

The author may be contacted at perltidy at users.sourceforge.net.

+ + + + + + + diff --git a/docs/tutorial.pod b/docs/tutorial.pod deleted file mode 100644 index 9d1f260..0000000 --- a/docs/tutorial.pod +++ /dev/null @@ -1,534 +0,0 @@ -=head1 A Brief Perltidy Tutorial - -Perltidy can save you a lot of tedious editing if you spend a few -minutes learning to use it effectively. Perltidy is highly -configurable, but for many programmers the default parameter set will be -satisfactory, with perhaps a few additional parameters to account for -style preferences. - -This tutorial assumes that perltidy has been installed on your system. -Installation instructions accompany the package. To follow along with -this tutorial, please find a small Perl script and place a copy in a -temporary directory. For example, here is a small (and silly) script: - - print "Help Desk -- What Editor do you use?"; - chomp($editor = ); - if ($editor =~ /emacs/i) { - print "Why aren't you using vi?\n"; - } elsif ($editor =~ /vi/i) { - print "Why aren't you using emacs?\n"; - } else { - print "I think that's the problem\n"; - } - -It is included in the F section of the distribution. - -=head2 A First Test - -Assume that the name of your script is F. You can reformat it -with the default options to use the style recommended in the perlstyle man -pages with the command: - - perltidy testfile.pl - -For safety, perltidy never overwrites your original file. In this case, -its output will go to a file named F, which you should -examine now with your editor. Here is what the above file looks like -with the default options: - - print "Help Desk -- What Editor do you use?"; - chomp( $editor = ); - if ( $editor =~ /emacs/i ) { - print "Why aren't you using vi?\n"; - } - elsif ( $editor =~ /vi/i ) { - print "Why aren't you using emacs?\n"; - } - else { - print "I think that's the problem\n"; - } - -You'll notice an immediate style change from the "cuddled-else" style of -the original to the default "non-cuddled-else" style. This is because -perltidy has to make some kind of default selection of formatting -options, and this default tries to follow the suggestions in the -perlstyle man pages. - -If you prefer the original "cuddled-else" style, don't worry, you can -indicate that with a B<-ce> flag. So if you rerun with that flag - - perltidy -ce testfile.pl - -you will see a return to the original "cuddled-else" style. There are -many more parameters for controlling style, and some of the most useful -of these are discussed below. - -=head2 Indentation - -Another noticeable difference between the original and the reformatted -file is that the indentation has been changed from 2 spaces to 4 spaces. -That's because 4 spaces is the default. You may change this to be a -different number with B<-i=n>. - -To get some practice, try these examples, and examine the resulting -F file: - - perltidy -i=8 testfile.pl - -This changes the default of 4 spaces per indentation level to be 8. Now -just to emphasize the point, try this and examine the result: - - perltidy -i=0 testfile.pl - -There will be no indentation at all in this case. - -=head2 Input Flags - -This is a good place to mention a few points regarding the input flags. -First, for each option, there are two forms, a long form and a short -form, and either may be used. - -For example, if you want to change the number of columns corresponding to one -indentation level to 3 (from the default of 4) you may use either - - -i=3 or --indent-columns=3 - -The short forms are convenient for entering parameters by hand, whereas -the long forms, though often ridiculously long, are self-documenting and -therefore useful in configuration scripts. You may use either one or -two dashes ahead of the parameters. Also, the '=' sign is optional, -and may be a single space instead. However, the value of a parameter -must NOT be adjacent to the flag, like this B<-i3> (WRONG). Also, -flags must be input separately, never bundled together. - -=head2 Line Length and Continuation Indentation. - -If you change the indentation spaces you will probably also need to -change the continuation indentation spaces with the parameter B<-ci=n>. -The continuation indentation is the extra indentation -- 2 spaces by -default -- given to that portion of a long line which has been placed -below the start of a statement. For example: - - croak "Couldn't pop genome file" - unless sysread( $impl->{file}, $element, $impl->{group} ) - and truncate( $impl->{file}, $new_end ); - -There is no fixed rule for setting the value for B<-ci=n>, but it should -probably not exceed one-half of the number of spaces of a full -indentation level. - -In the above snippet, the statement was broken into three lines. The -actual number is governed by a parameter, the maximum line length, as -well as by what perltidy considers to be good break points. The maximum -line length is 80 characters by default. You can change this to be any -number B with the B<-l=n> flag. Perltidy tries to produce lines -which do not exceed this length, and it does this by finding good break -points. For example, the above snippet would look like this with -B: - - croak "Couldn't pop genome file" - unless - sysread( $impl->{file}, $element, - $impl->{group} ) - and - truncate( $impl->{file}, $new_end ); - -You may be wondering what would happen with, say, B<-l=1>. Go -ahead and try it. - -=head2 Tabs or Spaces? - -With indentation, there is always a tab issue to resolve. By default, -perltidy will use leading ascii space characters instead of tabs. The -reason is that this will be displayed correctly by virtually all -editors, and in the long run, will avoid maintenance problems. - -However, if you prefer, you may have perltidy entab the leading -whitespace of a line with the command B<-et=n>, where B is the number -of spaces which will be represented by one tab. But note that your text -will not be displayed properly unless viewed with software that is -configured to display B spaces per tab. - -=head2 Input/Output Control - -In the first example, we saw that if we pass perltidy the name -of a file on the command line, it reformats it and creates a -new filename by appending an extension, F<.tdy>. This is the -default behavior, but there are several other options. - -On most systems, you may use wildcards to reformat a whole batch of -files at once, like this for example: - - perltidy *.pl - -and in this case, each of the output files will be have a name equal to -the input file with the extension F<.tdy> appended. If you decide that -the formatting is acceptable, you will want to backup your originals and -then remove the F<.tdy> extensions from the reformatted files. There is -an powerful perl script called C that can be used for this -purpose; if you don't have it, you can find it for example in B. - -If you find that the formatting done by perltidy is usually acceptable, -you may want to save some effort by letting perltidy do a simple backup -of the original files and then reformat them in place. You specify this -with a B<-b> flag. For example, the command - - perltidy -b *.pl - -will rename the original files by appending a F<.bak> extension, and then -create reformatted files with the same names as the originals. (If you don't -like the default backup extension choice F<.bak>, the manual tells how to -change it). Each time you run perltidy with the B<-b> option, the previous -F<.bak> files will be overwritten, so please make regular separate backups. - -If there is no input filename specified on the command line, then input -is assumed to come from standard input and output will go to standard -output. On systems with a Unix-like interface, you can use perltidy as -a filter, like this: - - perltidy newfile.pl - -What happens in this case is that the shell takes care of the redirected -input files, ' file, you can -indicate this with the flag B<-st>, like this: - - perltidy somefile.pl -st >otherfile.pl - -You can also control the name of the output file with the B<-o> flag, -like this: - - perltidy testfile.pl -o=testfile.new.pl - -=head2 Style Variations - -Perltidy has to make some kind of default selection of formatting -options, and its choice is to try to follow the suggestions in the -perlstyle man pages. Many programmers more or less follow these -suggestions with a few exceptions. In this section we will -look at just a few of the most commonly used style parameters. Later, -you may want to systematically develop a set of style -parameters with the help of -the perltidy B web page at -http://perltidy.sourceforge.net/stylekey.html - -=over 4 - -=item B<-ce>, cuddled elses - -If you prefer cuddled elses, use the B<-ce> flag. - -=item B<-bl>, braces left - -Here is what the C block in the above script looks like with B<-bl>: - - if ( $editor =~ /emacs/i ) - { - print "Why aren't you using vi?\n"; - } - elsif ( $editor =~ /vi/i ) - { - print "Why aren't you using emacs?\n"; - } - else - { - print "I think that's the problem\n"; - } - -=item B<-lp>, Lining up with parentheses - -The B<-lp> parameter can enhance the readability of lists by adding -extra indentation. Consider: - - %romanNumerals = ( - one => 'I', - two => 'II', - three => 'III', - four => 'IV', - five => 'V', - six => 'VI', - seven => 'VII', - eight => 'VIII', - nine => 'IX', - ten => 'X' - ); - -With the B<-lp> flag, this is formatted as: - - %romanNumerals = ( - one => 'I', - two => 'II', - three => 'III', - four => 'IV', - five => 'V', - six => 'VI', - seven => 'VII', - eight => 'VIII', - nine => 'IX', - ten => 'X' - ); - -which is preferred by some. (I've actually used B<-lp> and B<-cti=1> to -format this block. The B<-cti=1> flag causes the closing paren to align -vertically with the opening paren, which works well with the B<-lp> -indentation style). An advantage of B<-lp> indentation are that it -displays lists nicely. A disadvantage is that deeply nested lists can -require a long line length. - -=item B<-bt>,B<-pt>,B<-sbt>: Container tightness - -These are parameters for controlling the amount of space within -containing parentheses, braces, and square brackets. The example below -shows the effect of the three possible values, 0, 1, and 2, for the case -of parentheses: - - if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # -pt=0 - if ( ( my $len_tab = length($tabstr) ) > 0 ) { # -pt=1 (default) - if ((my $len_tab = length($tabstr)) > 0) { # -pt=2 - -A value of 0 causes all parens to be padded on the inside with a space, -and a value of 2 causes this never to happen. With a value of 1, spaces -will be introduced if the item within is more than a single token. - -=back - -=head2 Configuration Files - -While style preferences vary, most people would agree that it is -important to maintain a uniform style within a script, and this is a -major benefit provided by perltidy. Once you have decided on which, if -any, special options you prefer, you may want to avoid having to enter -them each time you run it. You can do this by creating a special file -named F<.perltidyrc> in either your home directory, your current -directory, or certain system-dependent locations. (Note the leading "." -in the file name). - -A handy command to know when you start using a configuration file is - - perltidy -dpro - -which will dump to standard output the search that perltidy makes when -looking for a configuration file, and the contents of the one that it -selects, if any. This is one of a number of useful "dump and die" -commands, in which perltidy will dump some information to standard -output and then immediately exit. Others include B<-h>, which dumps -help information, and B<-v>, which dumps the version number. - -Another useful command when working with configuration files is - - perltidy -pro=file - -which causes the contents of F to be used as the configuration -file instead of a F<.perltidyrc> file. With this command, you can -easily switch among several different candidate configuration files -during testing. - -This F<.perltidyrc> file is free format. It is simply a list of -parameters, just as they would be entered on a command line. Any number -of lines may be used, with any number of parameters per line, although -it may be easiest to read with one parameter per line. Blank lines are -ignored, and text after a '#' is ignored to the end of a line. - -Here is an example of a F<.perltidyrc> file: - - # This is a simple of a .perltidyrc configuration file - # This implements a highly spaced style - -bl # braces on new lines - -pt=0 # parens not tight at all - -bt=0 # braces not tight - -sbt=0 # square brackets not tight - -If you experiment with this file, remember that it is in your directory, -since if you are running on a Unix system, files beginning with a "." -are normally hidden. - -If you have a F<.perltidyrc> file, and want perltidy to ignore it, -use the B<-npro> flag on the command line. - -=head2 Error Reporting - -Let's run through a 'fire drill' to see how perltidy reports errors. Try -introducing an extra opening brace somewhere in a test file. For example, -introducing an extra brace in the file listed above produces the following -message on the terminal (or standard error output): - - ## Please see file testfile.pl.ERR! - -Here is what F contains: - - 10: final indentation level: 1 - - Final nesting depth of '{'s is 1 - The most recent un-matched '{' is on line 6 - 6: } elsif ($temperature < 68) {{ - ^ - -This shows how perltidy will, by default, write error messages to a file -with the extension F<.ERR>, and it will write a note that it did so to -the standard error device. If you would prefer to have the error -messages sent to standard output, instead of to a F<.ERR> file, use the -B<-se> flag. - -Almost every programmer would want to see error messages of this type, -but there are a number of messages which, if reported, would be -annoying. To manage this problem, perltidy puts its messages into two -categories: errors and warnings. The default is to just report the -errors, but you can control this with input flags, as follows: - - flag what this does - ---- -------------- - default: report errors but not warnings - -w report all errors and warnings - -q quiet! do not report either errors or warnings - -The default is generally a good choice, but it's not a bad idea to check -programs with B<-w> occasionally, especially if your are looking for a -bug. For example, it will ask if you really want '=' instead of '=~' in -this line: - - $line = s/^\s*//; - -This kind of error can otherwise be hard to find. - -=head2 The Log File - -One last topic that needs to be touched upon concerns the F<.LOG> file. -This is where perltidy records messages that are not normally of any -interest, but which just might occasionally be useful. This file is not -saved, though, unless perltidy detects that it has made a mistake or you -ask for it to be saved. - -There are a couple of ways to ask perltidy to save a log file. To -create a relatively sparse log file, use - - perltidy -log testfile.pl - -and for a verbose log file, use - - perltidy -g testfile.pl - -The difference is that the first form only saves detailed information at -least every 50th line, while the second form saves detailed information -about every line. - -So returning to our example, lets force perltidy to save a -verbose log file by issuing the following command - - perltidy -g testfile.pl - -You will find that a file named F has been -created in your directory. - -If you open this file, you will see that it is a text file with a -combination of warning messages and informative messages. All you need -to know for now is that it exists; someday it may be useful. - -=head2 Using Perltidy as a Filter on Selected Text from an Editor - -Most programmer's editors allow a selected group of lines to be passed -through an external filter. Perltidy has been designed to work well as -a filter, and it is well worthwhile learning the appropriate commands to -do this with your editor. This means that you can enter a few -keystrokes and watch a block of text get reformatted. If you are not -doing this, you are missing out of a lot of fun! You may want to supply -the B<-q> flag to prevent error messages regarding incorrect syntax, -since errors may be obvious in the indentation of the reformatted text. -This is entirely optional, but if you do not use the B<-q> flag, you -will need to use the undo keys in case an error message appears on the -screen. - -For example, within the B editor it is only necessary to select the -text by any of the text selection methods, and then issue the command -!perltidy in command mode. Thus, an entire file can be formatted using - - :%!perltidy -q - -or, without the B<-q> flag, just - - :%!perltidy - -It isn't necessary to format an entire file, however. Perltidy will -probably work well as long as you select blocks of text whose braces, -parentheses, and square brackets are properly balanced. You can -even format an C block without the leading C block, as -long as the text you select has all braces balanced. - -For the B editor, first mark a region and then pipe it through -perltidy. For example, to format an entire file, select it with C -and then pipe it with C and then C. The numeric -argument, C causes the output from perltidy to replace the marked -text. See "GNU Emacs Manual" for more information, -http://www.gnu.org/manual/emacs-20.3/html_node/emacs_toc.html - -If you have difficulty with an editor, try the B<-st> flag, which will -force perltidy to send output to standard output. This might be needed, -for example, if the editor passes text to perltidy as temporary filename -instead of through the standard input. If this works, you might put the -B<-st> flag in your F<.perltidyrc> file. - -If you have some tips for making perltidy work with your editor, and -are willing to share them, please email me (see below) and I'll try to -incorporate them in this document or put up a link to them. - -After you get your editor and perltidy successfully talking to each -other, try formatting a snippet of code with a brace error to see what -happens. (Do not use the quiet flag, B<-q>, for this test). Perltidy -will send one line starting with C<##> to standard error output. Your -editor may either display it at the top of the reformatted text or at -the bottom (or even midstream!). You probably cannot control this, and -perltidy can't, but you need to know where to look when an actual error -is detected. - -=head2 Writing an HTML File - -Perltidy can switch between two different output modes. We have been -discussing what might be called its "beautifier" mode, but it can also -output in HTML. To do this, use the B<-html> flag, like this: - - perltidy -html testfile.pl - -which will produce a file F. There are many -parameters available for adjusting the appearance of an HTML file, but a -very easy way is to just write the HTML file with this simple command -and then edit the stylesheet which is embedded at its top. - -One important thing to know about the B<-html> flag is that perltidy can -either send its output to its beautifier or to its HTML writer, but -(unfortunately) not both in a single run. So the situation can be -represented like this: - - ------------ - | | --->beautifier--> testfile.pl.tdy - testfile.pl --> | perltidy | --> - | | --->HTML -------> testfile.pl.html - ------------ - -And in the future, there may be more output filters. So if you would -like to both beautify a script and write it to HTML, you need to do it -in two steps. - -=head2 Summary - -That's enough to get started using perltidy. -When you are ready to create a F<.perltidyrc> file, you may find it -helpful to use the F page as a guide at -http://perltidy.sourceforge.net/stylekey.html - -Many additional special -features and capabilities can be found in the manual pages for perltidy -at -http://perltidy.sourceforge.net/perltidy.html - -We hope that perltidy makes perl programming a little more fun. -Please check the perltidy -web site http://perltidy.sourceforge.net occasionally -for updates. - -The author may be contacted at perltidy at users.sourceforge.net. - -=cut diff --git a/examples/delete_ending_blank_lines.pl b/examples/delete_ending_blank_lines.pl new file mode 100755 index 0000000..7acad2e --- /dev/null +++ b/examples/delete_ending_blank_lines.pl @@ -0,0 +1,99 @@ +#!/usr/bin/perl -w +use strict; + +# Example script for removing trailing blank lines of code from a perl script +# This is from the examples/ directory of the perltidy distribution and may +# be modified as needed. + +# This was written in response to RT #118553, "leave only one newline at end of file". +# Adding the requested feature to perltidy itself would have very undesirable +# side-effects when perltidy is operated from within an editor. So it is best +# done with a separate filter script on entire files. + +# usage: +# delete_ending_blank_lines.pl myfile.pl >myfile.new +# delete_ending_blank_lines.pl myfile.new +use Getopt::Std; +use Perl::Tidy; +use IO::File; +$| = 1; +use vars qw($opt_h); +my $usage = <outfile +EOM +getopts('h') or die "$usage"; +if ($opt_h) { die $usage } + +# Make the source for perltidy, which will be a filehandle +# or just '-' if the source is stdin +my ($file, $fh, $source); +if ( @ARGV == 0 ) { + $source = '-'; +} +elsif ( @ARGV == 1 ) { + $file = $ARGV[0]; + $fh = IO::File->new( $file, 'r' ); + unless ($fh) { die "cannot open '$file': $!\n" } + $source = $fh; +} +else { die $usage } + +# make the callback object +my $formatter = MyFormatter->new(); + +my $dest; + +# start perltidy, which will start calling our write_line() +my $err=perltidy( + 'formatter' => $formatter, # callback object + 'source' => $source, + 'destination' => \$dest, # (not really needed) + 'argv' => "-npro -se", # dont need .perltidyrc + # errors to STDOUT +); +if ($err) { + die "Error calling perltidy\n"; +} +$fh->close() if $fh; + +package MyFormatter; + +my @lines; + +sub new { + my ($class) = @_; + bless {}, $class; +} + +sub write_line { + + # This is called from perltidy line-by-line; we just save lines + my $self = shift; + my $line_of_tokens = shift; + push @lines, $line_of_tokens; +} + +# called once after the last line of a file +sub finish_formatting { + my $self = shift; + + # remove all trailing blank lines of code + while (my $line_of_tokens = pop(@lines)) { + my $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; + if ( $line_type eq 'CODE' ) { + chomp $input_line; + next unless ($input_line); + } + push @lines, $line_of_tokens; + last; + } + + # write remaining lines + foreach my $line_of_tokens (@lines) { + my $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; + print $input_line; + } + return; +} diff --git a/examples/perltidyrc_dump.pl b/examples/perltidyrc_dump.pl index 8c598cf..f2008e7 100755 --- a/examples/perltidyrc_dump.pl +++ b/examples/perltidyrc_dump.pl @@ -190,8 +190,17 @@ sub dump_options { # print the long version of the parameter # with the short version as a side comment my $short_name = $short_name{$long_name}; - my $short_option = $short_prefix . $short_name . $suffix; my $long_option = $prefix . $long_name . $suffix; + + # A few options do not have a short abbreviation. These include + # 'recombine' and 'valign', which are mainly for debugging. As a + # workaround, we will make it the same as the long option. This + # will insure that the -s -q flags work. + my $short_option = $long_option; + if ($short_name) { + $short_option = $short_prefix . $short_name . $suffix; + } + my $note = $requals_default->{$long_name} ? " [=default]" : ""; if ( $rmy_opts->{s} ) { print $short_option. "\n"; diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index d28e3f6..825b357 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -3,7 +3,7 @@ # # perltidy - a perl script indenter and formatter # -# Copyright (c) 2000-2018 by Steve Hancock +# Copyright (c) 2000-2019 by Steve Hancock # Distributed under the GPL license agreement; see file COPYING # # This program is free software; you can redistribute it and/or modify @@ -62,7 +62,22 @@ use warnings; use strict; use Exporter; use Carp; -$|++; +use Digest::MD5 qw(md5_hex); +use Perl::Tidy::Debugger; +use Perl::Tidy::DevNull; +use Perl::Tidy::Diagnostics; +use Perl::Tidy::FileWriter; +use Perl::Tidy::Formatter; +use Perl::Tidy::HtmlWriter; +use Perl::Tidy::IOScalar; +use Perl::Tidy::IOScalarArray; +use Perl::Tidy::IndentationItem; +use Perl::Tidy::LineSink; +use Perl::Tidy::LineSource; +use Perl::Tidy::Logger; +use Perl::Tidy::Tokenizer; +use Perl::Tidy::VerticalAligner; +local $| = 1; use vars qw{ $VERSION @@ -84,7 +99,17 @@ use File::Copy; use File::Temp qw(tempfile); BEGIN { - ( $VERSION = q($Id: Tidy.pm,v 1.74 2018/02/20 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker + + # Release version is the approximate YYMMDD of the release. + # Development version is (Last Release).(Development Number) + + # To make the number continually increasing, the Development Number is a 2 + # digit number starting at 01 after a release is continually bumped along + # at significant points during developement. If it ever reaches 99 then the + # Release version must be bumped, and it is probably past time for a + # release anyway. + + $VERSION = '20190601'; } sub streamhandle { @@ -184,7 +209,7 @@ EOM sub find_input_line_ending { # Peek at a file and return first line ending character. - # Quietly return undef in case of any trouble. + # Return undefined value in case of any trouble. my ($input_file) = @_; my $ending; @@ -229,7 +254,6 @@ sub catfile { my @parts = @_; - #BEGIN { eval "require File::Spec"; $missing_file_spec = $@; } BEGIN { eval { require File::Spec }; $missing_file_spec = $@; @@ -242,7 +266,7 @@ sub catfile { # Perl 5.004 systems may not have File::Spec so we'll make # a simple try. We assume File::Basename is available. - # return undef if not successful. + # return if not successful. my $name = pop @parts; my $path = join '/', @parts; my $test_file = $path . $name; @@ -385,9 +409,15 @@ EOM my $flag = shift; if ($flag) { goto ERROR_EXIT } else { goto NORMAL_EXIT } + croak "unexpectd return to Exit"; } - sub Die { my $msg = shift; Warn($msg); Exit(1); } + sub Die { + my $msg = shift; + Warn($msg); + Exit(1); + croak "unexpected return to Die"; + } # extract various dump parameters my $dump_options_type = $input_hash{'dump_options_type'}; @@ -449,7 +479,7 @@ EOM else { my ( $rargv, $msg ) = parse_args($argv); if ($msg) { - Die <{'dump-options'} ) { print STDOUT $readable_options; - Exit 0; + Exit(0); } #--------------------------------------------------------------- @@ -574,7 +604,7 @@ EOM my $formats = join ' ', sort map { "'" . $_ . "'" } keys %default_file_extension; my $fmt = $rOpts->{'format'}; - Die "-format='$fmt' but must be one of: $formats\n"; + Die("-format='$fmt' but must be one of: $formats\n"); } my $output_extension = make_extension( $rOpts->{'output-file-extension'}, @@ -600,7 +630,7 @@ EOM # At present only one forward slash is allowed. In the future multiple # slashes may be allowed to allow for other options if ( $delete_backup > 1 ) { - Die "-bext=$bext contains more than one '/'\n"; + Die("-bext=$bext contains more than one '/'\n"); } my $backup_extension = @@ -660,8 +690,9 @@ EOM # no filenames should be given if input is from an array if ($source_stream) { if ( @ARGV > 0 ) { - Die -"You may not specify any filenames when a source array is given\n"; + Die( +"You may not specify any filenames when a source array is given\n" + ); } # we'll stuff the source array into ARGV @@ -696,7 +727,7 @@ EOM while ( my $input_file = shift @ARGV ) { my $fileroot; - my $input_file_permissions; + my @input_file_stat; #--------------------------------------------------------------- # prepare this input stream @@ -738,12 +769,12 @@ EOM } } } - Warn "skipping file: '$input_file': no matches found\n"; + Warn("skipping file: '$input_file': no matches found\n"); next; } unless ( -f $input_file ) { - Warn "skipping file: $input_file: not a regular file\n"; + Warn("skipping file: $input_file: not a regular file\n"); next; } @@ -752,19 +783,20 @@ EOM # the old .tdy or .bak files might still exist so we # shouldn't overwrite them with zero length files. unless ( -s $input_file ) { - Warn "skipping file: $input_file: Zero size\n"; + Warn("skipping file: $input_file: Zero size\n"); next; } unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { - Warn - "skipping file: $input_file: Non-text (override with -f)\n"; + Warn( + "skipping file: $input_file: Non-text (override with -f)\n" + ); next; } # we should have a valid filename now - $fileroot = $input_file; - $input_file_permissions = ( stat $input_file )[2] & oct(7777); + $fileroot = $input_file; + @input_file_stat = stat($input_file); if ( $^O eq 'VMS' ) { ( $fileroot, $dot ) = check_vms_filename($fileroot); @@ -777,13 +809,13 @@ EOM my $new_path = $rOpts->{'output-path'}; unless ( -d $new_path ) { unless ( mkdir $new_path, 0777 ) { - Die "unable to create directory $new_path: $!\n"; + Die("unable to create directory $new_path: $!\n"); } } my $path = $new_path; $fileroot = catfile( $path, $base ); unless ($fileroot) { - Die <{'standard-output'} ) { my $msg = "You may not use -o and -st together"; $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); - Die "$msg\n"; + Die("$msg\n"); } elsif ($destination_stream) { - Die -"You may not specify a destination array and -o together\n"; + Die( +"You may not specify a destination array and -o together\n" + ); } elsif ( defined( $rOpts->{'output-path'} ) ) { - Die "You may not specify -o and -opath together\n"; + Die("You may not specify -o and -opath together\n"); } elsif ( defined( $rOpts->{'output-file-extension'} ) ) { - Die "You may not specify -o and -oext together\n"; + Die("You may not specify -o and -oext together\n"); } $output_file = $rOpts->{outfile}; # make sure user gives a file name after -o if ( $output_file =~ /^-/ ) { - Die "You must specify a valid filename after -o\n"; + Die("You must specify a valid filename after -o\n"); } # do not overwrite input file with -o - if ( defined($input_file_permissions) - && ( $output_file eq $input_file ) ) - { - Die "Use 'perltidy -b $input_file' to modify in-place\n"; + if ( @input_file_stat && ( $output_file eq $input_file ) ) { + Die("Use 'perltidy -b $input_file' to modify in-place\n"); } } else { - Die "You may not use -o with more than one input file\n"; + Die("You may not use -o with more than one input file\n"); } } elsif ( $rOpts->{'standard-output'} ) { @@ -899,14 +931,14 @@ EOM my $msg = "You may not specify a destination array and -st together\n"; $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); - Die "$msg\n"; + Die("$msg\n"); } $output_file = '-'; if ( $number_of_files <= 1 ) { } else { - Die "You may not use -st with more than one input file\n"; + Die("You may not use -st with more than one input file\n"); } } elsif ($destination_stream) { @@ -921,7 +953,7 @@ EOM else { if ($in_place_modify) { $output_file = IO::File->new_tmpfile() - or Die "cannot open temp file for -b option: $!\n"; + or Die("cannot open temp file for -b option: $!\n"); } else { $actual_output_extension = $output_extension; @@ -999,14 +1031,17 @@ EOM my $convergence_log_message; my %saw_md5; my $do_convergence_test = $max_iterations > 2; - if ($do_convergence_test) { - eval "use Digest::MD5 qw(md5_hex)"; - $do_convergence_test = !$@; - ### Trying to avoid problems with ancient versions of perl - ##eval { my $string = "perltidy"; utf8::encode($string) }; - ##$do_convergence_test = $do_convergence_test && !$@; - } + # Since Digest::MD5 qw(md5_hex) has been in the earliest version of Perl + # we are requiring (5.8), I have commented out this check +##? if ($do_convergence_test) { +##? eval "use Digest::MD5 qw(md5_hex)"; +##? $do_convergence_test = !$@; +##? +##? ### Trying to avoid problems with ancient versions of perl +##? ##eval { my $string = "perltidy"; utf8::encode($string) }; +##? ##$do_convergence_test = $do_convergence_test && !$@; +##? } # save objects to allow redirecting output during iterations my $sink_object_final = $sink_object; @@ -1063,11 +1098,11 @@ EOM ); } else { - Die "I don't know how to do -format=$rOpts->{'format'}\n"; + Die("I don't know how to do -format=$rOpts->{'format'}\n"); } unless ($formatter) { - Die "Unable to continue with $rOpts->{'format'} formatting\n"; + Die("Unable to continue with $rOpts->{'format'} formatting\n"); } #--------------------------------------------------------------- @@ -1219,26 +1254,29 @@ EOM # oh, oh, no real file to backup .. # shouldn't happen because of numerous preliminary checks - Die -"problem with -b backing up input file '$input_file': not a file\n"; + Die( +"problem with -b backing up input file '$input_file': not a file\n" + ); } my $backup_name = $input_file . $backup_extension; if ( -f $backup_name ) { unlink($backup_name) - or Die -"unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; + or Die( +"unable to remove previous '$backup_name' for -b option; check permissions: $!\n" + ); } # backup the input file # we use copy for symlinks, move for regular files if ( -l $input_file ) { File::Copy::copy( $input_file, $backup_name ) - or Die "File::Copy failed trying to backup source: $!"; + or Die("File::Copy failed trying to backup source: $!"); } else { rename( $input_file, $backup_name ) - or Die -"problem renaming $input_file to $backup_name for -b option: $!\n"; + or Die( +"problem renaming $input_file to $backup_name for -b option: $!\n" + ); } $ifname = $backup_name; @@ -1248,15 +1286,16 @@ EOM # handle of an open nameless temporary file so we would lose # everything if we closed it. seek( $output_file, 0, 0 ) - or Die "unable to rewind a temporary file for -b option: $!\n"; + or Die("unable to rewind a temporary file for -b option: $!\n"); my $fout = IO::File->new("> $input_file") - or Die -"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n"; + or Die( +"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n" + ); if ($binmode) { if ( $rOpts->{'character-encoding'} && $rOpts->{'character-encoding'} eq 'utf8' ) { - binmode $fout, ":encoding(UTF-8)"; + binmode $fout, ":raw:encoding(UTF-8)"; } else { binmode $fout } } @@ -1277,13 +1316,66 @@ EOM # set output file permissions if ( $output_file && -f $output_file && !-l $output_file ) { - if ($input_file_permissions) { + if (@input_file_stat) { - # give output script same permissions as input script, but - # make it user-writable or else we can't run perltidy again. - # Thus we retain whatever executable flags were set. + # Set file ownership and permissions if ( $rOpts->{'format'} eq 'tidy' ) { - chmod( $input_file_permissions | oct(600), $output_file ); + my ( $mode_i, $uid_i, $gid_i ) = + @input_file_stat[ 2, 4, 5 ]; + my ( $uid_o, $gid_o ) = ( stat($output_file) )[ 4, 5 ]; + my $input_file_permissions = $mode_i & oct(7777); + my $output_file_permissions = $input_file_permissions; + + #rt128477: avoid inconsistent owner/group and suid/sgid + if ( $uid_i != $uid_o || $gid_i != $gid_o ) { + + # try to change owner and group to match input file if in -b mode + # note: chown returns number of files successfully changed + if ( $in_place_modify + && chown( $uid_i, $gid_i, $output_file ) ) + { + # owner/group successfully changed + } + else { + + # owner or group differ: do not copy suid and sgid + $output_file_permissions = $mode_i & oct(777); + if ( $input_file_permissions != + $output_file_permissions ) + { + Warn( +"Unable to copy setuid and/or setgid bits for output file '$output_file'\n" + ); + } + } + } + + # Make the output file for rw unless we are in -b mode. + # Explanation: perltidy does not unlink existing output + # files before writing to them, for safety. If a + # designated output file exists and is not writable, + # perltidy will halt. This can prevent a data loss if a + # user accidentally enters "perltidy infile -o + # important_ro_file", or "perltidy infile -st + # >important_ro_file". But it also means that perltidy can + # get locked out of rerunning unless it marks its own + # output files writable. The alternative, of always + # unlinking the designated output file, is less safe and + # not always possible, except in -b mode, where there is an + # assumption that a previous backup can be unlinked even if + # not writable. + if ( !$in_place_modify ) { + $output_file_permissions |= oct(600); + } + + if ( !chmod( $output_file_permissions, $output_file ) ) { + + # couldn't change file permissions + my $operm = sprintf "%04o", $output_file_permissions; + Warn( +"Unable to set permissions for output file '$output_file' to $operm\n" + ); + } } # else use default permissions for html and any other format @@ -1328,8 +1420,9 @@ EOM } else { unlink($ifname) - or Die -"unable to remove previous '$ifname' for -b option; check permissions: $!\n"; + or Die( +"unable to remove previous '$ifname' for -b option; check permissions: $!\n" + ); } } @@ -1550,8 +1643,9 @@ sub generate_options { if ($short_name) { if ( $expansion{$short_name} ) { my $existing_name = $expansion{$short_name}[0]; - Die -"redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; + Die( +"redefining abbreviation $short_name for $long_name; already used for $existing_name\n" + ); } $expansion{$short_name} = [$long_name]; if ( $flag eq '!' ) { @@ -1559,8 +1653,9 @@ sub generate_options { my $nolong_name = 'no' . $long_name; if ( $expansion{$nshort_name} ) { my $existing_name = $expansion{$nshort_name}[0]; - Die -"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; + Die( +"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n" + ); } $expansion{$nshort_name} = [$nolong_name]; } @@ -1693,8 +1788,8 @@ sub generate_options { $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); $add_option->( 'cuddled-else', 'ce', '!' ); - $add_option->( 'cuddled-blocks', 'cb', '!' ); $add_option->( 'cuddled-block-list', 'cbl', '=s' ); + $add_option->( 'cuddled-block-list-exclusive', 'cblx', '!' ); $add_option->( 'cuddled-break-option', 'cbo', '=i' ); $add_option->( 'delete-old-newlines', 'dnl', '!' ); $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); @@ -1723,6 +1818,7 @@ sub generate_options { $add_option->( 'break-after-all-operators', 'baao', '!' ); $add_option->( 'break-before-all-operators', 'bbao', '!' ); $add_option->( 'keep-interior-semicolons', 'kis', '!' ); + $add_option->( 'one-line-block-semicolons', 'olbs', '=i' ); ######################################## $category = 6; # Controlling list formatting @@ -1736,6 +1832,7 @@ sub generate_options { ######################################## $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); + $add_option->( 'break-at-old-method-breakpoints', 'bom', '!' ); $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' ); $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); @@ -1751,6 +1848,14 @@ sub generate_options { $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); + $add_option->( 'keyword-group-blanks-list', 'kgbl', '=s' ); + $add_option->( 'keyword-group-blanks-size', 'kgbs', '=s' ); + $add_option->( 'keyword-group-blanks-repeat-count', 'kgbr', '=i' ); + $add_option->( 'keyword-group-blanks-before', 'kgbb', '=i' ); + $add_option->( 'keyword-group-blanks-after', 'kgba', '=i' ); + $add_option->( 'keyword-group-blanks-inside', 'kgbi', '!' ); + $add_option->( 'keyword-group-blanks-delete', 'kgbd', '!' ); + $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' ); $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' ); $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' ); @@ -1789,6 +1894,7 @@ sub generate_options { $add_option->( 'help', 'h', '' ); $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); $add_option->( 'show-options', 'opt', '!' ); + $add_option->( 'timestamp', 'ts', '!' ); $add_option->( 'version', 'v', '' ); $add_option->( 'memoize', 'mem', '!' ); $add_option->( 'file-size-order', 'fso', '!' ); @@ -1860,6 +1966,9 @@ sub generate_options { 'closing-side-comment-else-flag' => [ 0, 2 ], 'comma-arrow-breakpoints' => [ 0, 5 ], + + 'keyword-group-blanks-before' => [ 0, 2 ], + 'keyword-group-blanks-after' => [ 0, 2 ], ); # Note: we could actually allow negative ci if someone really wants it: @@ -1878,6 +1987,14 @@ sub generate_options { blanks-before-comments blank-lines-before-subs=1 blank-lines-before-packages=1 + + keyword-group-blanks-size=5 + keyword-group-blanks-repeat-count=0 + keyword-group-blanks-before=1 + keyword-group-blanks-after=1 + nokeyword-group-blanks-inside + nokeyword-group-blanks-delete + block-brace-tightness=0 block-brace-vertical-tightness=0 brace-tightness=1 @@ -1917,7 +2034,6 @@ sub generate_options { minimum-space-to-comment=4 nobrace-left-and-indent nocuddled-else - nocuddled-blocks nodelete-old-whitespace nohtml nologfile @@ -1927,6 +2043,7 @@ sub generate_options { notabs nowarning-output character-encoding=none + one-line-block-semicolons=1 outdent-labels outdent-long-quotes outdent-long-comments @@ -1944,6 +2061,7 @@ sub generate_options { square-bracket-vertical-tightness-closing=0 square-bracket-vertical-tightness=0 static-block-comments + timestamp trim-qw format=tidy backup-file-extension=bak @@ -1994,6 +2112,10 @@ sub generate_options { 'nhtml' => [qw(format=tidy)], 'tidy' => [qw(format=tidy)], + # -cb is now a synonym for -ce + 'cb' => [qw(cuddled-else)], + 'cuddled-blocks' => [qw(cuddled-else)], + 'utf8' => [qw(character-encoding=utf8)], 'UTF8' => [qw(character-encoding=utf8)], @@ -2012,6 +2134,11 @@ sub generate_options { 'noblanks-before-subs' => [qw(blbs=0 blbp=0)], 'nbbs' => [qw(blbs=0 blbp=0)], + 'keyword-group-blanks' => [qw(kgbb=2 kgbi kgba=2)], + 'kgb' => [qw(kgbb=2 kgbi kgba=2)], + 'nokeyword-group-blanks' => [qw(kgbb=1 nkgbi kgba=1)], + 'nkgb' => [qw(kgbb=1 nkgbi kgba=1)], + 'break-at-old-trinary-breakpoints' => [qw(bot)], 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], @@ -2178,10 +2305,11 @@ my %process_command_line_cache; sub process_command_line { + my @q = @_; my ( $perltidyrc_stream, $is_Windows, $Windows_type, $rpending_complaint, $dump_options_type - ) = @_; + ) = @q; my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type; if ($use_cache) { @@ -2192,14 +2320,14 @@ sub process_command_line { return @retvals; } else { - my @retvals = _process_command_line(@_); + my @retvals = _process_command_line(@q); $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ] if $retvals[0]->{'memoize'}; return @retvals; } } else { - return _process_command_line(@_); + return _process_command_line(@q); } } @@ -2234,15 +2362,16 @@ sub _process_command_line { #--------------------------------------------------------------- my %Opts = (); { - local @ARGV; + local @ARGV = (); # do not load the defaults if we are just dumping perltidyrc unless ( $dump_options_type eq 'perltidyrc' ) { for my $i ( @{$rdefaults} ) { push @ARGV, "--" . $i } } if ( !GetOptions( \%Opts, @{$roption_string} ) ) { - Die -"Programming Bug reported by 'GetOptions': error in setting default options"; + Die( +"Programming Bug reported by 'GetOptions': error in setting default options" + ); } } @@ -2270,8 +2399,9 @@ sub _process_command_line { } elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { if ($config_file) { - Warn -"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; + Warn( +"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n" + ); } $config_file = $2; @@ -2290,42 +2420,42 @@ sub _process_command_line { } } unless ( -e $config_file ) { - Warn "cannot find file given with -pro=$config_file: $!\n"; + Warn("cannot find file given with -pro=$config_file: $!\n"); $config_file = ""; } } elsif ( $i =~ /^-(pro|profile)=?$/ ) { - Die "usage: -pro=filename or --profile=filename, no spaces\n"; + Die("usage: -pro=filename or --profile=filename, no spaces\n"); } elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) { usage(); - Exit 0; + Exit(0); } elsif ( $i =~ /^-(version|v)$/ ) { show_version(); - Exit 0; + Exit(0); } elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { dump_defaults( @{$rdefaults} ); - Exit 0; + Exit(0); } elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { dump_long_names( @{$roption_string} ); - Exit 0; + Exit(0); } elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { dump_short_names($rexpansion); - Exit 0; + Exit(0); } elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); - Exit 0; + Exit(0); } } if ( $saw_dump_profile && $saw_ignore_profile ) { - Warn "No profile to dump because of -npro\n"; - Exit 1; + Warn("No profile to dump because of -npro\n"); + Exit(1); } #--------------------------------------------------------------- @@ -2338,7 +2468,7 @@ sub _process_command_line { # line. if ($perltidyrc_stream) { if ($config_file) { - Warn <{'check-syntax'} = 0; } + ########################################################################### # Added Dec 2017: Deactivating check-syntax for all systems for safety # because unexpected results can occur when code in BEGIN blocks is # executed. This flag was included to help check for perltidy mistakes, # and may still be useful for debugging. To activate for testing comment - # out the next three lines. + # out the next three lines. Also fix sub 'do_check_syntax' in this file. + ########################################################################### else { $rOpts->{'check-syntax'} = 0; } @@ -2560,10 +2693,10 @@ sub check_options { if ( $rOpts->{$key} ) { if ( $rOpts->{$key} < 0 ) { $rOpts->{$key} = 0; - Warn "negative value of $abbrev, setting 0\n"; + Warn("negative value of $abbrev, setting 0\n"); } if ( $rOpts->{$key} > 100 ) { - Warn "unreasonably large value of $abbrev, reducing\n"; + Warn("unreasonably large value of $abbrev, reducing\n"); $rOpts->{$key} = 100; } } @@ -2604,7 +2737,7 @@ sub check_options { if ( $rOpts->{'opening-brace-always-on-right'} && $rOpts->{'opening-brace-on-new-line'} ) { - Warn <{'entab-leading-whitespace'} ) { if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { - Warn "-et=n must use a positive integer; ignoring -et\n"; + Warn("-et=n must use a positive integer; ignoring -et\n"); $rOpts->{'entab-leading-whitespace'} = undef; } @@ -2637,11 +2770,11 @@ EOM # use tabs if ( $rOpts->{'default-tabsize'} ) { if ( $rOpts->{'default-tabsize'} < 0 ) { - Warn "negative value of -dt, setting 0\n"; + Warn("negative value of -dt, setting 0\n"); $rOpts->{'default-tabsize'} = 0; } if ( $rOpts->{'default-tabsize'} > 20 ) { - Warn "unreasonably large value of -dt, reducing\n"; + Warn("unreasonably large value of -dt, reducing\n"); $rOpts->{'default-tabsize'} = 20; } } @@ -2664,7 +2797,7 @@ EOM sub find_file_upwards { my ( $search_dir, $search_file ) = @_; - $search_dir =~ s{/+$}{}; + $search_dir =~ s{/+$}{}; $search_file =~ s{^/+}{}; while (1) { @@ -2679,6 +2812,10 @@ sub find_file_upwards { $search_dir = dirname($search_dir); } } + + # This return is for Perl-Critic. + # We shouldn't get out of the while loop without a return + return; } sub expand_command_abbreviations { @@ -2754,32 +2891,32 @@ sub expand_command_abbreviations { # make sure we are not in an infinite loop if ( $pass_count == $max_passes ) { local $" = ')('; - Warn < -bok break at old list keyword breakpoints such as map, sort (default) -bot break at old conditional (ternary ?:) operator breakpoints (default) -boa break at old attribute breakpoints @@ -3732,8 +3876,7 @@ sub check_syntax { $logger_object->report_definite_bug(); # the perl version number will be helpful for diagnosing the problem - $logger_object->write_logfile_entry( - qx/perl -v $error_redirection/ . "\n" ); + $logger_object->write_logfile_entry( $^V . "\n" ); } } else { @@ -3756,6 +3899,14 @@ sub check_syntax { return $infile_syntax_ok; } +sub do_syntax_check { + + # This should not be called; the syntax check is deactivated + Die("Unexpected call for syntax check-shouldn't happen\n"); + return; +} + +=pod sub do_syntax_check { my ( $stream, $flags, $error_redirection ) = @_; @@ -3784,7 +3935,7 @@ sub do_syntax_check { $flags = '"' . $flags . '"'; # now wish for luck... - my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; + my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; if ($is_tmpfile) { unlink $stream_filename @@ -3792,29762 +3943,7 @@ sub do_syntax_check { } return $stream_filename, $msg; } +=cut -##################################################################### -# -# This is a stripped down version of IO::Scalar -# Given a reference to a scalar, it supplies either: -# a getline method which reads lines (mode='r'), or -# a print method which reads lines (mode='w') -# -##################################################################### -package Perl::Tidy::IOScalar; -use Carp; - -sub new { - my ( $package, $rscalar, $mode ) = @_; - my $ref = ref $rscalar; - if ( $ref ne 'SCALAR' ) { - confess <[1]; - if ( $mode ne 'r' ) { - confess <[2]++; - return $self->[0]->[$i]; -} - -sub print { - my ( $self, $msg ) = @_; - my $mode = $self->[1]; - if ( $mode ne 'w' ) { - confess <[0] } .= $msg; -} -sub close { return } - -##################################################################### -# -# This is a stripped down version of IO::ScalarArray -# Given a reference to an array, it supplies either: -# a getline method which reads lines (mode='r'), or -# a print method which reads lines (mode='w') -# -# NOTE: this routine assumes that there aren't any embedded -# newlines within any of the array elements. There are no checks -# for that. -# -##################################################################### -package Perl::Tidy::IOScalarArray; -use Carp; - -sub new { - my ( $package, $rarray, $mode ) = @_; - my $ref = ref $rarray; - if ( $ref ne 'ARRAY' ) { - confess <[1]; - if ( $mode ne 'r' ) { - confess <[2]++; - return $self->[0]->[$i]; -} - -sub print { - my ( $self, $msg ) = @_; - my $mode = $self->[1]; - if ( $mode ne 'w' ) { - confess <[0] }, $msg; -} -sub close { return } - -##################################################################### -# -# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method -# which returns the next line to be parsed -# -##################################################################### - -package Perl::Tidy::LineSource; - -sub new { - - my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; - - my $input_line_ending; - if ( $rOpts->{'preserve-line-endings'} ) { - $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); - } - - ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); - return unless $fh; - - # in order to check output syntax when standard output is used, - # or when it is an object, we have to make a copy of the file - if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) - { - - # Turning off syntax check when input output is used. - # The reason is that temporary files cause problems on - # on many systems. - $rOpts->{'check-syntax'} = 0; - - ${$rpending_logfile_message} .= < $fh, - _filename => $input_file, - _input_line_ending => $input_line_ending, - _rinput_buffer => [], - _started => 0, - }, $class; -} - -sub close_input_file { - my $self = shift; - - # Only close physical files, not STDIN and other objects - my $filename = $self->{_filename}; - if ( $filename ne '-' && !ref $filename ) { - eval { $self->{_fh}->close() }; - } - return; -} - -sub get_line { - my $self = shift; - my $line = undef; - my $fh = $self->{_fh}; - my $rinput_buffer = $self->{_rinput_buffer}; - - if ( scalar( @{$rinput_buffer} ) ) { - $line = shift @{$rinput_buffer}; - } - else { - $line = $fh->getline(); - - # patch to read raw mac files under unix, dos - # see if the first line has embedded \r's - if ( $line && !$self->{_started} ) { - if ( $line =~ /[\015][^\015\012]/ ) { - - # found one -- break the line up and store in a buffer - @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line; - my $count = @{$rinput_buffer}; - $line = shift @{$rinput_buffer}; - } - $self->{_started}++; - } - } - return $line; -} - -##################################################################### -# -# the Perl::Tidy::LineSink class supplies a write_line method for -# actual file writing -# -##################################################################### - -package Perl::Tidy::LineSink; - -sub new { - - my ( $class, $output_file, $tee_file, $line_separator, $rOpts, - $rpending_logfile_message, $binmode ) - = @_; - my $fh = undef; - my $fh_tee = undef; - - my $output_file_open = 0; - - if ( $rOpts->{'format'} eq 'tidy' ) { - ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); - unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; } - $output_file_open = 1; - if ($binmode) { - if ( $rOpts->{'character-encoding'} - && $rOpts->{'character-encoding'} eq 'utf8' ) - { - if ( ref($fh) eq 'IO::File' ) { - $fh->binmode(":encoding(UTF-8)"); - } - elsif ( $output_file eq '-' ) { - binmode STDOUT, ":encoding(UTF-8)"; - } - } - - # Patch for RT 122030 - elsif ( ref($fh) eq 'IO::File' ) { $fh->binmode(); } - - elsif ( $output_file eq '-' ) { binmode STDOUT } - } - } - - # in order to check output syntax when standard output is used, - # or when it is an object, we have to make a copy of the file - if ( $output_file eq '-' || ref $output_file ) { - if ( $rOpts->{'check-syntax'} ) { - - # Turning off syntax check when standard output is used. - # The reason is that temporary files cause problems on - # on many systems. - $rOpts->{'check-syntax'} = 0; - ${$rpending_logfile_message} .= < $fh, - _fh_tee => $fh_tee, - _output_file => $output_file, - _output_file_open => $output_file_open, - _tee_flag => 0, - _tee_file => $tee_file, - _tee_file_opened => 0, - _line_separator => $line_separator, - _binmode => $binmode, - }, $class; -} - -sub write_line { - - my ( $self, $line ) = @_; - my $fh = $self->{_fh}; - - my $output_file_open = $self->{_output_file_open}; - chomp $line; - $line .= $self->{_line_separator}; - - $fh->print($line) if ( $self->{_output_file_open} ); - - if ( $self->{_tee_flag} ) { - unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } - my $fh_tee = $self->{_fh_tee}; - print $fh_tee $line; - } - return; -} - -sub tee_on { - my $self = shift; - $self->{_tee_flag} = 1; - return; -} - -sub tee_off { - my $self = shift; - $self->{_tee_flag} = 0; - return; -} - -sub really_open_tee_file { - my $self = shift; - my $tee_file = $self->{_tee_file}; - my $fh_tee; - $fh_tee = IO::File->new(">$tee_file") - or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n"); - binmode $fh_tee if $self->{_binmode}; - $self->{_tee_file_opened} = 1; - $self->{_fh_tee} = $fh_tee; - return; -} - -sub close_output_file { - my $self = shift; - - # Only close physical files, not STDOUT and other objects - my $output_file = $self->{_output_file}; - if ( $output_file ne '-' && !ref $output_file ) { - eval { $self->{_fh}->close() } if $self->{_output_file_open}; - } - $self->close_tee_file(); - return; -} - -sub close_tee_file { - my $self = shift; - - # Only close physical files, not STDOUT and other objects - if ( $self->{_tee_file_opened} ) { - my $tee_file = $self->{_tee_file}; - if ( $tee_file ne '-' && !ref $tee_file ) { - eval { $self->{_fh_tee}->close() }; - $self->{_tee_file_opened} = 0; - } - } - return; -} - -##################################################################### -# -# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is -# useful for program development. -# -# Only one such file is created regardless of the number of input -# files processed. This allows the results of processing many files -# to be summarized in a single file. - -# Output messages go to a file named DIAGNOSTICS, where -# they are labeled by file and line. This allows many files to be -# scanned at once for some particular condition of interest. It was -# particularly useful for developing guessing strategies. -# -# NOTE: This feature is deactivated in final releases but can be -# reactivated for debugging by un-commenting the 'I' options flag -# -##################################################################### - -package Perl::Tidy::Diagnostics; - -sub new { - - my $class = shift; - return bless { - _write_diagnostics_count => 0, - _last_diagnostic_file => "", - _input_file => "", - _fh => undef, - }, $class; -} - -sub set_input_file { - my ( $self, $input_file ) = @_; - $self->{_input_file} = $input_file; - return; -} - -sub write_diagnostics { - my ( $self, $msg ) = @_; - - unless ( $self->{_write_diagnostics_count} ) { - open( $self->{_fh}, ">", "DIAGNOSTICS" ) - or Perl::Tidy::Die("couldn't open DIAGNOSTICS: $!\n"); - } - - my $fh = $self->{_fh}; - my $last_diagnostic_file = $self->{_last_diagnostic_file}; - my $input_file = $self->{_input_file}; - if ( $last_diagnostic_file ne $input_file ) { - $fh->print("\nFILE:$input_file\n"); - } - $self->{_last_diagnostic_file} = $input_file; - my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); - $fh->print("$input_line_number:\t$msg"); - $self->{_write_diagnostics_count}++; - return; -} - -##################################################################### -# -# The Perl::Tidy::Logger class writes the .LOG and .ERR files -# -##################################################################### - -package Perl::Tidy::Logger; - -sub new { - - my ( $class, $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude ) = - @_; - - my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef; - - # remove any old error output file if we might write a new one - unless ( $fh_warnings || ref($warning_file) ) { - if ( -e $warning_file ) { - unlink($warning_file) - or Perl::Tidy::Die( - "couldn't unlink warning file $warning_file: $!\n"); - } - } - - my $logfile_gap = - defined( $rOpts->{'logfile-gap'} ) - ? $rOpts->{'logfile-gap'} - : 50; - if ( $logfile_gap == 0 ) { $logfile_gap = 1 } - - return bless { - _log_file => $log_file, - _logfile_gap => $logfile_gap, - _rOpts => $rOpts, - _fh_warnings => $fh_warnings, - _last_input_line_written => 0, - _at_end_of_file => 0, - _use_prefix => 1, - _block_log_output => 0, - _line_of_tokens => undef, - _output_line_number => undef, - _wrote_line_information_string => 0, - _wrote_column_headings => 0, - _warning_file => $warning_file, - _warning_count => 0, - _complaint_count => 0, - _saw_code_bug => -1, # -1=no 0=maybe 1=for sure - _saw_brace_error => 0, - _saw_extrude => $saw_extrude, - _output_array => [], - }, $class; -} - -sub get_warning_count { - my $self = shift; - return $self->{_warning_count}; -} - -sub get_use_prefix { - my $self = shift; - return $self->{_use_prefix}; -} - -sub block_log_output { - my $self = shift; - $self->{_block_log_output} = 1; - return; -} - -sub unblock_log_output { - my $self = shift; - $self->{_block_log_output} = 0; - return; -} +1; -sub interrupt_logfile { - my $self = shift; - $self->{_use_prefix} = 0; - $self->warning("\n"); - $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); - return; -} - -sub resume_logfile { - my $self = shift; - $self->write_logfile_entry( '#' x 60 . "\n" ); - $self->{_use_prefix} = 1; - return; -} - -sub we_are_at_the_last_line { - my $self = shift; - unless ( $self->{_wrote_line_information_string} ) { - $self->write_logfile_entry("Last line\n\n"); - } - $self->{_at_end_of_file} = 1; - return; -} - -# record some stuff in case we go down in flames -sub black_box { - my ( $self, $line_of_tokens, $output_line_number ) = @_; - my $input_line = $line_of_tokens->{_line_text}; - my $input_line_number = $line_of_tokens->{_line_number}; - - # save line information in case we have to write a logfile message - $self->{_line_of_tokens} = $line_of_tokens; - $self->{_output_line_number} = $output_line_number; - $self->{_wrote_line_information_string} = 0; - - my $last_input_line_written = $self->{_last_input_line_written}; - my $rOpts = $self->{_rOpts}; - if ( - ( - ( $input_line_number - $last_input_line_written ) >= - $self->{_logfile_gap} - ) - || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) - ) - { - my $structural_indentation_level = $line_of_tokens->{_level_0}; - $structural_indentation_level = 0 - if ( $structural_indentation_level < 0 ); - $self->{_last_input_line_written} = $input_line_number; - ( my $out_str = $input_line ) =~ s/^\s*//; - chomp $out_str; - - $out_str = ( '.' x $structural_indentation_level ) . $out_str; - - if ( length($out_str) > 35 ) { - $out_str = substr( $out_str, 0, 35 ) . " ...."; - } - $self->logfile_output( "", "$out_str\n" ); - } - return; -} - -sub write_logfile_entry { - - my ( $self, @msg ) = @_; - - # add leading >>> to avoid confusing error messages and code - $self->logfile_output( ">>>", "@msg" ); - return; -} - -sub write_column_headings { - my $self = shift; - - $self->{_wrote_column_headings} = 1; - my $routput_array = $self->{_output_array}; - push @{$routput_array}, <>>) -lines levels i k (code begins with one '.' per indent level) ------- ----- - - -------- ------------------------------------------- -EOM - return; -} - -sub make_line_information_string { - - # make columns of information when a logfile message needs to go out - my $self = shift; - my $line_of_tokens = $self->{_line_of_tokens}; - my $input_line_number = $line_of_tokens->{_line_number}; - my $line_information_string = ""; - if ($input_line_number) { - - my $output_line_number = $self->{_output_line_number}; - my $brace_depth = $line_of_tokens->{_curly_brace_depth}; - my $paren_depth = $line_of_tokens->{_paren_depth}; - my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; - my $guessed_indentation_level = - $line_of_tokens->{_guessed_indentation_level}; - ##my $rtoken_array = $line_of_tokens->{_rtoken_array}; - - my $structural_indentation_level = $line_of_tokens->{_level_0}; - - $self->write_column_headings() unless $self->{_wrote_column_headings}; - - # keep logfile columns aligned for scripts up to 999 lines; - # for longer scripts it doesn't really matter - my $extra_space = ""; - $extra_space .= - ( $input_line_number < 10 ) ? " " - : ( $input_line_number < 100 ) ? " " - : ""; - $extra_space .= - ( $output_line_number < 10 ) ? " " - : ( $output_line_number < 100 ) ? " " - : ""; - - # there are 2 possible nesting strings: - # the original which looks like this: (0 [1 {2 - # the new one, which looks like this: {{[ - # the new one is easier to read, and shows the order, but - # could be arbitrarily long, so we use it unless it is too long - my $nesting_string = - "($paren_depth [$square_bracket_depth {$brace_depth"; - my $nesting_string_new = $line_of_tokens->{_nesting_tokens_0}; - my $ci_level = $line_of_tokens->{_ci_level_0}; - if ( $ci_level > 9 ) { $ci_level = '*' } - my $bk = ( $line_of_tokens->{_nesting_blocks_0} =~ /1$/ ) ? '1' : '0'; - - if ( length($nesting_string_new) <= 8 ) { - $nesting_string = - $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); - } - $line_information_string = -"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; - } - return $line_information_string; -} - -sub logfile_output { - my ( $self, $prompt, $msg ) = @_; - return if ( $self->{_block_log_output} ); - - my $routput_array = $self->{_output_array}; - if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { - push @{$routput_array}, "$msg"; - } - else { - my $line_information_string = $self->make_line_information_string(); - $self->{_wrote_line_information_string} = 1; - - if ($line_information_string) { - push @{$routput_array}, "$line_information_string $prompt$msg"; - } - else { - push @{$routput_array}, "$msg"; - } - } - return; -} - -sub get_saw_brace_error { - my $self = shift; - return $self->{_saw_brace_error}; -} - -sub increment_brace_error { - my $self = shift; - $self->{_saw_brace_error}++; - return; -} - -sub brace_warning { - my ( $self, $msg ) = @_; - - #use constant BRACE_WARNING_LIMIT => 10; - my $BRACE_WARNING_LIMIT = 10; - my $saw_brace_error = $self->{_saw_brace_error}; - - if ( $saw_brace_error < $BRACE_WARNING_LIMIT ) { - $self->warning($msg); - } - $saw_brace_error++; - $self->{_saw_brace_error} = $saw_brace_error; - - if ( $saw_brace_error == $BRACE_WARNING_LIMIT ) { - $self->warning("No further warnings of this type will be given\n"); - } - return; -} - -sub complain { - - # handle non-critical warning messages based on input flag - my ( $self, $msg ) = @_; - my $rOpts = $self->{_rOpts}; - - # these appear in .ERR output only if -w flag is used - if ( $rOpts->{'warning-output'} ) { - $self->warning($msg); - } - - # otherwise, they go to the .LOG file - else { - $self->{_complaint_count}++; - $self->write_logfile_entry($msg); - } - return; -} - -sub warning { - - # report errors to .ERR file (or stdout) - my ( $self, $msg ) = @_; - - #use constant WARNING_LIMIT => 50; - my $WARNING_LIMIT = 50; - - my $rOpts = $self->{_rOpts}; - unless ( $rOpts->{'quiet'} ) { - - my $warning_count = $self->{_warning_count}; - my $fh_warnings = $self->{_fh_warnings}; - if ( !$fh_warnings ) { - my $warning_file = $self->{_warning_file}; - ( $fh_warnings, my $filename ) = - Perl::Tidy::streamhandle( $warning_file, 'w' ); - $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n"); - Perl::Tidy::Warn "## Please see file $filename\n" - unless ref($warning_file); - $self->{_fh_warnings} = $fh_warnings; - $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n"); - } - - if ( $warning_count < $WARNING_LIMIT ) { - if ( $self->get_use_prefix() > 0 ) { - my $input_line_number = - Perl::Tidy::Tokenizer::get_input_line_number(); - if ( !defined($input_line_number) ) { $input_line_number = -1 } - $fh_warnings->print("$input_line_number:\t$msg"); - $self->write_logfile_entry("WARNING: $msg"); - } - else { - $fh_warnings->print($msg); - $self->write_logfile_entry($msg); - } - } - $warning_count++; - $self->{_warning_count} = $warning_count; - - if ( $warning_count == $WARNING_LIMIT ) { - $fh_warnings->print("No further warnings will be given\n"); - } - } - return; -} - -# programming bug codes: -# -1 = no bug -# 0 = maybe, not sure. -# 1 = definitely -sub report_possible_bug { - my $self = shift; - my $saw_code_bug = $self->{_saw_code_bug}; - $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; - return; -} - -sub report_definite_bug { - my $self = shift; - $self->{_saw_code_bug} = 1; - return; -} - -sub ask_user_for_bug_report { - - my ( $self, $infile_syntax_ok, $formatter ) = @_; - my $saw_code_bug = $self->{_saw_code_bug}; - if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { - $self->warning(<{_saw_extrude} ) { - $self->warning(<warning(<get_added_semicolon_count(); - }; - if ( $added_semicolon_count > 0 ) { - $self->warning(<{_rOpts}; - my $warning_count = $self->{_warning_count}; - my $saw_code_bug = $self->{_saw_code_bug}; - - my $save_logfile = - ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) - || $saw_code_bug == 1 - || $rOpts->{'logfile'}; - my $log_file = $self->{_log_file}; - if ($warning_count) { - if ($save_logfile) { - $self->block_log_output(); # avoid echoing this to the logfile - $self->warning( - "The logfile $log_file may contain useful information\n"); - $self->unblock_log_output(); - } - - if ( $self->{_complaint_count} > 0 ) { - $self->warning( -"To see $self->{_complaint_count} non-critical warnings rerun with -w\n" - ); - } - - if ( $self->{_saw_brace_error} - && ( $self->{_logfile_gap} > 1 || !$save_logfile ) ) - { - $self->warning("To save a full .LOG file rerun with -g\n"); - } - } - $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); - - if ($save_logfile) { - my $log_file = $self->{_log_file}; - my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); - if ($fh) { - my $routput_array = $self->{_output_array}; - foreach ( @{$routput_array} ) { $fh->print($_) } - if ( $log_file ne '-' && !ref $log_file ) { - eval { $fh->close() }; - } - } - } - return; -} - -##################################################################### -# -# The Perl::Tidy::DevNull class supplies a dummy print method -# -##################################################################### - -package Perl::Tidy::DevNull; -sub new { my $self = shift; return bless {}, $self } -sub print { return } -sub close { return } - -##################################################################### -# -# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html -# -##################################################################### - -package Perl::Tidy::HtmlWriter; - -use File::Basename; - -# class variables -use vars qw{ - %html_color - %html_bold - %html_italic - %token_short_names - %short_to_long_names - $rOpts - $css_filename - $css_linkname - $missing_html_entities -}; - -# replace unsafe characters with HTML entity representation if HTML::Entities -# is available -{ eval "use HTML::Entities"; $missing_html_entities = $@; } - -sub new { - - my ( $class, $input_file, $html_file, $extension, $html_toc_extension, - $html_src_extension ) - = @_; - - my $html_file_opened = 0; - my $html_fh; - ( $html_fh, my $html_filename ) = - Perl::Tidy::streamhandle( $html_file, 'w' ); - unless ($html_fh) { - Perl::Tidy::Warn("can't open $html_file: $!\n"); - return; - } - $html_file_opened = 1; - - if ( !$input_file || $input_file eq '-' || ref($input_file) ) { - $input_file = "NONAME"; - } - - # write the table of contents to a string - my $toc_string; - my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); - - my $html_pre_fh; - my @pre_string_stack; - if ( $rOpts->{'html-pre-only'} ) { - - # pre section goes directly to the output stream - $html_pre_fh = $html_fh; - $html_pre_fh->print( <<"PRE_END"); -
-PRE_END
-    }
-    else {
-
-        # pre section go out to a temporary string
-        my $pre_string;
-        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
-        push @pre_string_stack, \$pre_string;
-    }
-
-    # pod text gets diverted if the 'pod2html' is used
-    my $html_pod_fh;
-    my $pod_string;
-    if ( $rOpts->{'pod2html'} ) {
-        if ( $rOpts->{'html-pre-only'} ) {
-            undef $rOpts->{'pod2html'};
-        }
-        else {
-            eval "use Pod::Html";
-            if ($@) {
-                Perl::Tidy::Warn
-"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
-                undef $rOpts->{'pod2html'};
-            }
-            else {
-                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
-            }
-        }
-    }
-
-    my $toc_filename;
-    my $src_filename;
-    if ( $rOpts->{'frames'} ) {
-        unless ($extension) {
-            Perl::Tidy::Warn
-"cannot use frames without a specified output extension; ignoring -frm\n";
-            undef $rOpts->{'frames'};
-        }
-        else {
-            $toc_filename = $input_file . $html_toc_extension . $extension;
-            $src_filename = $input_file . $html_src_extension . $extension;
-        }
-    }
-
-    # ----------------------------------------------------------
-    # Output is now directed as follows:
-    # html_toc_fh <-- table of contents items
-    # html_pre_fh <-- the 
 section of formatted code, except:
-    # html_pod_fh <-- pod goes here with the pod2html option
-    # ----------------------------------------------------------
-
-    my $title = $rOpts->{'title'};
-    unless ($title) {
-        ( $title, my $path ) = fileparse($input_file);
-    }
-    my $toc_item_count = 0;
-    my $in_toc_package = "";
-    my $last_level     = 0;
-    return bless {
-        _input_file        => $input_file,          # name of input file
-        _title             => $title,               # title, unescaped
-        _html_file         => $html_file,           # name of .html output file
-        _toc_filename      => $toc_filename,        # for frames option
-        _src_filename      => $src_filename,        # for frames option
-        _html_file_opened  => $html_file_opened,    # a flag
-        _html_fh           => $html_fh,             # the output stream
-        _html_pre_fh       => $html_pre_fh,         # pre section goes here
-        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
-        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
-        _rpod_string       => \$pod_string,         # string holding pod
-        _pod_cut_count     => 0,                    # how many =cut's?
-        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
-        _rtoc_string       => \$toc_string,         # string holding toc
-        _rtoc_item_count   => \$toc_item_count,     # how many toc items
-        _rin_toc_package   => \$in_toc_package,     # package name
-        _rtoc_name_count   => {},                   # hash to track unique names
-        _rpackage_stack    => [],                   # stack to check for package
-                                                    # name changes
-        _rlast_level       => \$last_level,         # brace indentation level
-    }, $class;
-}
-
-sub add_toc_item {
-
-    # Add an item to the html table of contents.
-    # This is called even if no table of contents is written,
-    # because we still want to put the anchors in the 
 text.
-    # We are given an anchor name and its type; types are:
-    #      'package', 'sub', '__END__', '__DATA__', 'EOF'
-    # There must be an 'EOF' call at the end to wrap things up.
-    my ( $self, $name, $type ) = @_;
-    my $html_toc_fh     = $self->{_html_toc_fh};
-    my $html_pre_fh     = $self->{_html_pre_fh};
-    my $rtoc_name_count = $self->{_rtoc_name_count};
-    my $rtoc_item_count = $self->{_rtoc_item_count};
-    my $rlast_level     = $self->{_rlast_level};
-    my $rin_toc_package = $self->{_rin_toc_package};
-    my $rpackage_stack  = $self->{_rpackage_stack};
-
-    # packages contain sublists of subs, so to avoid errors all package
-    # items are written and finished with the following routines
-    my $end_package_list = sub {
-        if ( ${$rin_toc_package} ) {
-            $html_toc_fh->print("\n\n");
-            ${$rin_toc_package} = "";
-        }
-    };
-
-    my $start_package_list = sub {
-        my ( $unique_name, $package ) = @_;
-        if ( ${$rin_toc_package} ) { $end_package_list->() }
-        $html_toc_fh->print(<package $package
-
    -EOM - ${$rin_toc_package} = $package; - }; - - # start the table of contents on the first item - unless ( ${$rtoc_item_count} ) { - - # but just quit if we hit EOF without any other entries - # in this case, there will be no toc - return if ( $type eq 'EOF' ); - $html_toc_fh->print( <<"TOC_END"); - -
      -TOC_END - } - ${$rtoc_item_count}++; - - # make a unique anchor name for this location: - # - packages get a 'package-' prefix - # - subs use their names - my $unique_name = $name; - if ( $type eq 'package' ) { $unique_name = "package-$name" } - - # append '-1', '-2', etc if necessary to make unique; this will - # be unique because subs and packages cannot have a '-' - if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { - $unique_name .= "-$count"; - } - - # - all names get terminal '-' if pod2html is used, to avoid - # conflicts with anchor names created by pod2html - if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } - - # start/stop lists of subs - if ( $type eq 'sub' ) { - my $package = $rpackage_stack->[ ${$rlast_level} ]; - unless ($package) { $package = 'main' } - - # if we're already in a package/sub list, be sure its the right - # package or else close it - if ( ${$rin_toc_package} && ${$rin_toc_package} ne $package ) { - $end_package_list->(); - } - - # start a package/sub list if necessary - unless ( ${$rin_toc_package} ) { - $start_package_list->( $unique_name, $package ); - } - } - - # now write an entry in the toc for this item - if ( $type eq 'package' ) { - $start_package_list->( $unique_name, $name ); - } - elsif ( $type eq 'sub' ) { - $html_toc_fh->print("
    • $name
    • \n"); - } - else { - $end_package_list->(); - $html_toc_fh->print("
    • $name
    • \n"); - } - - # write the anchor in the
       section
      -    $html_pre_fh->print("");
      -
      -    # end the table of contents, if any, on the end of file
      -    if ( $type eq 'EOF' ) {
      -        $html_toc_fh->print( <<"TOC_END");
      -
    - -TOC_END - } - return; -} - -BEGIN { - - # This is the official list of tokens which may be identified by the - # user. Long names are used as getopt keys. Short names are - # convenient short abbreviations for specifying input. Short names - # somewhat resemble token type characters, but are often different - # because they may only be alphanumeric, to allow command line - # input. Also, note that because of case insensitivity of html, - # this table must be in a single case only (I've chosen to use all - # lower case). - # When adding NEW_TOKENS: update this hash table - # short names => long names - %short_to_long_names = ( - 'n' => 'numeric', - 'p' => 'paren', - 'q' => 'quote', - 's' => 'structure', - 'c' => 'comment', - 'v' => 'v-string', - 'cm' => 'comma', - 'w' => 'bareword', - 'co' => 'colon', - 'pu' => 'punctuation', - 'i' => 'identifier', - 'j' => 'label', - 'h' => 'here-doc-target', - 'hh' => 'here-doc-text', - 'k' => 'keyword', - 'sc' => 'semicolon', - 'm' => 'subroutine', - 'pd' => 'pod-text', - ); - - # Now we have to map actual token types into one of the above short - # names; any token types not mapped will get 'punctuation' - # properties. - - # The values of this hash table correspond to the keys of the - # previous hash table. - # The keys of this hash table are token types and can be seen - # by running with --dump-token-types (-dtt). - - # When adding NEW_TOKENS: update this hash table - # $type => $short_name - %token_short_names = ( - '#' => 'c', - 'n' => 'n', - 'v' => 'v', - 'k' => 'k', - 'F' => 'k', - 'Q' => 'q', - 'q' => 'q', - 'J' => 'j', - 'j' => 'j', - 'h' => 'h', - 'H' => 'hh', - 'w' => 'w', - ',' => 'cm', - '=>' => 'cm', - ';' => 'sc', - ':' => 'co', - 'f' => 'sc', - '(' => 'p', - ')' => 'p', - 'M' => 'm', - 'P' => 'pd', - 'A' => 'co', - ); - - # These token types will all be called identifiers for now - # FIXME: could separate user defined modules as separate type - my @identifier = qw" i t U C Y Z G :: CORE::"; - @token_short_names{@identifier} = ('i') x scalar(@identifier); - - # These token types will be called 'structure' - my @structure = qw" { } "; - @token_short_names{@structure} = ('s') x scalar(@structure); - - # OLD NOTES: save for reference - # Any of these could be added later if it would be useful. - # For now, they will by default become punctuation - # my @list = qw" L R [ ] "; - # @token_long_names{@list} = ('non-structure') x scalar(@list); - # - # my @list = qw" - # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm - # "; - # @token_long_names{@list} = ('math') x scalar(@list); - # - # my @list = qw" & &= ~ ~= ^ ^= | |= "; - # @token_long_names{@list} = ('bit') x scalar(@list); - # - # my @list = qw" == != < > <= <=> "; - # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); - # - # my @list = qw" && || ! &&= ||= //= "; - # @token_long_names{@list} = ('logical') x scalar(@list); - # - # my @list = qw" . .= =~ !~ x x= "; - # @token_long_names{@list} = ('string-operators') x scalar(@list); - # - # # Incomplete.. - # my @list = qw" .. -> <> ... \ ? "; - # @token_long_names{@list} = ('misc-operators') x scalar(@list); - -} - -sub make_getopt_long_names { - my ( $class, $rgetopt_names ) = @_; - while ( my ( $short_name, $name ) = each %short_to_long_names ) { - push @{$rgetopt_names}, "html-color-$name=s"; - push @{$rgetopt_names}, "html-italic-$name!"; - push @{$rgetopt_names}, "html-bold-$name!"; - } - push @{$rgetopt_names}, "html-color-background=s"; - push @{$rgetopt_names}, "html-linked-style-sheet=s"; - push @{$rgetopt_names}, "nohtml-style-sheets"; - push @{$rgetopt_names}, "html-pre-only"; - push @{$rgetopt_names}, "html-line-numbers"; - push @{$rgetopt_names}, "html-entities!"; - push @{$rgetopt_names}, "stylesheet"; - push @{$rgetopt_names}, "html-table-of-contents!"; - push @{$rgetopt_names}, "pod2html!"; - push @{$rgetopt_names}, "frames!"; - push @{$rgetopt_names}, "html-toc-extension=s"; - push @{$rgetopt_names}, "html-src-extension=s"; - - # Pod::Html parameters: - push @{$rgetopt_names}, "backlink=s"; - push @{$rgetopt_names}, "cachedir=s"; - push @{$rgetopt_names}, "htmlroot=s"; - push @{$rgetopt_names}, "libpods=s"; - push @{$rgetopt_names}, "podpath=s"; - push @{$rgetopt_names}, "podroot=s"; - push @{$rgetopt_names}, "title=s"; - - # Pod::Html parameters with leading 'pod' which will be removed - # before the call to Pod::Html - push @{$rgetopt_names}, "podquiet!"; - push @{$rgetopt_names}, "podverbose!"; - push @{$rgetopt_names}, "podrecurse!"; - push @{$rgetopt_names}, "podflush"; - push @{$rgetopt_names}, "podheader!"; - push @{$rgetopt_names}, "podindex!"; - return; -} - -sub make_abbreviated_names { - - # We're appending things like this to the expansion list: - # 'hcc' => [qw(html-color-comment)], - # 'hck' => [qw(html-color-keyword)], - # etc - my ( $class, $rexpansion ) = @_; - - # abbreviations for color/bold/italic properties - while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { - ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; - ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; - ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; - ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; - ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; - } - - # abbreviations for all other html options - ${$rexpansion}{"hcbg"} = ["html-color-background"]; - ${$rexpansion}{"pre"} = ["html-pre-only"]; - ${$rexpansion}{"toc"} = ["html-table-of-contents"]; - ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; - ${$rexpansion}{"nnn"} = ["html-line-numbers"]; - ${$rexpansion}{"hent"} = ["html-entities"]; - ${$rexpansion}{"nhent"} = ["nohtml-entities"]; - ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; - ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; - ${$rexpansion}{"ss"} = ["stylesheet"]; - ${$rexpansion}{"pod"} = ["pod2html"]; - ${$rexpansion}{"npod"} = ["nopod2html"]; - ${$rexpansion}{"frm"} = ["frames"]; - ${$rexpansion}{"nfrm"} = ["noframes"]; - ${$rexpansion}{"text"} = ["html-toc-extension"]; - ${$rexpansion}{"sext"} = ["html-src-extension"]; - return; -} - -sub check_options { - - # This will be called once after options have been parsed - my ( $class, $rOpts ) = @_; - - # X11 color names for default settings that seemed to look ok - # (these color names are only used for programming clarity; the hex - # numbers are actually written) - use constant ForestGreen => "#228B22"; - use constant SaddleBrown => "#8B4513"; - use constant magenta4 => "#8B008B"; - use constant IndianRed3 => "#CD5555"; - use constant DeepSkyBlue4 => "#00688B"; - use constant MediumOrchid3 => "#B452CD"; - use constant black => "#000000"; - use constant white => "#FFFFFF"; - use constant red => "#FF0000"; - - # set default color, bold, italic properties - # anything not listed here will be given the default (punctuation) color -- - # these types currently not listed and get default: ws pu s sc cm co p - # When adding NEW_TOKENS: add an entry here if you don't want defaults - - # set_default_properties( $short_name, default_color, bold?, italic? ); - set_default_properties( 'c', ForestGreen, 0, 0 ); - set_default_properties( 'pd', ForestGreen, 0, 1 ); - set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown - set_default_properties( 'q', IndianRed3, 0, 0 ); - set_default_properties( 'hh', IndianRed3, 0, 1 ); - set_default_properties( 'h', IndianRed3, 1, 0 ); - set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); - set_default_properties( 'w', black, 0, 0 ); - set_default_properties( 'n', MediumOrchid3, 0, 0 ); - set_default_properties( 'v', MediumOrchid3, 0, 0 ); - set_default_properties( 'j', IndianRed3, 1, 0 ); - set_default_properties( 'm', red, 1, 0 ); - - set_default_color( 'html-color-background', white ); - set_default_color( 'html-color-punctuation', black ); - - # setup property lookup tables for tokens based on their short names - # every token type has a short name, and will use these tables - # to do the html markup - while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { - $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; - $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; - $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; - } - - # write style sheet to STDOUT and die if requested - if ( defined( $rOpts->{'stylesheet'} ) ) { - write_style_sheet_file('-'); - Perl::Tidy::Exit 0; - } - - # make sure user gives a file name after -css - if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { - $css_linkname = $rOpts->{'html-linked-style-sheet'}; - if ( $css_linkname =~ /^-/ ) { - Perl::Tidy::Die "You must specify a valid filename after -css\n"; - } - } - - # check for conflict - if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { - $rOpts->{'nohtml-style-sheets'} = 0; - warning("You can't specify both -css and -nss; -nss ignored\n"); - } - - # write a style sheet file if necessary - if ($css_linkname) { - - # if the selected filename exists, don't write, because user may - # have done some work by hand to create it; use backup name instead - # Also, this will avoid a potential disaster in which the user - # forgets to specify the style sheet, like this: - # perltidy -html -css myfile1.pl myfile2.pl - # This would cause myfile1.pl to parsed as the style sheet by GetOpts - my $css_filename = $css_linkname; - unless ( -e $css_filename ) { - write_style_sheet_file($css_filename); - } - } - $missing_html_entities = 1 unless $rOpts->{'html-entities'}; - return; -} - -sub write_style_sheet_file { - - my $css_filename = shift; - my $fh; - unless ( $fh = IO::File->new("> $css_filename") ) { - Perl::Tidy::Die "can't open $css_filename: $!\n"; - } - write_style_sheet_data($fh); - eval { $fh->close }; - return; -} - -sub write_style_sheet_data { - - # write the style sheet data to an open file handle - my $fh = shift; - - my $bg_color = $rOpts->{'html-color-background'}; - my $text_color = $rOpts->{'html-color-punctuation'}; - - # pre-bgcolor is new, and may not be defined - my $pre_bg_color = $rOpts->{'html-pre-color-background'}; - $pre_bg_color = $bg_color unless $pre_bg_color; - - $fh->print(<<"EOM"); -/* default style sheet generated by perltidy */ -body {background: $bg_color; color: $text_color} -pre { color: $text_color; - background: $pre_bg_color; - font-family: courier; - } - -EOM - - foreach my $short_name ( sort keys %short_to_long_names ) { - my $long_name = $short_to_long_names{$short_name}; - - my $abbrev = '.' . $short_name; - if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment - my $color = $html_color{$short_name}; - if ( !defined($color) ) { $color = $text_color } - $fh->print("$abbrev \{ color: $color;"); - - if ( $html_bold{$short_name} ) { - $fh->print(" font-weight:bold;"); - } - - if ( $html_italic{$short_name} ) { - $fh->print(" font-style:italic;"); - } - $fh->print("} /* $long_name */\n"); - } - return; -} - -sub set_default_color { - - # make sure that options hash $rOpts->{$key} contains a valid color - my ( $key, $color ) = @_; - if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } - $rOpts->{$key} = check_RGB($color); - return; -} - -sub check_RGB { - - # if color is a 6 digit hex RGB value, prepend a #, otherwise - # assume that it is a valid ascii color name - my ($color) = @_; - if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } - return $color; -} - -sub set_default_properties { - my ( $short_name, $color, $bold, $italic ) = @_; - - set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); - my $key; - $key = "html-bold-$short_to_long_names{$short_name}"; - $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; - $key = "html-italic-$short_to_long_names{$short_name}"; - $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; - return; -} - -sub pod_to_html { - - # Use Pod::Html to process the pod and make the page - # then merge the perltidy code sections into it. - # return 1 if success, 0 otherwise - my ( $self, $pod_string, $css_string, $toc_string, $rpre_string_stack ) = - @_; - my $input_file = $self->{_input_file}; - my $title = $self->{_title}; - my $success_flag = 0; - - # don't try to use pod2html if no pod - unless ($pod_string) { - return $success_flag; - } - - # Pod::Html requires a real temporary filename - my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile(); - unless ($fh_tmp) { - Perl::Tidy::Warn - "unable to open temporary file $tmpfile; cannot use pod2html\n"; - return $success_flag; - } - - #------------------------------------------------------------------ - # Warning: a temporary file is open; we have to clean up if - # things go bad. From here on all returns should be by going to - # RETURN so that the temporary file gets unlinked. - #------------------------------------------------------------------ - - # write the pod text to the temporary file - $fh_tmp->print($pod_string); - $fh_tmp->close(); - - # Hand off the pod to pod2html. - # Note that we can use the same temporary filename for input and output - # because of the way pod2html works. - { - - my @args; - push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; - - # Flags with string args: - # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", - # "podpath=s", "podroot=s" - # Note: -css=s is handled by perltidy itself - foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) - { - if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } - } - - # Toggle switches; these have extra leading 'pod' - # "header!", "index!", "recurse!", "quiet!", "verbose!" - foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) { - my $kwd = $kw; # allows us to strip 'pod' - if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } - elsif ( defined( $rOpts->{$kw} ) ) { - $kwd =~ s/^pod//; - push @args, "--no$kwd"; - } - } - - # "flush", - my $kw = 'podflush'; - if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } - - # Must clean up if pod2html dies (it can); - # Be careful not to overwrite callers __DIE__ routine - local $SIG{__DIE__} = sub { - unlink $tmpfile if -e $tmpfile; - Perl::Tidy::Die $_[0]; - }; - - pod2html(@args); - } - $fh_tmp = IO::File->new( $tmpfile, 'r' ); - unless ($fh_tmp) { - - # this error shouldn't happen ... we just used this filename - Perl::Tidy::Warn - "unable to open temporary file $tmpfile; cannot use pod2html\n"; - goto RETURN; - } - - my $html_fh = $self->{_html_fh}; - my @toc; - my $in_toc; - my $ul_level = 0; - my $no_print; - - # This routine will write the html selectively and store the toc - my $html_print = sub { - foreach (@_) { - $html_fh->print($_) unless ($no_print); - if ($in_toc) { push @toc, $_ } - } - }; - - # loop over lines of html output from pod2html and merge in - # the necessary perltidy html sections - my ( $saw_body, $saw_index, $saw_body_end ); - while ( my $line = $fh_tmp->getline() ) { - - if ( $line =~ /^\s*\s*$/i ) { - my $date = localtime; - $html_print->("\n"); - $html_print->($line); - } - - # Copy the perltidy css, if any, after tag - elsif ( $line =~ /^\s*\s*$/i ) { - $saw_body = 1; - $html_print->($css_string) if $css_string; - $html_print->($line); - - # add a top anchor and heading - $html_print->("\n"); - $title = escape_html($title); - $html_print->("

    $title

    \n"); - } - - # check for start of index, old pod2html - # before Pod::Html VERSION 1.15_02 it is delimited by comments as: - # - #
      - # ... - #
    - # - # - elsif ( $line =~ /^\s*\s*$/i ) { - $in_toc = 'INDEX'; - - # when frames are used, an extra table of contents in the - # contents panel is confusing, so don't print it - $no_print = $rOpts->{'frames'} - || !$rOpts->{'html-table-of-contents'}; - $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; - $html_print->($line); - } - - # check for start of index, new pod2html - # After Pod::Html VERSION 1.15_02 it is delimited as: - #
      - # ... - #
    - elsif ( $line =~ /^\s*/i ) { - $in_toc = 'UL'; - $ul_level = 1; - - # when frames are used, an extra table of contents in the - # contents panel is confusing, so don't print it - $no_print = $rOpts->{'frames'} - || !$rOpts->{'html-table-of-contents'}; - $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; - $html_print->($line); - } - - # Check for end of index, old pod2html - elsif ( $line =~ /^\s*\s*$/i ) { - $saw_index = 1; - $html_print->($line); - - # Copy the perltidy toc, if any, after the Pod::Html toc - if ($toc_string) { - $html_print->("
    \n") if $rOpts->{'frames'}; - $html_print->("

    Code Index:

    \n"); - my @toc = map { $_ .= "\n" } split /\n/, $toc_string; - $html_print->(@toc); - } - $in_toc = ""; - $no_print = 0; - } - - # must track
      depth level for new pod2html - elsif ( $line =~ /\s*
        \s*$/i && $in_toc eq 'UL' ) { - $ul_level++; - $html_print->($line); - } - - # Check for end of index, for new pod2html - elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) { - $ul_level--; - $html_print->($line); - - # Copy the perltidy toc, if any, after the Pod::Html toc - if ( $ul_level <= 0 ) { - $saw_index = 1; - if ($toc_string) { - $html_print->("
        \n") if $rOpts->{'frames'}; - $html_print->("

        Code Index:

        \n"); - my @toc = map { $_ .= "\n" } split /\n/, $toc_string; - $html_print->(@toc); - } - $in_toc = ""; - $ul_level = 0; - $no_print = 0; - } - } - - # Copy one perltidy section after each marker - elsif ( $line =~ /^(.*)(.*)$/ ) { - $line = $2; - $html_print->($1) if $1; - - # Intermingle code and pod sections if we saw multiple =cut's. - if ( $self->{_pod_cut_count} > 1 ) { - my $rpre_string = shift( @{$rpre_string_stack} ); - if ( ${$rpre_string} ) { - $html_print->('
        ');
        -                    $html_print->( ${$rpre_string} );
        -                    $html_print->('
        '); - } - else { - - # shouldn't happen: we stored a string before writing - # each marker. - Perl::Tidy::Warn -"Problem merging html stream with pod2html; order may be wrong\n"; - } - $html_print->($line); - } - - # If didn't see multiple =cut lines, we'll put the pod out first - # and then the code, because it's less confusing. - else { - - # since we are not intermixing code and pod, we don't need - # or want any
        lines which separated pod and code - $html_print->($line) unless ( $line =~ /^\s*
        \s*$/i ); - } - } - - # Copy any remaining code section before the tag - elsif ( $line =~ /^\s*<\/body>\s*$/i ) { - $saw_body_end = 1; - if ( @{$rpre_string_stack} ) { - unless ( $self->{_pod_cut_count} > 1 ) { - $html_print->('
        '); - } - while ( my $rpre_string = shift( @{$rpre_string_stack} ) ) { - $html_print->('
        ');
        -                    $html_print->( ${$rpre_string} );
        -                    $html_print->('
        '); - } - } - $html_print->($line); - } - else { - $html_print->($line); - } - } - - $success_flag = 1; - unless ($saw_body) { - Perl::Tidy::Warn "Did not see in pod2html output\n"; - $success_flag = 0; - } - unless ($saw_body_end) { - Perl::Tidy::Warn "Did not see in pod2html output\n"; - $success_flag = 0; - } - unless ($saw_index) { - Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n"; - $success_flag = 0; - } - - RETURN: - eval { $html_fh->close() }; - - # note that we have to unlink tmpfile before making frames - # because the tmpfile may be one of the names used for frames - if ( -e $tmpfile ) { - unless ( unlink($tmpfile) ) { - Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n"); - $success_flag = 0; - } - } - - if ( $success_flag && $rOpts->{'frames'} ) { - $self->make_frame( \@toc ); - } - return $success_flag; -} - -sub make_frame { - - # Make a frame with table of contents in the left panel - # and the text in the right panel. - # On entry: - # $html_filename contains the no-frames html output - # $rtoc is a reference to an array with the table of contents - my ( $self, $rtoc ) = @_; - my $input_file = $self->{_input_file}; - my $html_filename = $self->{_html_file}; - my $toc_filename = $self->{_toc_filename}; - my $src_filename = $self->{_src_filename}; - my $title = $self->{_title}; - $title = escape_html($title); - - # FUTURE input parameter: - my $top_basename = ""; - - # We need to produce 3 html files: - # 1. - the table of contents - # 2. - the contents (source code) itself - # 3. - the frame which contains them - - # get basenames for relative links - my ( $toc_basename, $toc_path ) = fileparse($toc_filename); - my ( $src_basename, $src_path ) = fileparse($src_filename); - - # 1. Make the table of contents panel, with appropriate changes - # to the anchor names - my $src_frame_name = 'SRC'; - my $first_anchor = - write_toc_html( $title, $toc_filename, $src_basename, $rtoc, - $src_frame_name ); - - # 2. The current .html filename is renamed to be the contents panel - rename( $html_filename, $src_filename ) - or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n"; - - # 3. Then use the original html filename for the frame - write_frame_html( - $title, $html_filename, $top_basename, - $toc_basename, $src_basename, $src_frame_name - ); - return; -} - -sub write_toc_html { - - # write a separate html table of contents file for frames - my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; - my $fh = IO::File->new( $toc_filename, 'w' ) - or Perl::Tidy::Die "Cannot open $toc_filename:$!\n"; - $fh->print(< - -$title - - -

        $title

        -EOM - - my $first_anchor = - change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); - $fh->print( join "", @{$rtoc} ); - - $fh->print(< - -EOM - - return; -} - -sub write_frame_html { - - # write an html file to be the table of contents frame - my ( - $title, $frame_filename, $top_basename, - $toc_basename, $src_basename, $src_frame_name - ) = @_; - - my $fh = IO::File->new( $frame_filename, 'w' ) - or Perl::Tidy::Die "Cannot open $toc_basename:$!\n"; - - $fh->print(< - - - -$title - -EOM - - # two left panels, one right, if master index file - if ($top_basename) { - $fh->print(< - - - - -EOM - } - - # one left panels, one right, if no master index file - else { - $fh->print(< - -EOM - } - $fh->print(< - -<body> -<p>If you see this message, you are using a non-frame-capable web client.</p> -<p>This document contains:</p> -<ul> -<li><a href="$toc_basename">A table of contents</a></li> -<li><a href="$src_basename">The source code</a></li> -</ul> -</body> - - - -EOM - return; -} - -sub change_anchor_names { - - # add a filename and target to anchors - # also return the first anchor - my ( $rlines, $filename, $target ) = @_; - my $first_anchor; - foreach my $line ( @{$rlines} ) { - - # We're looking for lines like this: - #
      • SYNOPSIS
      • - # ---- - -------- ----------------- - # $1 $4 $5 - if ( $line =~ /^(.*)]*>(.*)$/i ) { - my $pre = $1; - my $name = $4; - my $post = $5; - my $href = "$filename#$name"; - $line = "$pre$post\n"; - unless ($first_anchor) { $first_anchor = $href } - } - } - return $first_anchor; -} - -sub close_html_file { - my $self = shift; - return unless $self->{_html_file_opened}; - - my $html_fh = $self->{_html_fh}; - my $rtoc_string = $self->{_rtoc_string}; - - # There are 3 basic paths to html output... - - # --------------------------------- - # Path 1: finish up if in -pre mode - # --------------------------------- - if ( $rOpts->{'html-pre-only'} ) { - $html_fh->print( <<"PRE_END"); -
-PRE_END - eval { $html_fh->close() }; - return; - } - - # Finish the index - $self->add_toc_item( 'EOF', 'EOF' ); - - my $rpre_string_stack = $self->{_rpre_string_stack}; - - # Patch to darken the
 background color in case of pod2html and
-    # interleaved code/documentation.  Otherwise, the distinction
-    # between code and documentation is blurred.
-    if (   $rOpts->{pod2html}
-        && $self->{_pod_cut_count} >= 1
-        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
-    {
-        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
-    }
-
-    # put the css or its link into a string, if used
-    my $css_string;
-    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
-
-    # use css linked to another file
-    if ( $rOpts->{'html-linked-style-sheet'} ) {
-        $fh_css->print(
-            qq());
-    }
-
-    # use css embedded in this file
-    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
-        $fh_css->print( <<'ENDCSS');
-
-ENDCSS
-    }
-
-    # -----------------------------------------------------------
-    # path 2: use pod2html if requested
-    #         If we fail for some reason, continue on to path 3
-    # -----------------------------------------------------------
-    if ( $rOpts->{'pod2html'} ) {
-        my $rpod_string = $self->{_rpod_string};
-        $self->pod_to_html(
-            ${$rpod_string}, $css_string,
-            ${$rtoc_string}, $rpre_string_stack
-        ) && return;
-    }
-
-    # --------------------------------------------------
-    # path 3: write code in html, with pod only in italics
-    # --------------------------------------------------
-    my $input_file = $self->{_input_file};
-    my $title      = escape_html($input_file);
-    my $date       = localtime;
-    $html_fh->print( <<"HTML_START");
-
-
-
-
-$title
-HTML_START
-
-    # output the css, if used
-    if ($css_string) {
-        $html_fh->print($css_string);
-        $html_fh->print( <<"ENDCSS");
-
-
-ENDCSS
-    }
-    else {
-
-        $html_fh->print( <<"HTML_START");
-
-{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
-HTML_START
-    }
-
-    $html_fh->print("\n");
-    $html_fh->print( <<"EOM");
-

$title

-EOM - - # copy the table of contents - if ( ${$rtoc_string} - && !$rOpts->{'frames'} - && $rOpts->{'html-table-of-contents'} ) - { - $html_fh->print( ${$rtoc_string} ); - } - - # copy the pre section(s) - my $fname_comment = $input_file; - $fname_comment =~ s/--+/-/g; # protect HTML comment tags - $html_fh->print( <<"END_PRE"); -
- -
-END_PRE
-
-    foreach my $rpre_string ( @{$rpre_string_stack} ) {
-        $html_fh->print( ${$rpre_string} );
-    }
-
-    # and finish the html page
-    $html_fh->print( <<"HTML_END");
-
- - -HTML_END - eval { $html_fh->close() }; # could be object without close method - - if ( $rOpts->{'frames'} ) { - my @toc = map { $_ .= "\n" } split /\n/, ${$rtoc_string}; - $self->make_frame( \@toc ); - } - return; -} - -sub markup_tokens { - my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_; - my ( @colored_tokens, $string, $type, $token, $level ); - my $rlast_level = $self->{_rlast_level}; - my $rpackage_stack = $self->{_rpackage_stack}; - - for ( my $j = 0 ; $j < @{$rtoken_type} ; $j++ ) { - $type = $rtoken_type->[$j]; - $token = $rtokens->[$j]; - $level = $rlevels->[$j]; - $level = 0 if ( $level < 0 ); - - #------------------------------------------------------- - # Update the package stack. The package stack is needed to keep - # the toc correct because some packages may be declared within - # blocks and go out of scope when we leave the block. - #------------------------------------------------------- - if ( $level > ${$rlast_level} ) { - unless ( $rpackage_stack->[ $level - 1 ] ) { - $rpackage_stack->[ $level - 1 ] = 'main'; - } - $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; - } - elsif ( $level < ${$rlast_level} ) { - my $package = $rpackage_stack->[$level]; - unless ($package) { $package = 'main' } - - # if we change packages due to a nesting change, we - # have to make an entry in the toc - if ( $package ne $rpackage_stack->[ $level + 1 ] ) { - $self->add_toc_item( $package, 'package' ); - } - } - ${$rlast_level} = $level; - - #------------------------------------------------------- - # Intercept a sub name here; split it - # into keyword 'sub' and sub name; and add an - # entry in the toc - #------------------------------------------------------- - if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { - $token = $self->markup_html_element( $1, 'k' ); - push @colored_tokens, $token; - $token = $2; - $type = 'M'; - - # but don't include sub declarations in the toc; - # these wlll have leading token types 'i;' - my $signature = join "", @{$rtoken_type}; - unless ( $signature =~ /^i;/ ) { - my $subname = $token; - $subname =~ s/[\s\(].*$//; # remove any attributes and prototype - $self->add_toc_item( $subname, 'sub' ); - } - } - - #------------------------------------------------------- - # Intercept a package name here; split it - # into keyword 'package' and name; add to the toc, - # and update the package stack - #------------------------------------------------------- - if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { - $token = $self->markup_html_element( $1, 'k' ); - push @colored_tokens, $token; - $token = $2; - $type = 'i'; - $self->add_toc_item( "$token", 'package' ); - $rpackage_stack->[$level] = $token; - } - - $token = $self->markup_html_element( $token, $type ); - push @colored_tokens, $token; - } - return ( \@colored_tokens ); -} - -sub markup_html_element { - my ( $self, $token, $type ) = @_; - - return $token if ( $type eq 'b' ); # skip a blank token - return $token if ( $token =~ /^\s*$/ ); # skip a blank line - $token = escape_html($token); - - # get the short abbreviation for this token type - my $short_name = $token_short_names{$type}; - if ( !defined($short_name) ) { - $short_name = "pu"; # punctuation is default - } - - # handle style sheets.. - if ( !$rOpts->{'nohtml-style-sheets'} ) { - if ( $short_name ne 'pu' ) { - $token = qq() . $token . ""; - } - } - - # handle no style sheets.. - else { - my $color = $html_color{$short_name}; - - if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { - $token = qq() . $token . ""; - } - if ( $html_italic{$short_name} ) { $token = "$token" } - if ( $html_bold{$short_name} ) { $token = "$token" } - } - return $token; -} - -sub escape_html { - - my $token = shift; - if ($missing_html_entities) { - $token =~ s/\&/&/g; - $token =~ s/\/>/g; - $token =~ s/\"/"/g; - } - else { - HTML::Entities::encode_entities($token); - } - return $token; -} - -sub finish_formatting { - - # called after last line - my $self = shift; - $self->close_html_file(); - return; -} - -sub write_line { - - my ( $self, $line_of_tokens ) = @_; - return unless $self->{_html_file_opened}; - my $html_pre_fh = $self->{_html_pre_fh}; - my $line_type = $line_of_tokens->{_line_type}; - my $input_line = $line_of_tokens->{_line_text}; - my $line_number = $line_of_tokens->{_line_number}; - chomp $input_line; - - # markup line of code.. - my $html_line; - if ( $line_type eq 'CODE' ) { - my $rtoken_type = $line_of_tokens->{_rtoken_type}; - my $rtokens = $line_of_tokens->{_rtokens}; - my $rlevels = $line_of_tokens->{_rlevels}; - - if ( $input_line =~ /(^\s*)/ ) { - $html_line = $1; - } - else { - $html_line = ""; - } - my ($rcolored_tokens) = - $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); - $html_line .= join '', @{$rcolored_tokens}; - } - - # markup line of non-code.. - else { - my $line_character; - if ( $line_type eq 'HERE' ) { $line_character = 'H' } - elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } - elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } - elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } - elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } - elsif ( $line_type eq 'END_START' ) { - $line_character = 'k'; - $self->add_toc_item( '__END__', '__END__' ); - } - elsif ( $line_type eq 'DATA_START' ) { - $line_character = 'k'; - $self->add_toc_item( '__DATA__', '__DATA__' ); - } - elsif ( $line_type =~ /^POD/ ) { - $line_character = 'P'; - if ( $rOpts->{'pod2html'} ) { - my $html_pod_fh = $self->{_html_pod_fh}; - if ( $line_type eq 'POD_START' ) { - - my $rpre_string_stack = $self->{_rpre_string_stack}; - my $rpre_string = $rpre_string_stack->[-1]; - - # if we have written any non-blank lines to the - # current pre section, start writing to a new output - # string - if ( ${$rpre_string} =~ /\S/ ) { - my $pre_string; - $html_pre_fh = - Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); - $self->{_html_pre_fh} = $html_pre_fh; - push @{$rpre_string_stack}, \$pre_string; - - # leave a marker in the pod stream so we know - # where to put the pre section we just - # finished. - my $for_html = '=for html'; # don't confuse pod utils - $html_pod_fh->print(< - -EOM - } - - # otherwise, just clear the current string and start - # over - else { - ${$rpre_string} = ""; - $html_pod_fh->print("\n"); - } - } - $html_pod_fh->print( $input_line . "\n" ); - if ( $line_type eq 'POD_END' ) { - $self->{_pod_cut_count}++; - $html_pod_fh->print("\n"); - } - return; - } - } - else { $line_character = 'Q' } - $html_line = $self->markup_html_element( $input_line, $line_character ); - } - - # add the line number if requested - if ( $rOpts->{'html-line-numbers'} ) { - my $extra_space = - ( $line_number < 10 ) ? " " - : ( $line_number < 100 ) ? " " - : ( $line_number < 1000 ) ? " " - : ""; - $html_line = $extra_space . $line_number . " " . $html_line; - } - - # write the line - $html_pre_fh->print("$html_line\n"); - return; -} - -##################################################################### -# -# The Perl::Tidy::Formatter package adds indentation, whitespace, and -# line breaks to the token stream -# -# WARNING: This is not a real class for speed reasons. Only one -# Formatter may be used. -# -##################################################################### - -package Perl::Tidy::Formatter; - -BEGIN { - - # Caution: these debug flags produce a lot of output - # They should all be 0 except when debugging small scripts - use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0; - use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0; - use constant FORMATTER_DEBUG_FLAG_BOND => 0; - use constant FORMATTER_DEBUG_FLAG_BREAK => 0; - use constant FORMATTER_DEBUG_FLAG_CI => 0; - use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; - use constant FORMATTER_DEBUG_FLAG_FORCE => 0; - use constant FORMATTER_DEBUG_FLAG_LIST => 0; - use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; - use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; - use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; - use constant FORMATTER_DEBUG_FLAG_STORE => 0; - use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; - use constant FORMATTER_DEBUG_FLAG_WHITE => 0; - - my $debug_warning = sub { - print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n"; - }; - - FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE'); - FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES'); - FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); - FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); - FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); - FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); - FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); - FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); - FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); - FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); - FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); - FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); - FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); - FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); -} - -use Carp; -use vars qw{ - - @gnu_stack - $max_gnu_stack_index - $gnu_position_predictor - $line_start_index_to_go - $last_indentation_written - $last_unadjusted_indentation - $last_leading_token - $last_output_short_opening_token - $peak_batch_size - - $saw_VERSION_in_this_file - $saw_END_or_DATA_ - - @gnu_item_list - $max_gnu_item_index - $gnu_sequence_number - $last_output_indentation - %last_gnu_equals - %gnu_comma_count - %gnu_arrow_count - - @block_type_to_go - @type_sequence_to_go - @container_environment_to_go - @bond_strength_to_go - @forced_breakpoint_to_go - @token_lengths_to_go - @summed_lengths_to_go - @levels_to_go - @leading_spaces_to_go - @reduced_spaces_to_go - @matching_token_to_go - @mate_index_to_go - @ci_levels_to_go - @nesting_depth_to_go - @nobreak_to_go - @old_breakpoint_to_go - @tokens_to_go - @rtoken_vars_to_go - @K_to_go - @types_to_go - @inext_to_go - @iprev_to_go - - %saved_opening_indentation - - $max_index_to_go - $comma_count_in_batch - $last_nonblank_index_to_go - $last_nonblank_type_to_go - $last_nonblank_token_to_go - $last_last_nonblank_index_to_go - $last_last_nonblank_type_to_go - $last_last_nonblank_token_to_go - @nonblank_lines_at_depth - $starting_in_quote - $ending_in_quote - @whitespace_level_stack - $whitespace_last_level - - $format_skipping_pattern_begin - $format_skipping_pattern_end - - $forced_breakpoint_count - $forced_breakpoint_undo_count - @forced_breakpoint_undo_stack - %postponed_breakpoint - - $tabbing - $embedded_tab_count - $first_embedded_tab_at - $last_embedded_tab_at - $deleted_semicolon_count - $first_deleted_semicolon_at - $last_deleted_semicolon_at - $added_semicolon_count - $first_added_semicolon_at - $last_added_semicolon_at - $first_tabbing_disagreement - $last_tabbing_disagreement - $in_tabbing_disagreement - $tabbing_disagreement_count - $input_line_tabbing - - $last_line_leading_type - $last_line_leading_level - $last_last_line_leading_level - - %block_leading_text - %block_opening_line_number - $csc_new_statement_ok - $csc_last_label - %csc_block_label - $accumulating_text_for_block - $leading_block_text - $rleading_block_if_elsif_text - $leading_block_text_level - $leading_block_text_length_exceeded - $leading_block_text_line_length - $leading_block_text_line_number - $closing_side_comment_prefix_pattern - $closing_side_comment_list_pattern - - $blank_lines_after_opening_block_pattern - $blank_lines_before_closing_block_pattern - - $last_nonblank_token - $last_nonblank_type - $last_last_nonblank_token - $last_last_nonblank_type - $last_nonblank_block_type - $last_output_level - %is_do_follower - %is_if_brace_follower - %space_after_keyword - $rbrace_follower - $looking_for_else - %is_last_next_redo_return - %is_other_brace_follower - %is_else_brace_follower - %is_anon_sub_brace_follower - %is_anon_sub_1_brace_follower - %is_sort_map_grep - %is_sort_map_grep_eval - %is_sort_map_grep_eval_do - %is_block_without_semicolon - %is_if_unless - %is_and_or - %is_assignment - %is_chain_operator - %is_if_unless_and_or_last_next_redo_return - %ok_to_add_semicolon_for_block_type - - @has_broken_sublist - @dont_align - @want_comma_break - - $is_static_block_comment - $index_start_one_line_block - $semicolons_before_block_self_destruct - $index_max_forced_break - $input_line_number - $diagnostics_object - $vertical_aligner_object - $logger_object - $file_writer_object - $formatter_self - @ci_stack - %want_break_before - %outdent_keyword - $static_block_comment_pattern - $static_side_comment_pattern - %opening_vertical_tightness - %closing_vertical_tightness - %closing_token_indentation - $some_closing_token_indentation - - %opening_token_right - %stack_opening_token - %stack_closing_token - - $block_brace_vertical_tightness_pattern - - $rOpts_add_newlines - $rOpts_add_whitespace - $rOpts_block_brace_tightness - $rOpts_block_brace_vertical_tightness - $rOpts_brace_left_and_indent - $rOpts_comma_arrow_breakpoints - $rOpts_break_at_old_keyword_breakpoints - $rOpts_break_at_old_comma_breakpoints - $rOpts_break_at_old_logical_breakpoints - $rOpts_break_at_old_ternary_breakpoints - $rOpts_break_at_old_attribute_breakpoints - $rOpts_closing_side_comment_else_flag - $rOpts_closing_side_comment_maximum_text - $rOpts_continuation_indentation - $rOpts_cuddled_else - $rOpts_delete_old_whitespace - $rOpts_fuzzy_line_length - $rOpts_indent_columns - $rOpts_line_up_parentheses - $rOpts_maximum_fields_per_table - $rOpts_maximum_line_length - $rOpts_variable_maximum_line_length - $rOpts_short_concatenation_item_length - $rOpts_keep_old_blank_lines - $rOpts_ignore_old_breakpoints - $rOpts_format_skipping - $rOpts_space_function_paren - $rOpts_space_keyword_paren - $rOpts_keep_interior_semicolons - $rOpts_ignore_side_comment_lengths - $rOpts_stack_closing_block_brace - $rOpts_space_backslash_quote - $rOpts_whitespace_cycle - - %is_opening_type - %is_closing_type - %is_keyword_returning_list - %tightness - %matching_token - $rOpts - %right_bond_strength - %left_bond_strength - %binary_ws_rules - %want_left_space - %want_right_space - %is_digraph - %is_trigraph - $bli_pattern - $bli_list_string - %is_closing_type - %is_opening_type - %is_closing_token - %is_opening_token - - %weld_len_left_closing - %weld_len_right_closing - %weld_len_left_opening - %weld_len_right_opening - - $rcuddled_block_types - - $SUB_PATTERN - $ASUB_PATTERN - - $NVARS - -}; - -BEGIN { - - # Array index names for token vars - my $i = 0; - use constant { - _BLOCK_TYPE_ => $i++, - _CI_LEVEL_ => $i++, - _CONTAINER_ENVIRONMENT_ => $i++, - _CONTAINER_TYPE_ => $i++, - _CUMULATIVE_LENGTH_ => $i++, - _LINE_INDEX_ => $i++, - _KNEXT_SEQ_ITEM_ => $i++, - _LEVEL_ => $i++, - _LEVEL_TRUE_ => $i++, - _SLEVEL_ => $i++, - _TOKEN_ => $i++, - _TYPE_ => $i++, - _TYPE_SEQUENCE_ => $i++, - }; - $NVARS = 1 + _TYPE_SEQUENCE_; - - # default list of block types for which -bli would apply - $bli_list_string = 'if else elsif unless while for foreach do : sub'; - - my @q; - - @q = qw( - .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> - <= >= == =~ !~ != ++ -- /= x= - ); - @is_digraph{@q} = (1) x scalar(@q); - - @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ ); - @is_trigraph{@q} = (1) x scalar(@q); - - @q = qw( - = **= += *= &= <<= &&= - -= /= |= >>= ||= //= - .= %= ^= - x= - ); - @is_assignment{@q} = (1) x scalar(@q); - - @q = qw( - grep - keys - map - reverse - sort - split - ); - @is_keyword_returning_list{@q} = (1) x scalar(@q); - - @q = qw(is if unless and or err last next redo return); - @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q); - - @q = qw(last next redo return); - @is_last_next_redo_return{@q} = (1) x scalar(@q); - - @q = qw(sort map grep); - @is_sort_map_grep{@q} = (1) x scalar(@q); - - @q = qw(sort map grep eval); - @is_sort_map_grep_eval{@q} = (1) x scalar(@q); - - @q = qw(sort map grep eval do); - @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q); - - @q = qw(if unless); - @is_if_unless{@q} = (1) x scalar(@q); - - @q = qw(and or err); - @is_and_or{@q} = (1) x scalar(@q); - - # Identify certain operators which often occur in chains. - # Note: the minus (-) causes a side effect of padding of the first line in - # something like this (by sub set_logical_padding): - # Checkbutton => 'Transmission checked', - # -variable => \$TRANS - # This usually improves appearance so it seems ok. - @q = qw(&& || and or : ? . + - * /); - @is_chain_operator{@q} = (1) x scalar(@q); - - # We can remove semicolons after blocks preceded by these keywords - @q = - qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else - unless while until for foreach given when default); - @is_block_without_semicolon{@q} = (1) x scalar(@q); - - # We will allow semicolons to be added within these block types - # as well as sub and package blocks. - # NOTES: - # 1. Note that these keywords are omitted: - # switch case given when default sort map grep - # 2. It is also ok to add for sub and package blocks and a labeled block - # 3. But not okay for other perltidy types including: - # { } ; G t - # 4. Test files: blktype.t, blktype1.t, semicolon.t - @q = - qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else - unless do while until eval for foreach ); - @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q); - - # 'L' is token for opening { at hash key - @q = qw" L { ( [ "; - @is_opening_type{@q} = (1) x scalar(@q); - - # 'R' is token for closing } at hash key - @q = qw" R } ) ] "; - @is_closing_type{@q} = (1) x scalar(@q); - - @q = qw" { ( [ "; - @is_opening_token{@q} = (1) x scalar(@q); - - @q = qw" } ) ] "; - @is_closing_token{@q} = (1) x scalar(@q); - - # Patterns for standardizing matches to block types for regular subs and - # anonymous subs. Examples - # 'sub process' is a named sub - # 'sub ::m' is a named sub - # 'sub' is an anonymous sub - # 'sub:' is a label, not a sub - # 'substr' is a keyword - $SUB_PATTERN = '^sub\s+(::|\w)'; - $ASUB_PATTERN = '^sub$'; -} - -# whitespace codes -use constant WS_YES => 1; -use constant WS_OPTIONAL => 0; -use constant WS_NO => -1; - -# Token bond strengths. -use constant NO_BREAK => 10000; -use constant VERY_STRONG => 100; -use constant STRONG => 2.1; -use constant NOMINAL => 1.1; -use constant WEAK => 0.8; -use constant VERY_WEAK => 0.55; - -# values for testing indexes in output array -use constant UNDEFINED_INDEX => -1; - -# Maximum number of little messages; probably need not be changed. -use constant MAX_NAG_MESSAGES => 6; - -# increment between sequence numbers for each type -# For example, ?: pairs might have numbers 7,11,15,... -use constant TYPE_SEQUENCE_INCREMENT => 4; - -{ - - # methods to count instances - my $_count = 0; - sub get_count { return $_count; } - sub _increment_count { return ++$_count } - sub _decrement_count { return --$_count } -} - -sub trim { - - # trim leading and trailing whitespace from a string - my $str = shift; - $str =~ s/\s+$//; - $str =~ s/^\s+//; - return $str; -} - -sub max { - my @vals = @_; - my $max = shift @vals; - foreach my $val (@vals) { - $max = ( $max < $val ) ? $val : $max; - } - return $max; -} - -sub min { - my @vals = @_; - my $min = shift @vals; - foreach my $val (@vals) { - $min = ( $min > $val ) ? $val : $min; - } - return $min; -} - -sub split_words { - - # given a string containing words separated by whitespace, - # return the list of words - my ($str) = @_; - return unless $str; - $str =~ s/\s+$//; - $str =~ s/^\s+//; - return split( /\s+/, $str ); -} - -sub check_keys { - my ( $rtest, $rvalid, $msg, $exact_match ) = @_; - - # Check the keys of a hash: - # $rtest = ref to hash to test - # $rexpected = ref to has with valid keys - - # $msg = a message to write in case of error - # $exact_match defines the type of check: - # = false: test hash must not have unknown key - # = true: test hash must have exactly same keys as known hash - my @unknown_keys = - grep { !exists $rvalid->{$_} } keys %{$rtest}; - my @missing_keys = - grep { !exists $rtest->{$_} } keys %{$rvalid}; - my $error = @unknown_keys; - if ($exact_match) { $error ||= @missing_keys } - if ($error) { - local $" = ')('; - my @expected_keys = sort keys %{$rvalid}; - @unknown_keys = sort @unknown_keys; - Perl::Tidy::Die <warning($msg); } - return; -} - -sub complain { - my ($msg) = @_; - if ($logger_object) { - $logger_object->complain($msg); - } - return; -} - -sub write_logfile_entry { - my @msg = @_; - if ($logger_object) { - $logger_object->write_logfile_entry(@msg); - } - return; -} - -sub black_box { - my @msg = @_; - if ($logger_object) { $logger_object->black_box(@msg); } - return; -} - -sub report_definite_bug { - if ($logger_object) { - $logger_object->report_definite_bug(); - } - return; -} - -sub get_saw_brace_error { - if ($logger_object) { - $logger_object->get_saw_brace_error(); - } - return; -} - -sub we_are_at_the_last_line { - if ($logger_object) { - $logger_object->we_are_at_the_last_line(); - } - return; -} - -# interface to Perl::Tidy::Diagnostics routine -sub write_diagnostics { - my $msg = shift; - if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); } - return; -} - -sub get_added_semicolon_count { - my $self = shift; - return $added_semicolon_count; -} - -sub DESTROY { - my $self = shift; - $self->_decrement_count(); - return; -} - -sub get_output_line_number { - return $vertical_aligner_object->get_output_line_number(); -} - -sub new { - - my $class = shift; - - # we are given an object with a write_line() method to take lines - my %defaults = ( - sink_object => undef, - diagnostics_object => undef, - logger_object => undef, - ); - my %args = ( %defaults, @_ ); - - $logger_object = $args{logger_object}; - $diagnostics_object = $args{diagnostics_object}; - - # we create another object with a get_line() and peek_ahead() method - my $sink_object = $args{sink_object}; - $file_writer_object = - Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); - - # initialize the leading whitespace stack to negative levels - # so that we can never run off the end of the stack - $peak_batch_size = 0; # flag to determine if we have output code - $gnu_position_predictor = 0; # where the current token is predicted to be - $max_gnu_stack_index = 0; - $max_gnu_item_index = -1; - $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); - @gnu_item_list = (); - $last_output_indentation = 0; - $last_indentation_written = 0; - $last_unadjusted_indentation = 0; - $last_leading_token = ""; - $last_output_short_opening_token = 0; - - $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; - $saw_END_or_DATA_ = 0; - - @block_type_to_go = (); - @type_sequence_to_go = (); - @container_environment_to_go = (); - @bond_strength_to_go = (); - @forced_breakpoint_to_go = (); - @summed_lengths_to_go = (); # line length to start of ith token - @token_lengths_to_go = (); - @levels_to_go = (); - @matching_token_to_go = (); - @mate_index_to_go = (); - @ci_levels_to_go = (); - @nesting_depth_to_go = (0); - @nobreak_to_go = (); - @old_breakpoint_to_go = (); - @tokens_to_go = (); - @rtoken_vars_to_go = (); - @K_to_go = (); - @types_to_go = (); - @leading_spaces_to_go = (); - @reduced_spaces_to_go = (); - @inext_to_go = (); - @iprev_to_go = (); - - @whitespace_level_stack = (); - $whitespace_last_level = -1; - - @dont_align = (); - @has_broken_sublist = (); - @want_comma_break = (); - - @ci_stack = (""); - $first_tabbing_disagreement = 0; - $last_tabbing_disagreement = 0; - $tabbing_disagreement_count = 0; - $in_tabbing_disagreement = 0; - $input_line_tabbing = undef; - - $last_last_line_leading_level = 0; - $last_line_leading_level = 0; - $last_line_leading_type = '#'; - - $last_nonblank_token = ';'; - $last_nonblank_type = ';'; - $last_last_nonblank_token = ';'; - $last_last_nonblank_type = ';'; - $last_nonblank_block_type = ""; - $last_output_level = 0; - $looking_for_else = 0; - $embedded_tab_count = 0; - $first_embedded_tab_at = 0; - $last_embedded_tab_at = 0; - $deleted_semicolon_count = 0; - $first_deleted_semicolon_at = 0; - $last_deleted_semicolon_at = 0; - $added_semicolon_count = 0; - $first_added_semicolon_at = 0; - $last_added_semicolon_at = 0; - $is_static_block_comment = 0; - %postponed_breakpoint = (); - - # variables for adding side comments - %block_leading_text = (); - %block_opening_line_number = (); - $csc_new_statement_ok = 1; - %csc_block_label = (); - - %saved_opening_indentation = (); - - reset_block_text_accumulator(); - - prepare_for_new_input_lines(); - - $vertical_aligner_object = - Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, - $logger_object, $diagnostics_object ); - - if ( $rOpts->{'entab-leading-whitespace'} ) { - write_logfile_entry( -"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" - ); - } - elsif ( $rOpts->{'tabs'} ) { - write_logfile_entry("Indentation will be with a tab character\n"); - } - else { - write_logfile_entry( - "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); - } - - # This hash holds the main data structures for formatting - # All hash keys must be defined here. - $formatter_self = { - rlines => [], # = ref to array of lines of the file - rLL => [], # = ref to array with all tokens - # in the file. LL originally meant - # 'Linked List'. Linked lists were a - # bad idea but LL is easy to type. - Klimit => undef, # = maximum K index for rLL. This is - # needed to catch any autovivification - # problems. - rnested_pairs => [], # for welding decisions - K_opening_container => {}, # for quickly traversing structure - K_closing_container => {}, # for quickly traversing structure - K_opening_ternary => {}, # for quickly traversing structure - K_closing_ternary => {}, # for quickly traversing structure - rK_phantom_semicolons => - undef, # for undoing phantom semicolons if iterating - rpaired_to_inner_container => {}, - rbreak_container => {}, # prevent one-line blocks - rvalid_self_keys => [], # for checking - }; - my @valid_keys = keys %{$formatter_self}; - $formatter_self->{rvalid_self_keys} = \@valid_keys; - - bless $formatter_self, $class; - - # Safety check..this is not a class yet - if ( _increment_count() > 1 ) { - confess -"Attempt to create more than 1 object in $class, which is not a true class yet\n"; - } - return $formatter_self; -} - -sub Fault { - my ($msg) = @_; - - # This routine is called for errors that really should not occur - # except if there has been a bug introduced by a recent program change - my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); - my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); - my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); - - Perl::Tidy::Die(<{rvalid_self_keys} }; - my %valid_self_hash; - @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys); - check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 ); - return; -} - -sub check_token_array { - my $self = shift; - - # Check for errors in the array of tokens - # Uses package variable $NVARS - $self->check_self_hash(); - my $rLL = $self->{rLL}; - for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { - my $nvars = @{ $rLL->[$KK] }; - if ( $nvars != $NVARS ) { - my $type = $rLL->[$KK]->[_TYPE_]; - $type = '*' unless defined($type); - Fault( -"number of vars for node $KK, type '$type', is $nvars but should be $NVARS" - ); - } - foreach my $var ( _TOKEN_, _TYPE_ ) { - if ( !defined( $rLL->[$KK]->[$var] ) ) { - my $iline = $rLL->[$KK]->[_LINE_INDEX_]; - Fault("Undefined variable $var for K=$KK, line=$iline\n"); - } - } - return; - } -} - -sub set_rLL_max_index { - my $self = shift; - - # Set the limit of the rLL array, assuming that it is correct. - # This should only be called by routines after they make changes - # to tokenization - my $rLL = $self->{rLL}; - if ( !defined($rLL) ) { - - # Shouldn't happen because rLL was initialized to be an array ref - Fault("Undefined Memory rLL"); - } - my $Klimit_old = $self->{Klimit}; - my $num = @{$rLL}; - my $Klimit; - if ( $num > 0 ) { $Klimit = $num - 1 } - $self->{Klimit} = $Klimit; - return ($Klimit); -} - -sub get_rLL_max_index { - my $self = shift; - - # the memory location $rLL and number of tokens should be obtained - # from this routine so that any autovivication can be immediately caught. - my $rLL = $self->{rLL}; - my $Klimit = $self->{Klimit}; - if ( !defined($rLL) ) { - - # Shouldn't happen because rLL was initialized to be an array ref - Fault("Undefined Memory rLL"); - } - my $num = @{$rLL}; - if ( $num == 0 && defined($Klimit) - || $num > 0 && !defined($Klimit) - || $num > 0 && $Klimit != $num - 1 ) - { - - # Possible autovivification problem... - if ( !defined($Klimit) ) { $Klimit = '*' } - Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit"); - } - return ($Klimit); -} - -sub prepare_for_new_input_lines { - - # Remember the largest batch size processed. This is needed - # by the pad routine to avoid padding the first nonblank token - if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) { - $peak_batch_size = $max_index_to_go; - } - - $gnu_sequence_number++; # increment output batch counter - %last_gnu_equals = (); - %gnu_comma_count = (); - %gnu_arrow_count = (); - $line_start_index_to_go = 0; - $max_gnu_item_index = UNDEFINED_INDEX; - $index_max_forced_break = UNDEFINED_INDEX; - $max_index_to_go = UNDEFINED_INDEX; - $last_nonblank_index_to_go = UNDEFINED_INDEX; - $last_nonblank_type_to_go = ''; - $last_nonblank_token_to_go = ''; - $last_last_nonblank_index_to_go = UNDEFINED_INDEX; - $last_last_nonblank_type_to_go = ''; - $last_last_nonblank_token_to_go = ''; - $forced_breakpoint_count = 0; - $forced_breakpoint_undo_count = 0; - $rbrace_follower = undef; - $summed_lengths_to_go[0] = 0; - $comma_count_in_batch = 0; - $starting_in_quote = 0; - - destroy_one_line_block(); - return; -} - -sub break_lines { - - # Loop over old lines to set new line break points - - my $self = shift; - my $rlines = $self->{rlines}; - - # Flag to prevent blank lines when POD occurs in a format skipping sect. - my $in_format_skipping_section; - - my $line_type = ""; - foreach my $line_of_tokens ( @{$rlines} ) { - - my $last_line_type = $line_type; - $line_type = $line_of_tokens->{_line_type}; - my $input_line = $line_of_tokens->{_line_text}; - - # _line_type codes are: - # SYSTEM - system-specific code before hash-bang line - # CODE - line of perl code (including comments) - # POD_START - line starting pod, such as '=head' - # POD - pod documentation text - # POD_END - last line of pod section, '=cut' - # HERE - text of here-document - # HERE_END - last line of here-doc (target word) - # FORMAT - format section - # FORMAT_END - last line of format section, '.' - # DATA_START - __DATA__ line - # DATA - unidentified text following __DATA__ - # END_START - __END__ line - # END - unidentified text following __END__ - # ERROR - we are in big trouble, probably not a perl script - - # put a blank line after an =cut which comes before __END__ and __DATA__ - # (required by podchecker) - if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { - $file_writer_object->reset_consecutive_blank_lines(); - if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) { - $self->want_blank_line(); - } - } - - # handle line of code.. - if ( $line_type eq 'CODE' ) { - - my $CODE_type = $line_of_tokens->{_code_type}; - $in_format_skipping_section = $CODE_type eq 'FS'; - - # Handle blank lines - if ( $CODE_type eq 'BL' ) { - - # If keep-old-blank-lines is zero, we delete all - # old blank lines and let the blank line rules generate any - # needed blanks. - if ($rOpts_keep_old_blank_lines) { - $self->flush(); - $file_writer_object->write_blank_code_line( - $rOpts_keep_old_blank_lines == 2 ); - $last_line_leading_type = 'b'; - } - next; - } - else { - - # let logger see all non-blank lines of code - my $output_line_number = get_output_line_number(); - ##$vertical_aligner_object->get_output_line_number(); - black_box( $line_of_tokens, $output_line_number ); - } - - # Handle Format Skipping (FS) and Verbatim (VB) Lines - if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) { - $self->write_unindented_line("$input_line"); - $file_writer_object->reset_consecutive_blank_lines(); - next; - } - - # Handle all other lines of code - $self->print_line_of_tokens($line_of_tokens); - } - - # handle line of non-code.. - else { - - # set special flags - my $skip_line = 0; - my $tee_line = 0; - if ( $line_type =~ /^POD/ ) { - - # Pod docs should have a preceding blank line. But stay - # out of __END__ and __DATA__ sections, because - # the user may be using this section for any purpose whatsoever - if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } - if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } - if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } - if ( !$skip_line - && !$in_format_skipping_section - && $line_type eq 'POD_START' - && !$saw_END_or_DATA_ ) - { - $self->want_blank_line(); - } - } - - # leave the blank counters in a predictable state - # after __END__ or __DATA__ - elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { - $file_writer_object->reset_consecutive_blank_lines(); - $saw_END_or_DATA_ = 1; - } - - # write unindented non-code line - if ( !$skip_line ) { - if ($tee_line) { $file_writer_object->tee_on() } - $self->write_unindented_line($input_line); - if ($tee_line) { $file_writer_object->tee_off() } - } - } - } - return; -} - -{ ## Beginning of routine to check line hashes - - my %valid_line_hash; - - BEGIN { - - # These keys are defined for each line in the formatter - # Each line must have exactly these quantities - my @valid_line_keys = qw( - _curly_brace_depth - _ending_in_quote - _guessed_indentation_level - _line_number - _line_text - _line_type - _paren_depth - _quote_character - _rK_range - _square_bracket_depth - _starting_in_quote - _ended_in_blank_token - _code_type - - _ci_level_0 - _level_0 - _nesting_blocks_0 - _nesting_tokens_0 - ); - - @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys); - } - - sub check_line_hashes { - my $self = shift; - $self->check_self_hash(); - my $rlines = $self->{rlines}; - foreach my $rline ( @{$rlines} ) { - my $iline = $rline->{_line_number}; - my $line_type = $rline->{_line_type}; - check_keys( $rline, \%valid_line_hash, - "Checkpoint: line number =$iline, line_type=$line_type", 1 ); - } - return; - } - -} ## End check line hashes - -sub write_line { - - # We are caching tokenized lines as they arrive and converting them to the - # format needed for the final formatting. - my ( $self, $line_of_tokens_old ) = @_; - my $rLL = $self->{rLL}; - my $Klimit = $self->{Klimit}; - my $rlines_new = $self->{rlines}; - - my $Kfirst; - my $line_of_tokens = {}; - foreach my $key ( - qw( - _curly_brace_depth - _ending_in_quote - _guessed_indentation_level - _line_number - _line_text - _line_type - _paren_depth - _quote_character - _square_bracket_depth - _starting_in_quote - ) - ) - { - $line_of_tokens->{$key} = $line_of_tokens_old->{$key}; - } - - # Data needed by Logger - $line_of_tokens->{_level_0} = 0; - $line_of_tokens->{_ci_level_0} = 0; - $line_of_tokens->{_nesting_blocks_0} = ""; - $line_of_tokens->{_nesting_tokens_0} = ""; - - # Needed to avoid trimming quotes - $line_of_tokens->{_ended_in_blank_token} = undef; - - my $line_type = $line_of_tokens_old->{_line_type}; - my $input_line_no = $line_of_tokens_old->{_line_number} - 1; - if ( $line_type eq 'CODE' ) { - - my $rtokens = $line_of_tokens_old->{_rtokens}; - my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; - my $rblock_type = $line_of_tokens_old->{_rblock_type}; - my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type}; - my $rcontainer_environment = - $line_of_tokens_old->{_rcontainer_environment}; - my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; - my $rlevels = $line_of_tokens_old->{_rlevels}; - my $rslevels = $line_of_tokens_old->{_rslevels}; - my $rci_levels = $line_of_tokens_old->{_rci_levels}; - my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks}; - my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens}; - - my $jmax = @{$rtokens} - 1; - if ( $jmax >= 0 ) { - $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; - foreach my $j ( 0 .. $jmax ) { - my @tokary; - @tokary[ - _TOKEN_, _TYPE_, - _BLOCK_TYPE_, _CONTAINER_TYPE_, - _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_, - _LEVEL_, _LEVEL_TRUE_, - _SLEVEL_, _CI_LEVEL_, - _LINE_INDEX_, - ] - = ( - $rtokens->[$j], $rtoken_type->[$j], - $rblock_type->[$j], $rcontainer_type->[$j], - $rcontainer_environment->[$j], $rtype_sequence->[$j], - $rlevels->[$j], $rlevels->[$j], - $rslevels->[$j], $rci_levels->[$j], - $input_line_no, - ); - ##push @token_array, \@tokary; - push @{$rLL}, \@tokary; - } - - #$Klast=@{$rLL}-1; - $Klimit = @{$rLL} - 1; - - # Need to remember if we can trim the input line - $line_of_tokens->{_ended_in_blank_token} = - $rtoken_type->[$jmax] eq 'b'; - - $line_of_tokens->{_level_0} = $rlevels->[0]; - $line_of_tokens->{_ci_level_0} = $rci_levels->[0]; - $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0]; - $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0]; - } - } - - $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; - $line_of_tokens->{_code_type} = ""; - $self->{Klimit} = $Klimit; - - push @{$rlines_new}, $line_of_tokens; - return; -} - -BEGIN { - - # initialize these global hashes, which control the use of - # whitespace around tokens: - # - # %binary_ws_rules - # %want_left_space - # %want_right_space - # %space_after_keyword - # - # Many token types are identical to the tokens themselves. - # See the tokenizer for a complete list. Here are some special types: - # k = perl keyword - # f = semicolon in for statement - # m = unary minus - # p = unary plus - # Note that :: is excluded since it should be contained in an identifier - # Note that '->' is excluded because it never gets space - # parentheses and brackets are excluded since they are handled specially - # curly braces are included but may be overridden by logic, such as - # newline logic. - - # NEW_TOKENS: create a whitespace rule here. This can be as - # simple as adding your new letter to @spaces_both_sides, for - # example. - - my @q; - - @q = qw" L { ( [ "; - @is_opening_type{@q} = (1) x scalar(@q); - - @q = qw" R } ) ] "; - @is_closing_type{@q} = (1) x scalar(@q); - - my @spaces_both_sides = qw" - + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= - .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ - &&= ||= //= <=> A k f w F n C Y U G v - "; - - my @spaces_left_side = qw" - t ! ~ m p { \ h pp mm Z j - "; - push( @spaces_left_side, '#' ); # avoids warning message - - my @spaces_right_side = qw" - ; } ) ] R J ++ -- **= - "; - push( @spaces_right_side, ',' ); # avoids warning message - - # Note that we are in a BEGIN block here. Later in processing - # the values of %want_left_space and %want_right_space - # may be overridden by any user settings specified by the - # -wls and -wrs parameters. However the binary_whitespace_rules - # are hardwired and have priority. - @want_left_space{@spaces_both_sides} = - (1) x scalar(@spaces_both_sides); - @want_right_space{@spaces_both_sides} = - (1) x scalar(@spaces_both_sides); - @want_left_space{@spaces_left_side} = - (1) x scalar(@spaces_left_side); - @want_right_space{@spaces_left_side} = - (-1) x scalar(@spaces_left_side); - @want_left_space{@spaces_right_side} = - (-1) x scalar(@spaces_right_side); - @want_right_space{@spaces_right_side} = - (1) x scalar(@spaces_right_side); - $want_left_space{'->'} = WS_NO; - $want_right_space{'->'} = WS_NO; - $want_left_space{'**'} = WS_NO; - $want_right_space{'**'} = WS_NO; - $want_right_space{'CORE::'} = WS_NO; - - # These binary_ws_rules are hardwired and have priority over the above - # settings. It would be nice to allow adjustment by the user, - # but it would be complicated to specify. - # - # hash type information must stay tightly bound - # as in : ${xxxx} - $binary_ws_rules{'i'}{'L'} = WS_NO; - $binary_ws_rules{'i'}{'{'} = WS_YES; - $binary_ws_rules{'k'}{'{'} = WS_YES; - $binary_ws_rules{'U'}{'{'} = WS_YES; - $binary_ws_rules{'i'}{'['} = WS_NO; - $binary_ws_rules{'R'}{'L'} = WS_NO; - $binary_ws_rules{'R'}{'{'} = WS_NO; - $binary_ws_rules{'t'}{'L'} = WS_NO; - $binary_ws_rules{'t'}{'{'} = WS_NO; - $binary_ws_rules{'}'}{'L'} = WS_NO; - $binary_ws_rules{'}'}{'{'} = WS_NO; - $binary_ws_rules{'$'}{'L'} = WS_NO; - $binary_ws_rules{'$'}{'{'} = WS_NO; - $binary_ws_rules{'@'}{'L'} = WS_NO; - $binary_ws_rules{'@'}{'{'} = WS_NO; - $binary_ws_rules{'='}{'L'} = WS_YES; - $binary_ws_rules{'J'}{'J'} = WS_YES; - - # the following includes ') {' - # as in : if ( xxx ) { yyy } - $binary_ws_rules{']'}{'L'} = WS_NO; - $binary_ws_rules{']'}{'{'} = WS_NO; - $binary_ws_rules{')'}{'{'} = WS_YES; - $binary_ws_rules{')'}{'['} = WS_NO; - $binary_ws_rules{']'}{'['} = WS_NO; - $binary_ws_rules{']'}{'{'} = WS_NO; - $binary_ws_rules{'}'}{'['} = WS_NO; - $binary_ws_rules{'R'}{'['} = WS_NO; - - $binary_ws_rules{']'}{'++'} = WS_NO; - $binary_ws_rules{']'}{'--'} = WS_NO; - $binary_ws_rules{')'}{'++'} = WS_NO; - $binary_ws_rules{')'}{'--'} = WS_NO; - - $binary_ws_rules{'R'}{'++'} = WS_NO; - $binary_ws_rules{'R'}{'--'} = WS_NO; - - $binary_ws_rules{'i'}{'Q'} = WS_YES; - $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' - - # FIXME: we could to split 'i' into variables and functions - # and have no space for functions but space for variables. For now, - # I have a special patch in the special rules below - $binary_ws_rules{'i'}{'('} = WS_NO; - - $binary_ws_rules{'w'}{'('} = WS_NO; - $binary_ws_rules{'w'}{'{'} = WS_YES; -} ## end BEGIN block - -sub set_whitespace_flags { - - # This routine examines each pair of nonblank tokens and - # sets a flag indicating if white space is needed. - # - # $rwhitespace_flags->[$j] is a flag indicating whether a white space - # BEFORE token $j is needed, with the following values: - # - # WS_NO = -1 do not want a space before token $j - # WS_OPTIONAL= 0 optional space or $j is a whitespace - # WS_YES = 1 want a space before token $j - # - - my $self = shift; - my $rLL = $self->{rLL}; - - my $rwhitespace_flags = []; - - my ( $last_token, $last_type, $last_block_type, $last_input_line_no, - $token, $type, $block_type, $input_line_no ); - my $j_tight_closing_paren = -1; - - $token = ' '; - $type = 'b'; - $block_type = ''; - $input_line_no = 0; - $last_token = ' '; - $last_type = 'b'; - $last_block_type = ''; - $last_input_line_no = 0; - - my $jmax = @{$rLL} - 1; - - my ($ws); - - # This is some logic moved to a sub to avoid deep nesting of if stmts - my $ws_in_container = sub { - - my ($j) = @_; - my $ws = WS_YES; - if ( $j + 1 > $jmax ) { return (WS_NO) } - - # Patch to count '-foo' as single token so that - # each of $a{-foo} and $a{foo} and $a{'foo'} do - # not get spaces with default formatting. - my $j_here = $j; - ++$j_here - if ( $token eq '-' - && $last_token eq '{' - && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); - - # $j_next is where a closing token should be if - # the container has a single token - if ( $j_here + 1 > $jmax ) { return (WS_NO) } - my $j_next = - ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) - ? $j_here + 2 - : $j_here + 1; - - if ( $j_next > $jmax ) { return WS_NO } - my $tok_next = $rLL->[$j_next]->[_TOKEN_]; - my $type_next = $rLL->[$j_next]->[_TYPE_]; - - # for tightness = 1, if there is just one token - # within the matching pair, we will keep it tight - if ( - $tok_next eq $matching_token{$last_token} - - # but watch out for this: [ [ ] (misc.t) - && $last_token ne $token - - # double diamond is usually spaced - && $token ne '<<>>' - - ) - { - - # remember where to put the space for the closing paren - $j_tight_closing_paren = $j_next; - return (WS_NO); - } - return (WS_YES); - }; - - # main loop over all tokens to define the whitespace flags - for ( my $j = 0 ; $j <= $jmax ; $j++ ) { - - my $rtokh = $rLL->[$j]; - - # Set a default - $rwhitespace_flags->[$j] = WS_OPTIONAL; - - if ( $rtokh->[_TYPE_] eq 'b' ) { - next; - } - - # set a default value, to be changed as needed - $ws = undef; - $last_token = $token; - $last_type = $type; - $last_block_type = $block_type; - $last_input_line_no = $input_line_no; - $token = $rtokh->[_TOKEN_]; - $type = $rtokh->[_TYPE_]; - $block_type = $rtokh->[_BLOCK_TYPE_]; - $input_line_no = $rtokh->[_LINE_INDEX_]; - - #--------------------------------------------------------------- - # Whitespace Rules Section 1: - # Handle space on the inside of opening braces. - #--------------------------------------------------------------- - - # /^[L\{\(\[]$/ - if ( $is_opening_type{$last_type} ) { - - $j_tight_closing_paren = -1; - - # let us keep empty matched braces together: () {} [] - # except for BLOCKS - if ( $token eq $matching_token{$last_token} ) { - if ($block_type) { - $ws = WS_YES; - } - else { - $ws = WS_NO; - } - } - else { - - # we're considering the right of an opening brace - # tightness = 0 means always pad inside with space - # tightness = 1 means pad inside if "complex" - # tightness = 2 means never pad inside with space - - my $tightness; - if ( $last_type eq '{' - && $last_token eq '{' - && $last_block_type ) - { - $tightness = $rOpts_block_brace_tightness; - } - else { $tightness = $tightness{$last_token} } - - #============================================================= - # Patch for test problem fabrice_bug.pl - # We must always avoid spaces around a bare word beginning - # with ^ as in: - # my $before = ${^PREMATCH}; - # Because all of the following cause an error in perl: - # my $before = ${ ^PREMATCH }; - # my $before = ${ ^PREMATCH}; - # my $before = ${^PREMATCH }; - # So if brace tightness flag is -bt=0 we must temporarily reset - # to bt=1. Note that here we must set tightness=1 and not 2 so - # that the closing space - # is also avoided (via the $j_tight_closing_paren flag in coding) - if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } - - #============================================================= - - if ( $tightness <= 0 ) { - $ws = WS_YES; - } - elsif ( $tightness > 1 ) { - $ws = WS_NO; - } - else { - $ws = $ws_in_container->($j); - } - } - } # end setting space flag inside opening tokens - my $ws_1; - $ws_1 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # Whitespace Rules Section 2: - # Handle space on inside of closing brace pairs. - #--------------------------------------------------------------- - - # /[\}\)\]R]/ - if ( $is_closing_type{$type} ) { - - if ( $j == $j_tight_closing_paren ) { - - $j_tight_closing_paren = -1; - $ws = WS_NO; - } - else { - - if ( !defined($ws) ) { - - my $tightness; - if ( $type eq '}' && $token eq '}' && $block_type ) { - $tightness = $rOpts_block_brace_tightness; - } - else { $tightness = $tightness{$token} } - - $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; - } - } - } # end setting space flag inside closing tokens - - my $ws_2; - $ws_2 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # Whitespace Rules Section 3: - # Use the binary rule table. - #--------------------------------------------------------------- - if ( !defined($ws) ) { - $ws = $binary_ws_rules{$last_type}{$type}; - } - my $ws_3; - $ws_3 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # Whitespace Rules Section 4: - # Handle some special cases. - #--------------------------------------------------------------- - if ( $token eq '(' ) { - - # This will have to be tweaked as tokenization changes. - # We usually want a space at '} (', for example: - # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); - # - # But not others: - # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); - # At present, the above & block is marked as type L/R so this case - # won't go through here. - if ( $last_type eq '}' ) { $ws = WS_YES } - - # NOTE: some older versions of Perl had occasional problems if - # spaces are introduced between keywords or functions and opening - # parens. So the default is not to do this except is certain - # cases. The current Perl seems to tolerate spaces. - - # Space between keyword and '(' - elsif ( $last_type eq 'k' ) { - $ws = WS_NO - unless ( $rOpts_space_keyword_paren - || $space_after_keyword{$last_token} ); - } - - # Space between function and '(' - # ----------------------------------------------------- - # 'w' and 'i' checks for something like: - # myfun( &myfun( ->myfun( - # ----------------------------------------------------- - elsif (( $last_type =~ /^[wUG]$/ ) - || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) - { - $ws = WS_NO unless ($rOpts_space_function_paren); - } - - # space between something like $i and ( in - # for $i ( 0 .. 20 ) { - # FIXME: eventually, type 'i' needs to be split into multiple - # token types so this can be a hardwired rule. - elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { - $ws = WS_YES; - } - - # allow constant function followed by '()' to retain no space - elsif ($last_type eq 'C' - && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' ) - { - $ws = WS_NO; - } - } - - # patch for SWITCH/CASE: make space at ']{' optional - # since the '{' might begin a case or when block - elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { - $ws = WS_OPTIONAL; - } - - # keep space between 'sub' and '{' for anonymous sub definition - if ( $type eq '{' ) { - if ( $last_token eq 'sub' ) { - $ws = WS_YES; - } - - # this is needed to avoid no space in '){' - if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } - - # avoid any space before the brace or bracket in something like - # @opts{'a','b',...} - if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { - $ws = WS_NO; - } - } - - elsif ( $type eq 'i' ) { - - # never a space before -> - if ( $token =~ /^\-\>/ ) { - $ws = WS_NO; - } - } - - # retain any space between '-' and bare word - elsif ( $type eq 'w' || $type eq 'C' ) { - $ws = WS_OPTIONAL if $last_type eq '-'; - - # never a space before -> - if ( $token =~ /^\-\>/ ) { - $ws = WS_NO; - } - } - - # retain any space between '-' and bare word - # example: avoid space between 'USER' and '-' here: - # $myhash{USER-NAME}='steve'; - elsif ( $type eq 'm' || $type eq '-' ) { - $ws = WS_OPTIONAL if ( $last_type eq 'w' ); - } - - # always space before side comment - elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } - - # always preserver whatever space was used after a possible - # filehandle (except _) or here doc operator - if ( - $type ne '#' - && ( ( $last_type eq 'Z' && $last_token ne '_' ) - || $last_type eq 'h' ) - ) - { - $ws = WS_OPTIONAL; - } - - # space_backslash_quote; RT #123774 - # allow a space between a backslash and single or double quote - # to avoid fooling html formatters - elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) { - if ($rOpts_space_backslash_quote) { - if ( $rOpts_space_backslash_quote == 1 ) { - $ws = WS_OPTIONAL; - } - elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES } - else { } # shouldnt happen - } - else { - $ws = WS_NO; - } - } - - my $ws_4; - $ws_4 = $ws - if FORMATTER_DEBUG_FLAG_WHITE; - - #--------------------------------------------------------------- - # Whitespace Rules Section 5: - # Apply default rules not covered above. - #--------------------------------------------------------------- - - # If we fall through to here, look at the pre-defined hash tables for - # the two tokens, and: - # if (they are equal) use the common value - # if (either is zero or undef) use the other - # if (either is -1) use it - # That is, - # left vs right - # 1 vs 1 --> 1 - # 0 vs 0 --> 0 - # -1 vs -1 --> -1 - # - # 0 vs -1 --> -1 - # 0 vs 1 --> 1 - # 1 vs 0 --> 1 - # -1 vs 0 --> -1 - # - # -1 vs 1 --> -1 - # 1 vs -1 --> -1 - if ( !defined($ws) ) { - my $wl = $want_left_space{$type}; - my $wr = $want_right_space{$last_type}; - if ( !defined($wl) ) { $wl = 0 } - if ( !defined($wr) ) { $wr = 0 } - $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; - } - - if ( !defined($ws) ) { - $ws = 0; - write_diagnostics( - "WS flag is undefined for tokens $last_token $token\n"); - } - - # Treat newline as a whitespace. Otherwise, we might combine - # 'Send' and '-recipients' here according to the above rules: - # my $msg = new Fax::Send - # -recipients => $to, - # -data => $data; - if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 } - - if ( ( $ws == 0 ) - && $j > 0 - && $j < $jmax - && ( $last_type !~ /^[Zh]$/ ) ) - { - - # If this happens, we have a non-fatal but undesirable - # hole in the above rules which should be patched. - write_diagnostics( - "WS flag is zero for tokens $last_token $token\n"); - } - - $rwhitespace_flags->[$j] = $ws; - - FORMATTER_DEBUG_FLAG_WHITE && do { - my $str = substr( $last_token, 0, 15 ); - $str .= ' ' x ( 16 - length($str) ); - if ( !defined($ws_1) ) { $ws_1 = "*" } - if ( !defined($ws_2) ) { $ws_2 = "*" } - if ( !defined($ws_3) ) { $ws_3 = "*" } - if ( !defined($ws_4) ) { $ws_4 = "*" } - print STDOUT -"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; - }; - } ## end main loop - - if ( $rOpts->{'tight-secret-operators'} ) { - new_secret_operator_whitespace( $rLL, $rwhitespace_flags ); - } - return $rwhitespace_flags; -} ## end sub set_whitespace_flags - -sub respace_tokens { - - my $self = shift; - return if $rOpts->{'indent-only'}; - - # This routine makes all necessary changes to the tokenization after the - # file has been read. This consists mostly of inserting and deleting spaces - # according to the selected parameters. In a few cases non-space characters - # are added, deleted or modified. - - # The old tokens are copied one-by-one, with changes, from the old - # linear storage array to a new array. - - my $rLL = $self->{rLL}; - my $Klimit_old = $self->{Klimit}; - my $rlines = $self->{rlines}; - my $rpaired_to_inner_container = $self->{rpaired_to_inner_container}; - - my $rLL_new = []; # This is the new array - my $KK = 0; - my $rtoken_vars; - my $Kmax = @{$rLL} - 1; - - # Set the whitespace flags, which indicate the token spacing preference. - my $rwhitespace_flags = $self->set_whitespace_flags(); - - # we will be setting token lengths as we go - my $cumulative_length = 0; - - # We also define these hash indexes giving container token array indexes - # as a function of the container sequence numbers. For example, - my $K_opening_container = {}; # opening [ { or ( - my $K_closing_container = {}; # closing ] } or ) - my $K_opening_ternary = {}; # opening ? of ternary - my $K_closing_ternary = {}; # closing : of ternary - - # List of new K indexes of phantom semicolons - # This will be needed if we want to undo them for iterations - my $rK_phantom_semicolons = []; - - # Temporary hashes for adding semicolons - ##my $rKfirst_new = {}; - - # a sub to link preceding nodes forward to a new node type - my $link_back = sub { - my ( $Ktop, $key ) = @_; - - my $Kprev = $Ktop - 1; - while ( $Kprev >= 0 - && !defined( $rLL_new->[$Kprev]->[$key] ) ) - { - $rLL_new->[$Kprev]->[$key] = $Ktop; - $Kprev -= 1; - } - }; - - # A sub to store each token in the new array - # All new tokens must be stored by this sub so that it can update - # all data structures on the fly. - my $last_nonblank_type = ';'; - my $store_token = sub { - my ($item) = @_; - - # This will be the index of this item in the new array - my $KK_new = @{$rLL_new}; - - # check for a sequenced item (i.e., container or ?/:) - my $type_sequence = $item->[_TYPE_SEQUENCE_]; - if ($type_sequence) { - - $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ ); - - my $token = $item->[_TOKEN_]; - if ( $is_opening_token{$token} ) { - - $K_opening_container->{$type_sequence} = $KK_new; - } - elsif ( $is_closing_token{$token} ) { - - $K_closing_container->{$type_sequence} = $KK_new; - } - - # These are not yet used but could be useful - else { - if ( $token eq '?' ) { - $K_opening_ternary->{$type_sequence} = $KK; - } - elsif ( $token eq ':' ) { - $K_closing_ternary->{$type_sequence} = $KK; - } - else { - # shouldn't happen - print STDERR "Ugh: shouldn't happen\n"; - } - } - } - - # Save the length sum to just BEFORE this token - $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; - - # now set the length of this token - my $token_length = length( $item->[_TOKEN_] ); - - # and update the cumulative length - $cumulative_length += $token_length; - - my $type = $item->[_TYPE_]; - if ( $type ne 'b' ) { $last_nonblank_type = $type } - - # and finally, add this item to the new array - push @{$rLL_new}, $item; - }; - - my $add_phantom_semicolon = sub { - - my ($KK) = @_; - - my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); - return unless ( defined($Kp) ); - - # we are only adding semicolons for certain block types - my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_]; - return - unless ( $ok_to_add_semicolon_for_block_type{$block_type} - || $block_type =~ /^(sub|package)/ - || $block_type =~ /^\w+\:$/ ); - - my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - - my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; - my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; - - # Do not add a semicolon if... - return - if ( - - # it would follow a comment (and be isolated) - $previous_nonblank_type eq '#' - - # it follows a code block ( because they are not always wanted - # there and may add clutter) - || $rLL_new->[$Kp]->[_BLOCK_TYPE_] - - # it would follow a label - || $previous_nonblank_type eq 'J' - - # it would be inside a 'format' statement (and cause syntax error) - || ( $previous_nonblank_type eq 'k' - && $previous_nonblank_token =~ /format/ ) - - # if it would prevent welding two containers - || $rpaired_to_inner_container->{$type_sequence} - - ); - - # We will insert an empty semicolon here as a placeholder. - # Later, if it becomes the last token on a line, we will bring it to life. - # The advantage of doing this is that (1) we just have to check line endings, - # and (2) the phantom semicolon has zero width and therefore won't cause - # needless breaks of one-line blocks. - my $Ktop = -1; - if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' - && $want_left_space{';'} == WS_NO ) - { - - # convert the blank into a semicolon.. - # be careful: we are working on the new stack top - # on a token which has been stored. - my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' ); - - # Convert the existing blank to a semicolon - $rLL_new->[$Ktop]->[_TOKEN_] = ''; # zero length - $rLL_new->[$Ktop]->[_TYPE_] = ';'; - $rLL_new->[$Ktop]->[_SLEVEL_] = - $rLL->[$KK]->[_SLEVEL_]; - - push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; - - # Then store a new blank - $store_token->($rcopy); - } - else { - - # insert a new token - my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' ); - $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_]; - $store_token->($rcopy); - push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; - } - }; - - my $check_Q = sub { - - # Check that a quote looks okay - # This works but needs to by sync'd with the log file output - my ( $KK, $Kfirst ) = @_; - my $token = $rLL->[$KK]->[_TOKEN_]; - note_embedded_tab() if ( $token =~ "\t" ); - - my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); - return unless ( defined($Kp) ); - my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; - my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; - - my $previous_nonblank_type_2 = 'b'; - my $previous_nonblank_token_2 = ""; - my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); - if ( defined($Kpp) ) { - $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; - $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; - } - - my $Kn = $self->K_next_nonblank($KK); - my $next_nonblank_token = ""; - if ( defined($Kn) ) { - $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; - } - - my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; - my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; - - # make note of something like '$var = s/xxx/yyy/;' - # in case it should have been '$var =~ s/xxx/yyy/;' - if ( - $token =~ /^(s|tr|y|m|\/)/ - && $previous_nonblank_token =~ /^(=|==|!=)$/ - - # preceded by simple scalar - && $previous_nonblank_type_2 eq 'i' - && $previous_nonblank_token_2 =~ /^\$/ - - # followed by some kind of termination - # (but give complaint if we can not see far enough ahead) - && $next_nonblank_token =~ /^[; \)\}]$/ - - # scalar is not declared - && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ ) - ) - { - my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; - complain( -"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" - ); - } - }; - - # Main loop over all lines of the file - my $last_K_out; - my $CODE_type = ""; - my $line_type = ""; - foreach my $line_of_tokens ( @{$rlines} ) { - - $input_line_number = $line_of_tokens->{_line_number}; - my $last_line_type = $line_type; - $line_type = $line_of_tokens->{_line_type}; - next unless ( $line_type eq 'CODE' ); - my $last_CODE_type = $CODE_type; - $CODE_type = $line_of_tokens->{_code_type}; - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - next unless defined($Kfirst); - - # Check for correct sequence of token indexes... - # An error here means that sub write_line() did not correctly - # package the tokenized lines as it received them. - if ( defined($last_K_out) ) { - if ( $Kfirst != $last_K_out + 1 ) { - Fault( - "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" - ); - } - } - else { - if ( $Kfirst != 0 ) { - Fault("Program Bug: first K is $Kfirst but should be 0"); - } - } - $last_K_out = $Klast; - - # Handle special lines of code - if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { - - # CODE_types are as follows. - # 'BL' = Blank Line - # 'VB' = Verbatim - line goes out verbatim - # 'FS' = Format Skipping - line goes out verbatim, no blanks - # 'IO' = Indent Only - only indentation may be changed - # 'NIN' = No Internal Newlines - line does not get broken - # 'HSC'=Hanging Side Comment - fix this hanging side comment - # 'BC'=Block Comment - an ordinary full line comment - # 'SBC'=Static Block Comment - a block comment which does not get - # indented - # 'SBCX'=Static Block Comment Without Leading Space - # 'DEL'=Delete this line - # 'VER'=VERSION statement - # '' or (undefined) - no restructions - - # For a hanging side comment we insert an empty quote before - # the comment so that it becomes a normal side comment and - # will be aligned by the vertical aligner - if ( $CODE_type eq 'HSC' ) { - - # Safety Check: This must be a line with one token (a comment) - my $rtoken_vars = $rLL->[$Kfirst]; - if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) { - - # Note that even if the flag 'noadd-whitespace' is set, we will - # make an exception here and allow a blank to be inserted to push the comment - # to the right. We can think of this as an adjustment of indentation - # rather than whitespace between tokens. This will also prevent the hanging - # side comment from getting converted to a block comment if whitespace - # gets deleted, as for example with the -extrude and -mangle options. - my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' ); - $store_token->($rcopy); - $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' ); - $store_token->($rcopy); - $store_token->($rtoken_vars); - next; - } - else { - - # This line was mis-marked by sub scan_comment - Fault( - "Program bug. A hanging side comment has been mismarked" - ); - } - } - - # Copy tokens unchanged - foreach my $KK ( $Kfirst .. $Klast ) { - $store_token->( $rLL->[$KK] ); - } - next; - } - - # Handle normal line.. - - # Insert any essential whitespace between lines - # if last line was normal CODE - my $type_next = $rLL->[$Kfirst]->[_TYPE_]; - my $token_next = $rLL->[$Kfirst]->[_TOKEN_]; - my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); - if ( $last_line_type eq 'CODE' - && $type_next ne 'b' - && defined($Kp) ) - { - my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; - my $type_p = $rLL_new->[$Kp]->[_TYPE_]; - - my ( $token_pp, $type_pp ); - my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); - if ( defined($Kpp) ) { - $token_pp = $rLL_new->[$Kpp]->[_TOKEN_]; - $type_pp = $rLL_new->[$Kpp]->[_TYPE_]; - } - else { - $token_pp = ";"; - $type_pp = ';'; - } - - if ( - is_essential_whitespace( - $token_pp, $type_pp, $token_p, - $type_p, $token_next, $type_next, - ) - ) - { - - # Copy this first token as blank, but use previous line number - my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' ); - $rcopy->[_LINE_INDEX_] = - $rLL_new->[-1]->[_LINE_INDEX_]; - $store_token->($rcopy); - } - } - - # loop to copy all tokens on this line, with any changes - my $type_sequence; - for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) { - $rtoken_vars = $rLL->[$KK]; - my $token = $rtoken_vars->[_TOKEN_]; - my $type = $rtoken_vars->[_TYPE_]; - my $last_type_sequence = $type_sequence; - $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - - # Handle a blank space ... - if ( $type eq 'b' ) { - - # Delete it if not wanted by whitespace rules - # or we are deleting all whitespace - # Note that whitespace flag is a flag indicating whether a - # white space BEFORE the token is needed - next if ( $KK >= $Kmax ); # skip terminal blank - my $Knext = $KK + 1; - my $ws = $rwhitespace_flags->[$Knext]; - if ( $ws == -1 - || $rOpts_delete_old_whitespace ) - { - - # FIXME: maybe switch to using _new - my $Kp = $self->K_previous_nonblank($KK); - next unless defined($Kp); - my $token_p = $rLL->[$Kp]->[_TOKEN_]; - my $type_p = $rLL->[$Kp]->[_TYPE_]; - - my ( $token_pp, $type_pp ); - - #my $Kpp = $K_previous_nonblank->($Kp); - my $Kpp = $self->K_previous_nonblank($Kp); - if ( defined($Kpp) ) { - $token_pp = $rLL->[$Kpp]->[_TOKEN_]; - $type_pp = $rLL->[$Kpp]->[_TYPE_]; - } - else { - $token_pp = ";"; - $type_pp = ';'; - } - my $token_next = $rLL->[$Knext]->[_TOKEN_]; - my $type_next = $rLL->[$Knext]->[_TYPE_]; - - my $do_not_delete = is_essential_whitespace( - $token_pp, $type_pp, $token_p, - $type_p, $token_next, $type_next, - ); - - next unless ($do_not_delete); - } - - # make it just one character if allowed - if ($rOpts_add_whitespace) { - $rtoken_vars->[_TOKEN_] = ' '; - } - $store_token->($rtoken_vars); - next; - } - - # Handle a nonblank token... - - # Modify certain tokens here for whitespace - # The following is not yet done, but could be: - # sub (x x x) - if ( $type =~ /^[wit]$/ ) { - - # Examples: - # change '$ var' to '$var' etc - # '-> new' to '->new' - if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { - $token =~ s/\s*//g; - $rtoken_vars->[_TOKEN_] = $token; - } - - # Split identifiers with leading arrows, inserting blanks if - # necessary. It is easier and safer here than in the - # tokenizer. For example '->new' becomes two tokens, '->' and - # 'new' with a possible blank between. - # - # Note: there is a related patch in sub set_whitespace_flags - if ( $token =~ /^\-\>(.*)$/ && $1 ) { - my $token_save = $1; - my $type_save = $type; - - # store a blank to left of arrow if necessary - my $Kprev = $self->K_previous_nonblank($KK); - if ( defined($Kprev) - && $rLL->[$Kprev]->[_TYPE_] ne 'b' - && $rOpts_add_whitespace - && $want_left_space{'->'} == WS_YES ) - { - my $rcopy = - copy_token_as_type( $rtoken_vars, 'b', ' ' ); - $store_token->($rcopy); - } - - # then store the arrow - my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' ); - $store_token->($rcopy); - - # then reset the current token to be the remainder, - # and reset the whitespace flag according to the arrow - $token = $rtoken_vars->[_TOKEN_] = $token_save; - $type = $rtoken_vars->[_TYPE_] = $type_save; - $store_token->($rtoken_vars); - next; - } - - if ( $token =~ /$SUB_PATTERN/ ) { - $token =~ s/\s+/ /g; - $rtoken_vars->[_TOKEN_] = $token; - } - - # trim identifiers of trailing blanks which can occur - # under some unusual circumstances, such as if the - # identifier 'witch' has trailing blanks on input here: - # - # sub - # witch - # () # prototype may be on new line ... - # ... - if ( $type eq 'i' ) { - $token =~ s/\s+$//g; - $rtoken_vars->[_TOKEN_] = $token; - } - } - - # change 'LABEL :' to 'LABEL:' - elsif ( $type eq 'J' ) { - $token =~ s/\s+//g; - $rtoken_vars->[_TOKEN_] = $token; - } - - # patch to add space to something like "x10" - # This avoids having to split this token in the pre-tokenizer - elsif ( $type eq 'n' ) { - if ( $token =~ /^x\d+/ ) { - $token =~ s/x/x /; - $rtoken_vars->[_TOKEN_] = $token; - } - } - - # check a quote for problems - elsif ( $type eq 'Q' ) { - - # This is ready to go but is commented out because there is - # still identical logic in sub break_lines. - # $check_Q->($KK, $Kfirst); - } - - # trim blanks from right of qw quotes - # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) - elsif ( $type eq 'q' ) { - $token =~ s/\s*$//; - $rtoken_vars->[_TOKEN_] = $token; - note_embedded_tab() if ( $token =~ "\t" ); - } - - elsif ($type_sequence) { - - # if ( $is_opening_token{$token} ) { - # } - - if ( $is_closing_token{$token} ) { - - # Insert a tentative missing semicolon if the next token is - # a closing block brace - if ( - $type eq '}' - && $token eq '}' - - # not preceded by a ';' - && $last_nonblank_type ne ';' - - # and this is not a VERSION stmt (is all one line, we are not - # inserting semicolons on one-line blocks) - && $CODE_type ne 'VER' - - # and we are allowed to add semicolons - && $rOpts->{'add-semicolons'} - ) - { - $add_phantom_semicolon->($KK); - } - } - } - - # Insert any needed whitespace - if ( @{$rLL_new} - && $rLL_new->[-1]->[_TYPE_] ne 'b' - && $rOpts_add_whitespace ) - { - my $ws = $rwhitespace_flags->[$KK]; - if ( $ws == 1 ) { - - my $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' ); - $rcopy->[_LINE_INDEX_] = - $rLL_new->[-1]->[_LINE_INDEX_]; - $store_token->($rcopy); - } - } - $store_token->($rtoken_vars); - } # End token loop - } # End line loop - - # Reset memory to be the new array - $self->{rLL} = $rLL_new; - $self->set_rLL_max_index(); - $self->{K_opening_container} = $K_opening_container; - $self->{K_closing_container} = $K_closing_container; - $self->{K_opening_ternary} = $K_opening_ternary; - $self->{K_closing_ternary} = $K_closing_ternary; - $self->{rK_phantom_semicolons} = $rK_phantom_semicolons; - - # make sure the new array looks okay - $self->check_token_array(); - - # reset the token limits of each line - $self->resync_lines_and_tokens(); - - return; -} - -{ # scan_comments - - my $Last_line_had_side_comment; - my $In_format_skipping_section; - my $Saw_VERSION_in_this_file; - - sub scan_comments { - my $self = shift; - my $rlines = $self->{rlines}; - - $Last_line_had_side_comment = undef; - $In_format_skipping_section = undef; - $Saw_VERSION_in_this_file = undef; - - # Loop over all lines - foreach my $line_of_tokens ( @{$rlines} ) { - my $line_type = $line_of_tokens->{_line_type}; - next unless ( $line_type eq 'CODE' ); - my $CODE_type = $self->get_CODE_type($line_of_tokens); - $line_of_tokens->{_code_type} = $CODE_type; - } - return; - } - - sub get_CODE_type { - my ( $self, $line_of_tokens ) = @_; - - # We are looking at a line of code and setting a flag to - # describe any special processing that it requires - - # Possible CODE_types are as follows. - # 'BL' = Blank Line - # 'VB' = Verbatim - line goes out verbatim - # 'IO' = Indent Only - line goes out unchanged except for indentation - # 'NIN' = No Internal Newlines - line does not get broken - # 'HSC'=Hanging Side Comment - fix this hanging side comment - # 'BC'=Block Comment - an ordinary full line comment - # 'SBC'=Static Block Comment - a block comment which does not get - # indented - # 'SBCX'=Static Block Comment Without Leading Space - # 'DEL'=Delete this line - # 'VER'=VERSION statement - # '' or (undefined) - no restructions - - my $rLL = $self->{rLL}; - my $Klimit = $self->{Klimit}; - - my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : ""; - my $no_internal_newlines = 1 - $rOpts_add_newlines; - if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' } - - # extract what we need for this line.. - - # Global value for error messages: - $input_line_number = $line_of_tokens->{_line_number}; - - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - my $jmax = -1; - if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst } - my $input_line = $line_of_tokens->{_line_text}; - my $in_continued_quote = my $starting_in_quote = - $line_of_tokens->{_starting_in_quote}; - my $in_quote = $line_of_tokens->{_ending_in_quote}; - my $ending_in_quote = $in_quote; - my $guessed_indentation_level = - $line_of_tokens->{_guessed_indentation_level}; - - my $is_static_block_comment = 0; - - # Handle a continued quote.. - if ($in_continued_quote) { - - # A line which is entirely a quote or pattern must go out - # verbatim. Note: the \n is contained in $input_line. - if ( $jmax <= 0 ) { - if ( ( $input_line =~ "\t" ) ) { - note_embedded_tab(); - } - $Last_line_had_side_comment = 0; - return 'VB'; - } - } - - my $is_block_comment = - ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' ); - - # Write line verbatim if we are in a formatting skip section - if ($In_format_skipping_section) { - $Last_line_had_side_comment = 0; - - # Note: extra space appended to comment simplifies pattern matching - if ( $is_block_comment - && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~ - /$format_skipping_pattern_end/o ) - { - $In_format_skipping_section = 0; - write_logfile_entry("Exiting formatting skip section\n"); - } - return 'FS'; - } - - # See if we are entering a formatting skip section - if ( $rOpts_format_skipping - && $is_block_comment - && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~ - /$format_skipping_pattern_begin/o ) - { - $In_format_skipping_section = 1; - write_logfile_entry("Entering formatting skip section\n"); - $Last_line_had_side_comment = 0; - return 'FS'; - } - - # ignore trailing blank tokens (they will get deleted later) - if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) { - $jmax--; - } - - # Handle a blank line.. - if ( $jmax < 0 ) { - $Last_line_had_side_comment = 0; - return 'BL'; - } - - # see if this is a static block comment (starts with ## by default) - my $is_static_block_comment_without_leading_space = 0; - if ( $is_block_comment - && $rOpts->{'static-block-comments'} - && $input_line =~ /$static_block_comment_pattern/o ) - { - $is_static_block_comment = 1; - $is_static_block_comment_without_leading_space = - substr( $input_line, 0, 1 ) eq '#'; - } - - # Check for comments which are line directives - # Treat exactly as static block comments without leading space - # reference: perlsyn, near end, section Plain Old Comments (Not!) - # example: '# line 42 "new_filename.plx"' - if ( - $is_block_comment - && $input_line =~ /^\# \s* - line \s+ (\d+) \s* - (?:\s("?)([^"]+)\2)? \s* - $/x - ) - { - $is_static_block_comment = 1; - $is_static_block_comment_without_leading_space = 1; - } - - # look for hanging side comment - if ( - $is_block_comment - && $Last_line_had_side_comment # last line had side comment - && $input_line =~ /^\s/ # there is some leading space - && !$is_static_block_comment # do not make static comment hanging - && $rOpts->{'hanging-side-comments'} # user is allowing - # hanging side comments - # like this - ) - { - $Last_line_had_side_comment = 1; - return 'HSC'; - } - - # remember if this line has a side comment - $Last_line_had_side_comment = - ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ); - - # Handle a block (full-line) comment.. - if ($is_block_comment) { - - if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' } - - # TRIM COMMENTS -- This could be turned off as a option - $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end - - if ($is_static_block_comment_without_leading_space) { - return 'SBCX'; - } - elsif ($is_static_block_comment) { - return 'SBC'; - } - else { - return 'BC'; - } - } - -=pod - # NOTE: This does not work yet. Version in print-line-of-tokens - # is Still used until fixed - - # compare input/output indentation except for continuation lines - # (because they have an unknown amount of initial blank space) - # and lines which are quotes (because they may have been outdented) - # Note: this test is placed here because we know the continuation flag - # at this point, which allows us to avoid non-meaningful checks. - my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_]; - compare_indentation_levels( $guessed_indentation_level, - $structural_indentation_level ) - unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0 - || $guessed_indentation_level == 0 - && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' ); -=cut - - # Patch needed for MakeMaker. Do not break a statement - # in which $VERSION may be calculated. See MakeMaker.pm; - # this is based on the coding in it. - # The first line of a file that matches this will be eval'd: - # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ - # Examples: - # *VERSION = \'1.01'; - # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; - # We will pass such a line straight through without breaking - # it unless -npvl is used. - - # Patch for problem reported in RT #81866, where files - # had been flattened into a single line and couldn't be - # tidied without -npvl. There are two parts to this patch: - # First, it is not done for a really long line (80 tokens for now). - # Second, we will only allow up to one semicolon - # before the VERSION. We need to allow at least one semicolon - # for statements like this: - # require Exporter; our $VERSION = $Exporter::VERSION; - # where both statements must be on a single line for MakeMaker - - my $is_VERSION_statement = 0; - if ( !$Saw_VERSION_in_this_file - && $jmax < 80 - && $input_line =~ - /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) - { - $Saw_VERSION_in_this_file = 1; - write_logfile_entry("passing VERSION line; -npvl deactivates\n"); - $CODE_type = 'VER'; - } - return $CODE_type; - } -} - -sub find_nested_pairs { - my $self = shift; - - my $rLL = $self->{rLL}; - return unless ( defined($rLL) && @{$rLL} ); - - # We define an array of pairs of nested containers - my @nested_pairs; - - # We also set the following hash values to identify container pairs for - # which the opening and closing tokens are adjacent in the token stream: - # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and - # $seqno_in are the seqence numbers of the outer and inner containers of - # the pair We need these later to decide if we can insert a missing - # semicolon - my $rpaired_to_inner_container = {}; - - # This local hash remembers if an outer container has a close following - # inner container; - # The key is the outer sequence number - # The value is the token_hash of the inner container - - my %has_close_following_opening; - - # Names of calling routines can either be marked as 'i' or 'w', - # and they may invoke a sub call with an '->'. We will consider - # any consecutive string of such types as a single unit when making - # weld decisions. We also allow a leading ! - my $is_name_type = { - 'i' => 1, - 'w' => 1, - 'U' => 1, - '->' => 1, - '!' => 1, - }; - - my $is_name = sub { - my $type = shift; - return $type && $is_name_type->{$type}; - }; - - my $last_container; - my $last_last_container; - my $last_nonblank_token_vars; - my $last_count; - - my $nonblank_token_count = 0; - - # loop over all tokens - foreach my $rtoken_vars ( @{$rLL} ) { - - my $type = $rtoken_vars->[_TYPE_]; - - next if ( $type eq 'b' ); - - # long identifier-like items are counted as a single item - $nonblank_token_count++ - unless ( $is_name->($type) - && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) ); - - my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - if ($type_sequence) { - - my $token = $rtoken_vars->[_TOKEN_]; - - if ( $is_opening_token{$token} ) { - - # following previous opening token ... - if ( $last_container - && $is_opening_token{ $last_container->[_TOKEN_] } ) - { - - # adjacent to this one - my $tok_diff = $nonblank_token_count - $last_count; - - my $last_tok = $last_nonblank_token_vars->[_TOKEN_]; - - if ( $tok_diff == 1 - || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' ) - { - - # remember this pair... - my $outer_seqno = $last_container->[_TYPE_SEQUENCE_]; - my $inner_seqno = $type_sequence; - $has_close_following_opening{$outer_seqno} = - $rtoken_vars; - } - } - } - - elsif ( $is_closing_token{$token} ) { - - # if the corresponding opening token had an adjacent opening - if ( $has_close_following_opening{$type_sequence} - && $is_closing_token{ $last_container->[_TOKEN_] } - && $has_close_following_opening{$type_sequence} - ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] ) - { - - # The closing weld tokens must be adjacent - # NOTE: so intermediate commas and semicolons - # can currently block a weld. This is something - # that could be fixed in the future by including - # a flag to delete un-necessary commas and semicolons. - my $tok_diff = $nonblank_token_count - $last_count; - - if ( $tok_diff == 1 ) { - - # This is a closely nested pair .. - my $inner_seqno = $last_container->[_TYPE_SEQUENCE_]; - my $outer_seqno = $type_sequence; - $rpaired_to_inner_container->{$outer_seqno} = - $inner_seqno; - - push @nested_pairs, [ $inner_seqno, $outer_seqno ]; - } - } - } - - $last_last_container = $last_container; - $last_container = $rtoken_vars; - $last_count = $nonblank_token_count; - } - $last_nonblank_token_vars = $rtoken_vars; - } - $self->{rnested_pairs} = \@nested_pairs; - $self->{rpaired_to_inner_container} = $rpaired_to_inner_container; - return; -} - -sub dump_tokens { - - # a debug routine, not normally used - my ( $self, $msg ) = @_; - my $rLL = $self->{rLL}; - my $nvars = @{$rLL}; - print STDERR "$msg\n"; - print STDERR "ntokens=$nvars\n"; - print STDERR "K\t_TOKEN_\t_TYPE_\n"; - my $K = 0; - foreach my $item ( @{$rLL} ) { - print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n"; - $K++; - } -} - -sub K_next_nonblank { - my ( $self, $KK, $rLL ) = @_; - - # return the index K of the next nonblank token - return unless ( defined($KK) && $KK >= 0 ); - $rLL = $self->{rLL} unless ( defined($rLL) ); - my $Num = @{$rLL}; - my $Knnb = $KK + 1; - while ( $Knnb < $Num ) { - if ( !defined( $rLL->[$Knnb] ) ) { - Fault("Undefined entry for k=$Knnb"); - } - if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } - $Knnb++; - } - return; -} - -sub K_previous_nonblank { - - # return index of previous nonblank token before item K - # Call with $KK=undef to start search at the top of the array - my ( $self, $KK, $rLL ) = @_; - $rLL = $self->{rLL} unless ( defined($rLL) ); - my $Num = @{$rLL}; - if ( !defined($KK) ) { $KK = $Num } - elsif ( $KK > $Num ) { - - # The caller should make the first call with KK_new=undef to - # avoid this error - Fault( -"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" - ); - } - my $Kpnb = $KK - 1; - while ( $Kpnb >= 0 ) { - if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } - $Kpnb--; - } - return; -} - -sub weld_containers { - - # do any welding operations - my $self = shift; - - # initialize weld length hashes needed later for checking line lengths - # TODO: These should eventually be stored in $self rather than be package vars - %weld_len_left_closing = (); - %weld_len_right_closing = (); - %weld_len_left_opening = (); - %weld_len_right_opening = (); - - return if ( $rOpts->{'indent-only'} ); - return unless ($rOpts_add_newlines); - - $self->weld_nested_containers() - if $rOpts->{'weld-nested-containers'}; - - # Note that these two calls are order-dependent. - # sub weld_nested_containers() must be called before sub - # weld_cuddled_blocks(). This is because it is more complex and could - # overwrite the %weld_len_... hash values written by weld_cuddled_blocks(). - # sub weld_cuddled_blocks(), on the other hand, is much simpler and will - # not overwrite the values written by weld_nested_containers. But - # note that weld_nested_containers() changes the _LEVEL_ values, so - # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead. - - # Here is a good test case to Be sure that both cuddling and welding - # are working and not interfering with each other: - - # perltidy -wn -cb -cbl='if-elsif-else' - - # if ($BOLD_MATH) { ( - # $labels, $comment, - # join( '', '', &make_math( $mode, '', '', $_ ), '' ) - # ) } else { ( - # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), - # $after - # ) } - - $self->weld_cuddled_blocks() - if $rOpts->{'cuddled-blocks'}; - - return; -} - -sub weld_cuddled_blocks { - my $self = shift; - - # This routine implements the -cb flag by finding the appropriate - # closing and opening block braces and welding them together. - - my $rLL = $self->{rLL}; - return unless ( defined($rLL) && @{$rLL} ); - my $rbreak_container = $self->{rbreak_container}; - - my $K_opening_container = $self->{K_opening_container}; - my $K_closing_container = $self->{K_closing_container}; - - my $length_to_opening_seqno = sub { - my ($seqno) = @_; - my $KK = $K_opening_container->{$seqno}; - my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; - my $length_to_closing_seqno = sub { - my ($seqno) = @_; - my $KK = $K_closing_container->{$seqno}; - my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; - - my $is_broken_block = sub { - - # a block is broken if the input line numbers of the braces differ - # we can only cuddle between broken blocks - my ($seqno) = @_; - my $K_opening = $K_opening_container->{$seqno}; - return unless ( defined($K_opening) ); - my $K_closing = $K_closing_container->{$seqno}; - return unless ( defined($K_closing) ); - return $rbreak_container->{$seqno} - || $rLL->[$K_closing]->[_LINE_INDEX_] != - $rLL->[$K_opening]->[_LINE_INDEX_]; - }; - - # A stack to remember open chains at all levels: - # $in_chain[$level] = [$chain_type, $type_sequence]; - my @in_chain; - my $CBO = $rOpts->{'cuddled-break-option'}; - - # loop over structure items to find cuddled pairs - my $level = 0; - my $KK = 0; - while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) { - my $rtoken_vars = $rLL->[$KK]; - my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - if ( !$type_sequence ) { - Fault("sequence = $type_sequence not defined"); - } - - # We use the original levels because they get changed by sub - # 'weld_nested_containers'. So if this were to be called before that - # routine, the levels would be wrong and things would go bad. - my $last_level = $level; - $level = $rtoken_vars->[_LEVEL_TRUE_]; - - if ( $level < $last_level ) { $in_chain[$last_level] = undef } - elsif ( $level > $last_level ) { $in_chain[$level] = undef } - - # We are only looking at code blocks - my $token = $rtoken_vars->[_TOKEN_]; - my $type = $rtoken_vars->[_TYPE_]; - next unless ( $type eq $token ); - - if ( $token eq '{' ) { - - my $block_type = $rtoken_vars->[_BLOCK_TYPE_]; - if ( !$block_type ) { - - # patch for unrecognized block types which may not be labeled - my $Kp = $self->K_previous_nonblank($KK); - while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { - $Kp = $self->K_previous_nonblank($Kp); - } - next unless $Kp; - $block_type = $rLL->[$Kp]->[_TOKEN_]; - } - if ( $in_chain[$level] ) { - - # we are in a chain and are at an opening block brace. - # See if we are welding this opening brace with the previous - # block brace. Get their identification numbers: - my $closing_seqno = $in_chain[$level]->[1]; - my $opening_seqno = $type_sequence; - - # The preceding block must be on multiple lines so that its - # closing brace will start a new line. - if ( !$is_broken_block->($closing_seqno) ) { - next unless ( $CBO == 2 ); - $rbreak_container->{$closing_seqno} = 1; - } - - # we will let the trailing block be either broken or intact - ## && $is_broken_block->($opening_seqno); - - # We can weld the closing brace to its following word .. - my $Ko = $K_closing_container->{$closing_seqno}; - my $Kon = $self->K_next_nonblank($Ko); - - # ..unless it is a comment - if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) { - my $dlen = - $rLL->[ $Kon + 1 ]->[_CUMULATIVE_LENGTH_] - - $rLL->[$Ko]->[_CUMULATIVE_LENGTH_]; - $weld_len_right_closing{$closing_seqno} = $dlen; - - # Set flag that we want to break the next container - # so that the cuddled line is balanced. - $rbreak_container->{$opening_seqno} = 1 - if ($CBO); - } - - } - else { - - # We are not in a chain. Start a new chain if we see the - # starting block type. - if ( $rcuddled_block_types->{$block_type} ) { - $in_chain[$level] = [ $block_type, $type_sequence ]; - } - else { - $block_type = '*'; - $in_chain[$level] = [ $block_type, $type_sequence ]; - } - } - } - elsif ( $token eq '}' ) { - if ( $in_chain[$level] ) { - - # We are in a chain at a closing brace. See if this chain - # continues.. - my $Knn = $self->K_next_nonblank($KK); - - # skip past comments - while ( $Knn && $rLL->[$Knn]->[_TYPE_] eq '#' ) { - $Knn = $self->K_next_nonblank($Knn); - } - next unless $Knn; - - my $chain_type = $in_chain[$level]->[0]; - my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; - if ( - $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} - ) - { - - # Note that we do not weld yet because we must wait until - # we we are sure that an opening brace for this follows. - $in_chain[$level]->[1] = $type_sequence; - } - else { $in_chain[$level] = undef } - } - } - } - - return; -} - -sub weld_nested_containers { - my $self = shift; - - # This routine implements the -wn flag by "welding together" - # the nested closing and opening tokens which were previously - # identified by sub 'find_nested_pairs'. "welding" simply - # involves setting certain hash values which will be checked - # later during formatting. - - my $rLL = $self->{rLL}; - my $Klimit = $self->get_rLL_max_index(); - my $rnested_pairs = $self->{rnested_pairs}; - my $rlines = $self->{rlines}; - my $K_opening_container = $self->{K_opening_container}; - my $K_closing_container = $self->{K_closing_container}; - - # Return unless there are nested pairs to weld - return unless defined($rnested_pairs) && @{$rnested_pairs}; - - # This array will hold the sequence numbers of the tokens to be welded. - my @welds; - - # Variables needed for estimating line lengths - my $starting_indent; - my $starting_lentot; - - # A tolerance to the length for length estimates. In some rare cases - # this can avoid problems where a final weld slightly exceeds the - # line length and gets broken in a bad spot. - my $length_tol = 1; - - my $excess_length_to = sub { - my ($rtoken_hash) = @_; - - # Estimate the length from the line start to a given token - my $length = $rtoken_hash->[_CUMULATIVE_LENGTH_] - $starting_lentot; - - my $excess_length = - $starting_indent + $length + $length_tol - $rOpts_maximum_line_length; - return ($excess_length); - }; - my $length_to_opening_seqno = sub { - my ($seqno) = @_; - my $KK = $K_opening_container->{$seqno}; - my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; - my $length_to_closing_seqno = sub { - my ($seqno) = @_; - my $KK = $K_closing_container->{$seqno}; - my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; - - # Abbreviations: - # _oo=outer opening, i.e. first of { { - # _io=inner opening, i.e. second of { { - # _oc=outer closing, i.e. second of } { - # _ic=inner closing, i.e. first of } } - - my $previous_pair; - - # We are working from outermost to innermost pairs so that - # level changes will be complete when we arrive at the inner pairs. - - while ( my $item = pop( @{$rnested_pairs} ) ) { - my ( $inner_seqno, $outer_seqno ) = @{$item}; - - my $Kouter_opening = $K_opening_container->{$outer_seqno}; - my $Kinner_opening = $K_opening_container->{$inner_seqno}; - my $Kouter_closing = $K_closing_container->{$outer_seqno}; - my $Kinner_closing = $K_closing_container->{$inner_seqno}; - - my $outer_opening = $rLL->[$Kouter_opening]; - my $inner_opening = $rLL->[$Kinner_opening]; - my $outer_closing = $rLL->[$Kouter_closing]; - my $inner_closing = $rLL->[$Kinner_closing]; - - my $iline_oo = $outer_opening->[_LINE_INDEX_]; - my $iline_io = $inner_opening->[_LINE_INDEX_]; - - # Set flag saying if this pair starts a new weld - my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] ); - - # Set flag saying if this pair is adjacent to the previous nesting pair - # (even if previous pair was rejected as a weld) - my $touch_previous_pair = - defined($previous_pair) && $outer_seqno == $previous_pair->[0]; - $previous_pair = $item; - - # Set a flag if we should not weld. It sometimes looks best not to weld - # when the opening and closing tokens are very close. However, there - # is a danger that we will create a "blinker", which oscillates between - # two semi-stable states, if we do not weld. So the rules for - # not welding have to be carefully defined and tested. - my $do_not_weld; - if ( !$touch_previous_pair ) { - - # If this pair is not adjacent to the previous pair (skipped or - # not), then measure lengths from the start of line of oo - - my $rK_range = $rlines->[$iline_oo]->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - $starting_lentot = $rLL->[$Kfirst]->[_CUMULATIVE_LENGTH_]; - $starting_indent = 0; - if ( !$rOpts_variable_maximum_line_length ) { - my $level = $rLL->[$Kfirst]->[_LEVEL_]; - $starting_indent = $rOpts_indent_columns * $level; - } - - # DO-NOT-WELD RULE 1: - # Do not weld something that looks like the start of a two-line - # function call, like this: - # $trans->add_transformation( - # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) ); - # We will look for a semicolon after the closing paren. - - # We want to weld something complex, like this though - # my $compass = uc( opposite_direction( line_to_canvas_direction( - # @{ $coords[0] }, @{ $coords[1] } ) ) ); - # Otherwise we will get a 'blinker' - - my $iline_oc = $outer_closing->[_LINE_INDEX_]; - if ( $iline_oc <= $iline_oo + 1 ) { - - # Look for following semicolon... - my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing); - my $next_nonblank_type = - defined($Knext_nonblank) - ? $rLL->[$Knext_nonblank]->[_TYPE_] - : 'b'; - if ( $next_nonblank_type eq ';' ) { - - # Then do not weld if no other containers between inner - # opening and closing. - my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_]; - if ( $Knext_seq_item == $Kinner_closing ) { - $do_not_weld ||= 1; - } - } - } - } - - my $iline_ic = $inner_closing->[_LINE_INDEX_]; - - # DO-NOT-WELD RULE 2: - # Do not weld an opening paren to an inner one line brace block - # We will just use old line numbers for this test and require - # iterations if necessary for convergence - - # For example, otherwise we could cause the opening paren - # in the following example to separate from the caller name - # as here: - - # $_[0]->code_handler - # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } ); - - # Here is another example where we do not want to weld: - # $wrapped->add_around_modifier( - # sub { push @tracelog => 'around 1'; $_[0]->(); } ); - - # If the one line sub block gets broken due to length or by the - # user, then we can weld. The result will then be: - # $wrapped->add_around_modifier( sub { - # push @tracelog => 'around 1'; - # $_[0]->(); - # } ); - - if ( $iline_ic == $iline_io ) { - - my $token_oo = $outer_opening->[_TOKEN_]; - my $block_type_io = $inner_opening->[_BLOCK_TYPE_]; - my $token_io = $inner_opening->[_TOKEN_]; - $do_not_weld ||= $token_oo eq '(' && $token_io eq '{'; - } - - # DO-NOT-WELD RULE 3: - # Do not weld if this makes our line too long - $do_not_weld ||= $excess_length_to->($inner_opening) > 0; - - if ($do_not_weld) { - - # After neglecting a pair, we start measuring from start of point io - $starting_lentot = $inner_opening->[_CUMULATIVE_LENGTH_]; - $starting_indent = 0; - if ( !$rOpts_variable_maximum_line_length ) { - my $level = $inner_opening->[_LEVEL_]; - $starting_indent = $rOpts_indent_columns * $level; - } - - # Normally, a broken pair should not decrease indentation of - # intermediate tokens: - ## if ( $last_pair_broken ) { next } - # However, for long strings of welded tokens, such as '{{{{{{...' - # we will allow broken pairs to also remove indentation. - # This will keep very long strings of opening and closing - # braces from marching off to the right. We will do this if the - # number of tokens in a weld before the broken weld is 4 or more. - # This rule will mainly be needed for test scripts, since typical - # welds have fewer than about 4 welded tokens. - if ( !@welds || @{ $welds[-1] } < 4 ) { next } - } - - # otherwise start new weld ... - elsif ($starting_new_weld) { - push @welds, $item; - } - - # ... or extend current weld - else { - unshift @{ $welds[-1] }, $inner_seqno; - } - - ######################################################################## - # After welding, reduce the indentation level if all intermediate tokens - ######################################################################## - - my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_]; - if ( $dlevel != 0 ) { - my $Kstart = $Kinner_opening; - my $Kstop = $Kinner_closing; - for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) { - $rLL->[$KK]->[_LEVEL_] += $dlevel; - } - } - } - - ##################################################### - # Define weld lengths needed later to set line breaks - ##################################################### - foreach my $item (@welds) { - - # sweep from inner to outer - - my $inner_seqno; - my $len_close = 0; - my $len_open = 0; - foreach my $outer_seqno ( @{$item} ) { - if ($inner_seqno) { - - my $dlen_opening = - $length_to_opening_seqno->($inner_seqno) - - $length_to_opening_seqno->($outer_seqno); - - my $dlen_closing = - $length_to_closing_seqno->($outer_seqno) - - $length_to_closing_seqno->($inner_seqno); - - $len_open += $dlen_opening; - $len_close += $dlen_closing; - - } - - $weld_len_left_closing{$outer_seqno} = $len_close; - $weld_len_right_opening{$outer_seqno} = $len_open; - - $inner_seqno = $outer_seqno; - } - - # sweep from outer to inner - foreach my $seqno ( reverse @{$item} ) { - $weld_len_right_closing{$seqno} = - $len_close - $weld_len_left_closing{$seqno}; - $weld_len_left_opening{$seqno} = - $len_open - $weld_len_right_opening{$seqno}; - } - } - - ##################################### - # DEBUG - ##################################### - if (0) { - my $count = 0; - local $" = ')('; - foreach my $weld (@welds) { - print "\nWeld number $count has seq: (@{$weld})\n"; - foreach my $seq ( @{$weld} ) { - print < 0 && $types_to_go[$i] eq 'b' ) { $i-- } - my $weld_len = - weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] ); - return $weld_len; -} - -sub link_sequence_items { - - # This has been merged into 'respace_tokens' but retained for reference - my $self = shift; - my $rlines = $self->{rlines}; - my $rLL = $self->{rLL}; - - # We walk the token list and make links to the next sequence item. - # We also define these hashes to container tokens using sequence number as - # the key: - my $K_opening_container = {}; # opening [ { or ( - my $K_closing_container = {}; # closing ] } or ) - my $K_opening_ternary = {}; # opening ? of ternary - my $K_closing_ternary = {}; # closing : of ternary - - # sub to link preceding nodes forward to a new node type - my $link_back = sub { - my ( $Ktop, $key ) = @_; - - my $Kprev = $Ktop - 1; - while ( $Kprev >= 0 - && !defined( $rLL->[$Kprev]->[$key] ) ) - { - $rLL->[$Kprev]->[$key] = $Ktop; - $Kprev -= 1; - } - }; - - for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { - - $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef; - - my $type = $rLL->[$KK]->[_TYPE_]; - - next if ( $type eq 'b' ); - - my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - if ($type_sequence) { - - $link_back->( $KK, _KNEXT_SEQ_ITEM_ ); - - my $token = $rLL->[$KK]->[_TOKEN_]; - if ( $is_opening_token{$token} ) { - - $K_opening_container->{$type_sequence} = $KK; - } - elsif ( $is_closing_token{$token} ) { - - $K_closing_container->{$type_sequence} = $KK; - } - - # These are not yet used but could be useful - else { - if ( $token eq '?' ) { - $K_opening_ternary->{$type_sequence} = $KK; - } - elsif ( $token eq ':' ) { - $K_closing_ternary->{$type_sequence} = $KK; - } - else { - Fault(<{K_opening_container} = $K_opening_container; - $self->{K_closing_container} = $K_closing_container; - $self->{K_opening_ternary} = $K_opening_ternary; - $self->{K_closing_ternary} = $K_closing_ternary; - return; -} - -sub sum_token_lengths { - my $self = shift; - - # This has been merged into 'respace_tokens' but retained for reference - my $rLL = $self->{rLL}; - my $cumulative_length = 0; - for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { - - # Save the length sum to just BEFORE this token - $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length; - - # now set the length of this token - my $token_length = length( $rLL->[$KK]->[_TOKEN_] ); - - $cumulative_length += $token_length; - } - return; -} - -sub resync_lines_and_tokens { - - my $self = shift; - my $rLL = $self->{rLL}; - my $Klimit = $self->{Klimit}; - my $rlines = $self->{rlines}; - - # Re-construct the arrays of tokens associated with the original input lines - # since they have probably changed due to inserting and deleting blanks - # and a few other tokens. - - my $Kmax = -1; - - # This is the next token and its line index: - my $Knext = 0; - my $inext; - if ( defined($rLL) && @{$rLL} ) { - $Kmax = @{$rLL} - 1; - $inext = $rLL->[$Knext]->[_LINE_INDEX_]; - } - - my $get_inext = sub { - if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef } - else { - $inext = $rLL->[$Knext]->[_LINE_INDEX_]; - } - return $inext; - }; - - # Remember the most recently output token index - my $Klast_out; - - my $iline = -1; - foreach my $line_of_tokens ( @{$rlines} ) { - $iline++; - my $line_type = $line_of_tokens->{_line_type}; - if ( $line_type eq 'CODE' ) { - - my @K_array; - my $rK_range; - $inext = $get_inext->(); - while ( defined($inext) && $inext <= $iline ) { - push @{K_array}, $Knext; - $Knext += 1; - $inext = $get_inext->(); - } - - # Delete any terminal blank token - if (@K_array) { - if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) { - pop @K_array; - } - } - - # Define the range of K indexes for the line: - # $Kfirst = index of first token on line - # $Klast_out = index of last token on line - my ( $Kfirst, $Klast ); - if (@K_array) { - $Kfirst = $K_array[0]; - $Klast = $K_array[-1]; - $Klast_out = $Klast; - } - - # It is only safe to trim the actual line text if the input - # line had a terminal blank token. Otherwise, we may be - # in a quote. - if ( $line_of_tokens->{_ended_in_blank_token} ) { - $line_of_tokens->{_line_text} =~ s/\s+$//; - } - $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; - } - } - - # There shouldn't be any nodes beyond the last one unless we start - # allowing 'link_after' calls - if ( defined($inext) ) { - - Fault("unexpected tokens at end of file when reconstructing lines"); - } - - return; -} - -sub dump_verbatim { - my $self = shift; - my $rlines = $self->{rlines}; - foreach my $line ( @{$rlines} ) { - my $input_line = $line->{_line_text}; - $self->write_unindented_line($input_line); - } - return; -} - -sub finish_formatting { - - my ( $self, $severe_error ) = @_; - - # The file has been tokenized and is ready to be formatted. - # All of the relevant data is stored in $self, ready to go. - - # output file verbatim if severe error or no formatting requested - if ( $severe_error || $rOpts->{notidy} ) { - $self->dump_verbatim(); - $self->wrapup(); - return; - } - - # Make a pass through the lines, looking at lines of CODE and identifying - # special processing needs, such format skipping sections marked by - # special comments - $self->scan_comments(); - - # Find nested pairs of container tokens for any welding. This information - # is also needed for adding semicolons, so it is split apart from the - # welding step. - $self->find_nested_pairs(); - - # Make sure everything looks good - $self->check_line_hashes(); - - # Future: Place to Begin future Iteration Loop - # foreach my $it_count(1..$maxit) { - - # Future: We must reset some things after the first iteration. - # This includes: - # - resetting levels if there was any welding - # - resetting any phantom semicolons - # - dealing with any line numbering issues so we can relate final lines - # line numbers with input line numbers. - # - # If ($it_count>1) { - # Copy {level_raw} to [_LEVEL_] if ($it_count>1) - # Renumber lines - # } - - # Make a pass through all tokens, adding or deleting any whitespace as - # required. Also make any other changes, such as adding semicolons. - # All token changes must be made here so that the token data structure - # remains fixed for the rest of this iteration. - $self->respace_tokens(); - - # Implement any welding needed for the -wn or -cb options - $self->weld_containers(); - - # Finishes formatting and write the result to the line sink. - # Eventually this call should just change the 'rlines' data according to the - # new line breaks and then return so that we can do an internal iteration - # before continuing with the next stages of formatting. - $self->break_lines(); - - ############################################################ - # A possible future decomposition of 'break_lines()' follows. - # Benefits: - # - allow perltidy to do an internal iteration which eliminates - # many unnecessary steps, such as re-parsing and vertical alignment. - # This will allow iterations to be automatic. - # - consolidate all length calculations to allow utf8 alignment - ############################################################ - - # Future: Check for convergence of beginning tokens on CODE lines - - # Future: End of Iteration Loop - - # Future: add_padding($rargs); - - # Future: add_closing_side_comments($rargs); - - # Future: vertical_alignment($rargs); - - # Future: output results - - # A final routine to tie up any loose ends - $self->wrapup(); - return; -} - -sub create_one_line_block { - ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) = - @_; - return; -} - -sub destroy_one_line_block { - $index_start_one_line_block = UNDEFINED_INDEX; - $semicolons_before_block_self_destruct = 0; - return; -} - -sub leading_spaces_to_go { - - # return the number of indentation spaces for a token in the output stream; - # these were previously stored by 'set_leading_whitespace'. - - my $ii = shift; - if ( $ii < 0 ) { $ii = 0 } - return get_spaces( $leading_spaces_to_go[$ii] ); - -} - -sub get_spaces { - - # return the number of leading spaces associated with an indentation - # variable $indentation is either a constant number of spaces or an object - # with a get_spaces method. - my $indentation = shift; - return ref($indentation) ? $indentation->get_spaces() : $indentation; -} - -sub get_recoverable_spaces { - - # return the number of spaces (+ means shift right, - means shift left) - # that we would like to shift a group of lines with the same indentation - # to get them to line up with their opening parens - my $indentation = shift; - return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; -} - -sub get_available_spaces_to_go { - - my $ii = shift; - my $item = $leading_spaces_to_go[$ii]; - - # return the number of available leading spaces associated with an - # indentation variable. $indentation is either a constant number of - # spaces or an object with a get_available_spaces method. - return ref($item) ? $item->get_available_spaces() : 0; -} - -sub new_lp_indentation_item { - - # this is an interface to the IndentationItem class - my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; - - # A negative level implies not to store the item in the item_list - my $index = 0; - if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } - - my $item = Perl::Tidy::IndentationItem->new( - $spaces, $level, - $ci_level, $available_spaces, - $index, $gnu_sequence_number, - $align_paren, $max_gnu_stack_index, - $line_start_index_to_go, - ); - - if ( $level >= 0 ) { - $gnu_item_list[$max_gnu_item_index] = $item; - } - - return $item; -} - -sub set_leading_whitespace { - - # This routine defines leading whitespace - # given: the level and continuation_level of a token, - # define: space count of leading string which would apply if it - # were the first token of a new line. - - my ( $level_abs, $ci_level, $in_continued_quote ) = @_; - - # Adjust levels if necessary to recycle whitespace: - # given $level_abs, the absolute level - # define $level, a possibly reduced level for whitespace - my $level = $level_abs; - if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) { - if ( $level_abs < $whitespace_last_level ) { - pop(@whitespace_level_stack); - } - if ( !@whitespace_level_stack ) { - push @whitespace_level_stack, $level_abs; - } - elsif ( $level_abs > $whitespace_last_level ) { - $level = $whitespace_level_stack[-1] + - ( $level_abs - $whitespace_last_level ); - - if ( - # 1 Try to break at a block brace - ( - $level > $rOpts_whitespace_cycle - && $last_nonblank_type eq '{' - && $last_nonblank_token eq '{' - ) - - # 2 Then either a brace or bracket - || ( $level > $rOpts_whitespace_cycle + 1 - && $last_nonblank_token =~ /^[\{\[]$/ ) - - # 3 Then a paren too - || $level > $rOpts_whitespace_cycle + 2 - ) - { - $level = 1; - } - push @whitespace_level_stack, $level; - } - $level = $whitespace_level_stack[-1]; - } - $whitespace_last_level = $level_abs; - - # modify for -bli, which adds one continuation indentation for - # opening braces - if ( $rOpts_brace_left_and_indent - && $max_index_to_go == 0 - && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) - { - $ci_level++; - } - - # patch to avoid trouble when input file has negative indentation. - # other logic should catch this error. - if ( $level < 0 ) { $level = 0 } - - #------------------------------------------- - # handle the standard indentation scheme - #------------------------------------------- - unless ($rOpts_line_up_parentheses) { - my $space_count = - $ci_level * $rOpts_continuation_indentation + - $level * $rOpts_indent_columns; - my $ci_spaces = - ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; - - if ($in_continued_quote) { - $space_count = 0; - $ci_spaces = 0; - } - $leading_spaces_to_go[$max_index_to_go] = $space_count; - $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; - return; - } - - #------------------------------------------------------------- - # handle case of -lp indentation.. - #------------------------------------------------------------- - - # The continued_quote flag means that this is the first token of a - # line, and it is the continuation of some kind of multi-line quote - # or pattern. It requires special treatment because it must have no - # added leading whitespace. So we create a special indentation item - # which is not in the stack. - if ($in_continued_quote) { - my $space_count = 0; - my $available_space = 0; - $level = -1; # flag to prevent storing in item_list - $leading_spaces_to_go[$max_index_to_go] = - $reduced_spaces_to_go[$max_index_to_go] = - new_lp_indentation_item( $space_count, $level, $ci_level, - $available_space, 0 ); - return; - } - - # get the top state from the stack - my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces(); - my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level(); - my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level(); - - my $type = $types_to_go[$max_index_to_go]; - my $token = $tokens_to_go[$max_index_to_go]; - my $total_depth = $nesting_depth_to_go[$max_index_to_go]; - - if ( $type eq '{' || $type eq '(' ) { - - $gnu_comma_count{ $total_depth + 1 } = 0; - $gnu_arrow_count{ $total_depth + 1 } = 0; - - # If we come to an opening token after an '=' token of some type, - # see if it would be helpful to 'break' after the '=' to save space - my $last_equals = $last_gnu_equals{$total_depth}; - if ( $last_equals && $last_equals > $line_start_index_to_go ) { - - # find the position if we break at the '=' - my $i_test = $last_equals; - if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } - - # TESTING - ##my $too_close = ($i_test==$max_index_to_go-1); - - my $test_position = total_line_length( $i_test, $max_index_to_go ); - my $mll = maximum_line_length($i_test); - - if ( - - # the equals is not just before an open paren (testing) - ##!$too_close && - - # if we are beyond the midpoint - $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2 - - # or we are beyond the 1/4 point and there was an old - # break at the equals - || ( - $gnu_position_predictor > - $mll - $rOpts_maximum_line_length * 3 / 4 - && ( - $old_breakpoint_to_go[$last_equals] - || ( $last_equals > 0 - && $old_breakpoint_to_go[ $last_equals - 1 ] ) - || ( $last_equals > 1 - && $types_to_go[ $last_equals - 1 ] eq 'b' - && $old_breakpoint_to_go[ $last_equals - 2 ] ) - ) - ) - ) - { - - # then make the switch -- note that we do not set a real - # breakpoint here because we may not really need one; sub - # scan_list will do that if necessary - $line_start_index_to_go = $i_test + 1; - $gnu_position_predictor = $test_position; - } - } - } - - my $halfway = - maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2; - - # Check for decreasing depth .. - # Note that one token may have both decreasing and then increasing - # depth. For example, (level, ci) can go from (1,1) to (2,0). So, - # in this example we would first go back to (1,0) then up to (2,0) - # in a single call. - if ( $level < $current_level || $ci_level < $current_ci_level ) { - - # loop to find the first entry at or completely below this level - my ( $lev, $ci_lev ); - while (1) { - if ($max_gnu_stack_index) { - - # save index of token which closes this level - $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go); - - # Undo any extra indentation if we saw no commas - my $available_spaces = - $gnu_stack[$max_gnu_stack_index]->get_available_spaces(); - - my $comma_count = 0; - my $arrow_count = 0; - if ( $type eq '}' || $type eq ')' ) { - $comma_count = $gnu_comma_count{$total_depth}; - $arrow_count = $gnu_arrow_count{$total_depth}; - $comma_count = 0 unless $comma_count; - $arrow_count = 0 unless $arrow_count; - } - $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count); - $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count); - - if ( $available_spaces > 0 ) { - - if ( $comma_count <= 0 || $arrow_count > 0 ) { - - my $i = $gnu_stack[$max_gnu_stack_index]->get_index(); - my $seqno = - $gnu_stack[$max_gnu_stack_index] - ->get_sequence_number(); - - # Be sure this item was created in this batch. This - # should be true because we delete any available - # space from open items at the end of each batch. - if ( $gnu_sequence_number != $seqno - || $i > $max_gnu_item_index ) - { - warning( -"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" - ); - report_definite_bug(); - } - - else { - if ( $arrow_count == 0 ) { - $gnu_item_list[$i] - ->permanently_decrease_available_spaces( - $available_spaces); - } - else { - $gnu_item_list[$i] - ->tentatively_decrease_available_spaces( - $available_spaces); - } - foreach my $j ( $i + 1 .. $max_gnu_item_index ) { - $gnu_item_list[$j] - ->decrease_SPACES($available_spaces); - } - } - } - } - - # go down one level - --$max_gnu_stack_index; - $lev = $gnu_stack[$max_gnu_stack_index]->get_level(); - $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level(); - - # stop when we reach a level at or below the current level - if ( $lev <= $level && $ci_lev <= $ci_level ) { - $space_count = - $gnu_stack[$max_gnu_stack_index]->get_spaces(); - $current_level = $lev; - $current_ci_level = $ci_lev; - last; - } - } - - # reached bottom of stack .. should never happen because - # only negative levels can get here, and $level was forced - # to be positive above. - else { - warning( -"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" - ); - report_definite_bug(); - last; - } - } - } - - # handle increasing depth - if ( $level > $current_level || $ci_level > $current_ci_level ) { - - # Compute the standard incremental whitespace. This will be - # the minimum incremental whitespace that will be used. This - # choice results in a smooth transition between the gnu-style - # and the standard style. - my $standard_increment = - ( $level - $current_level ) * $rOpts_indent_columns + - ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; - - # Now we have to define how much extra incremental space - # ("$available_space") we want. This extra space will be - # reduced as necessary when long lines are encountered or when - # it becomes clear that we do not have a good list. - my $available_space = 0; - my $align_paren = 0; - my $excess = 0; - - # initialization on empty stack.. - if ( $max_gnu_stack_index == 0 ) { - $space_count = $level * $rOpts_indent_columns; - } - - # if this is a BLOCK, add the standard increment - elsif ($last_nonblank_block_type) { - $space_count += $standard_increment; - } - - # if last nonblank token was not structural indentation, - # just use standard increment - elsif ( $last_nonblank_type ne '{' ) { - $space_count += $standard_increment; - } - - # otherwise use the space to the first non-blank level change token - else { - - $space_count = $gnu_position_predictor; - - my $min_gnu_indentation = - $gnu_stack[$max_gnu_stack_index]->get_spaces(); - - $available_space = $space_count - $min_gnu_indentation; - if ( $available_space >= $standard_increment ) { - $min_gnu_indentation += $standard_increment; - } - elsif ( $available_space > 1 ) { - $min_gnu_indentation += $available_space + 1; - } - elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { - if ( ( $tightness{$last_nonblank_token} < 2 ) ) { - $min_gnu_indentation += 2; - } - else { - $min_gnu_indentation += 1; - } - } - else { - $min_gnu_indentation += $standard_increment; - } - $available_space = $space_count - $min_gnu_indentation; - - if ( $available_space < 0 ) { - $space_count = $min_gnu_indentation; - $available_space = 0; - } - $align_paren = 1; - } - - # update state, but not on a blank token - if ( $types_to_go[$max_index_to_go] ne 'b' ) { - - $gnu_stack[$max_gnu_stack_index]->set_have_child(1); - - ++$max_gnu_stack_index; - $gnu_stack[$max_gnu_stack_index] = - new_lp_indentation_item( $space_count, $level, $ci_level, - $available_space, $align_paren ); - - # If the opening paren is beyond the half-line length, then - # we will use the minimum (standard) indentation. This will - # help avoid problems associated with running out of space - # near the end of a line. As a result, in deeply nested - # lists, there will be some indentations which are limited - # to this minimum standard indentation. But the most deeply - # nested container will still probably be able to shift its - # parameters to the right for proper alignment, so in most - # cases this will not be noticeable. - if ( $available_space > 0 && $space_count > $halfway ) { - $gnu_stack[$max_gnu_stack_index] - ->tentatively_decrease_available_spaces($available_space); - } - } - } - - # Count commas and look for non-list characters. Once we see a - # non-list character, we give up and don't look for any more commas. - if ( $type eq '=>' ) { - $gnu_arrow_count{$total_depth}++; - - # tentatively treating '=>' like '=' for estimating breaks - # TODO: this could use some experimentation - $last_gnu_equals{$total_depth} = $max_index_to_go; - } - - elsif ( $type eq ',' ) { - $gnu_comma_count{$total_depth}++; - } - - elsif ( $is_assignment{$type} ) { - $last_gnu_equals{$total_depth} = $max_index_to_go; - } - - # this token might start a new line - # if this is a non-blank.. - if ( $type ne 'b' ) { - - # and if .. - if ( - - # this is the first nonblank token of the line - $max_index_to_go == 1 && $types_to_go[0] eq 'b' - - # or previous character was one of these: - || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ - - # or previous character was opening and this does not close it - || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) - || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) - - # or this token is one of these: - || $type =~ /^([\.]|\|\||\&\&)$/ - - # or this is a closing structure - || ( $last_nonblank_type_to_go eq '}' - && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) - - # or previous token was keyword 'return' - || ( $last_nonblank_type_to_go eq 'k' - && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) - - # or starting a new line at certain keywords is fine - || ( $type eq 'k' - && $is_if_unless_and_or_last_next_redo_return{$token} ) - - # or this is after an assignment after a closing structure - || ( - $is_assignment{$last_nonblank_type_to_go} - && ( - $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ - - # and it is significantly to the right - || $gnu_position_predictor > $halfway - ) - ) - ) - { - check_for_long_gnu_style_lines(); - $line_start_index_to_go = $max_index_to_go; - - # back up 1 token if we want to break before that type - # otherwise, we may strand tokens like '?' or ':' on a line - if ( $line_start_index_to_go > 0 ) { - if ( $last_nonblank_type_to_go eq 'k' ) { - - if ( $want_break_before{$last_nonblank_token_to_go} ) { - $line_start_index_to_go--; - } - } - elsif ( $want_break_before{$last_nonblank_type_to_go} ) { - $line_start_index_to_go--; - } - } - } - } - - # remember the predicted position of this token on the output line - if ( $max_index_to_go > $line_start_index_to_go ) { - $gnu_position_predictor = - total_line_length( $line_start_index_to_go, $max_index_to_go ); - } - else { - $gnu_position_predictor = - $space_count + $token_lengths_to_go[$max_index_to_go]; - } - - # store the indentation object for this token - # this allows us to manipulate the leading whitespace - # (in case we have to reduce indentation to fit a line) without - # having to change any token values - $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; - $reduced_spaces_to_go[$max_index_to_go] = - ( $max_gnu_stack_index > 0 && $ci_level ) - ? $gnu_stack[ $max_gnu_stack_index - 1 ] - : $gnu_stack[$max_gnu_stack_index]; - return; -} - -sub check_for_long_gnu_style_lines { - - # look at the current estimated maximum line length, and - # remove some whitespace if it exceeds the desired maximum - - # this is only for the '-lp' style - return unless ($rOpts_line_up_parentheses); - - # nothing can be done if no stack items defined for this line - return if ( $max_gnu_item_index == UNDEFINED_INDEX ); - - # see if we have exceeded the maximum desired line length - # keep 2 extra free because they are needed in some cases - # (result of trial-and-error testing) - my $spaces_needed = - $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2; - - return if ( $spaces_needed <= 0 ); - - # We are over the limit, so try to remove a requested number of - # spaces from leading whitespace. We are only allowed to remove - # from whitespace items created on this batch, since others have - # already been used and cannot be undone. - my @candidates = (); - my $i; - - # loop over all whitespace items created for the current batch - for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { - my $item = $gnu_item_list[$i]; - - # item must still be open to be a candidate (otherwise it - # cannot influence the current token) - next if ( $item->get_closed() >= 0 ); - - my $available_spaces = $item->get_available_spaces(); - - if ( $available_spaces > 0 ) { - push( @candidates, [ $i, $available_spaces ] ); - } - } - - return unless (@candidates); - - # sort by available whitespace so that we can remove whitespace - # from the maximum available first - @candidates = sort { $b->[1] <=> $a->[1] } @candidates; - - # keep removing whitespace until we are done or have no more - foreach my $candidate (@candidates) { - my ( $i, $available_spaces ) = @{$candidate}; - my $deleted_spaces = - ( $available_spaces > $spaces_needed ) - ? $spaces_needed - : $available_spaces; - - # remove the incremental space from this item - $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces); - - my $i_debug = $i; - - # update the leading whitespace of this item and all items - # that came after it - for ( ; $i <= $max_gnu_item_index ; $i++ ) { - - my $old_spaces = $gnu_item_list[$i]->get_spaces(); - if ( $old_spaces >= $deleted_spaces ) { - $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); - } - - # shouldn't happen except for code bug: - else { - my $level = $gnu_item_list[$i_debug]->get_level(); - my $ci_level = $gnu_item_list[$i_debug]->get_ci_level(); - my $old_level = $gnu_item_list[$i]->get_level(); - my $old_ci_level = $gnu_item_list[$i]->get_ci_level(); - warning( -"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" - ); - report_definite_bug(); - } - } - $gnu_position_predictor -= $deleted_spaces; - $spaces_needed -= $deleted_spaces; - last unless ( $spaces_needed > 0 ); - } - return; -} - -sub finish_lp_batch { - - # This routine is called once after each output stream batch is - # finished to undo indentation for all incomplete -lp - # indentation levels. It is too risky to leave a level open, - # because then we can't backtrack in case of a long line to follow. - # This means that comments and blank lines will disrupt this - # indentation style. But the vertical aligner may be able to - # get the space back if there are side comments. - - # this is only for the 'lp' style - return unless ($rOpts_line_up_parentheses); - - # nothing can be done if no stack items defined for this line - return if ( $max_gnu_item_index == UNDEFINED_INDEX ); - - # loop over all whitespace items created for the current batch - foreach my $i ( 0 .. $max_gnu_item_index ) { - my $item = $gnu_item_list[$i]; - - # only look for open items - next if ( $item->get_closed() >= 0 ); - - # Tentatively remove all of the available space - # (The vertical aligner will try to get it back later) - my $available_spaces = $item->get_available_spaces(); - if ( $available_spaces > 0 ) { - - # delete incremental space for this item - $gnu_item_list[$i] - ->tentatively_decrease_available_spaces($available_spaces); - - # Reduce the total indentation space of any nodes that follow - # Note that any such nodes must necessarily be dependents - # of this node. - foreach ( $i + 1 .. $max_gnu_item_index ) { - $gnu_item_list[$_]->decrease_SPACES($available_spaces); - } - } - } - return; -} - -sub reduce_lp_indentation { - - # reduce the leading whitespace at token $i if possible by $spaces_needed - # (a large value of $spaces_needed will remove all excess space) - # NOTE: to be called from scan_list only for a sequence of tokens - # contained between opening and closing parens/braces/brackets - - my ( $i, $spaces_wanted ) = @_; - my $deleted_spaces = 0; - - my $item = $leading_spaces_to_go[$i]; - my $available_spaces = $item->get_available_spaces(); - - if ( - $available_spaces > 0 - && ( ( $spaces_wanted <= $available_spaces ) - || !$item->get_have_child() ) - ) - { - - # we'll remove these spaces, but mark them as recoverable - $deleted_spaces = - $item->tentatively_decrease_available_spaces($spaces_wanted); - } - - return $deleted_spaces; -} - -sub token_sequence_length { - - # return length of tokens ($ibeg .. $iend) including $ibeg & $iend - # returns 0 if $ibeg > $iend (shouldn't happen) - my ( $ibeg, $iend ) = @_; - return 0 if ( $iend < 0 || $ibeg > $iend ); - return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); - return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; -} - -sub total_line_length { - - # return length of a line of tokens ($ibeg .. $iend) - my ( $ibeg, $iend ) = @_; - return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); -} - -sub maximum_line_length_for_level { - - # return maximum line length for line starting with a given level - my $maximum_line_length = $rOpts_maximum_line_length; - - # Modify if -vmll option is selected - if ($rOpts_variable_maximum_line_length) { - my $level = shift; - if ( $level < 0 ) { $level = 0 } - $maximum_line_length += $level * $rOpts_indent_columns; - } - return $maximum_line_length; -} - -sub maximum_line_length { - - # return maximum line length for line starting with the token at given index - my $ii = shift; - return maximum_line_length_for_level( $levels_to_go[$ii] ); -} - -sub excess_line_length { - - # return number of characters by which a line of tokens ($ibeg..$iend) - # exceeds the allowable line length. - my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_; - - # Include left and right weld lengths unless requested not to - my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend); - my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend); - - return total_line_length( $ibeg, $iend ) + $wl + $wr - - maximum_line_length($ibeg); -} - -sub wrapup { - - # flush buffer and write any informative messages - my $self = shift; - - $self->flush(); - $file_writer_object->decrement_output_line_number() - ; # fix up line number since it was incremented - we_are_at_the_last_line(); - if ( $added_semicolon_count > 0 ) { - my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; - my $what = - ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; - write_logfile_entry("$added_semicolon_count $what added:\n"); - write_logfile_entry( - " $first at input line $first_added_semicolon_at\n"); - - if ( $added_semicolon_count > 1 ) { - write_logfile_entry( - " Last at input line $last_added_semicolon_at\n"); - } - write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); - write_logfile_entry("\n"); - } - - if ( $deleted_semicolon_count > 0 ) { - my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; - my $what = - ( $deleted_semicolon_count > 1 ) - ? "semicolons were" - : "semicolon was"; - write_logfile_entry( - "$deleted_semicolon_count unnecessary $what deleted:\n"); - write_logfile_entry( - " $first at input line $first_deleted_semicolon_at\n"); - - if ( $deleted_semicolon_count > 1 ) { - write_logfile_entry( - " Last at input line $last_deleted_semicolon_at\n"); - } - write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); - write_logfile_entry("\n"); - } - - if ( $embedded_tab_count > 0 ) { - my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; - my $what = - ( $embedded_tab_count > 1 ) - ? "quotes or patterns" - : "quote or pattern"; - write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); - write_logfile_entry( -"This means the display of this script could vary with device or software\n" - ); - write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); - - if ( $embedded_tab_count > 1 ) { - write_logfile_entry( - " Last at input line $last_embedded_tab_at\n"); - } - write_logfile_entry("\n"); - } - - if ($first_tabbing_disagreement) { - write_logfile_entry( -"First indentation disagreement seen at input line $first_tabbing_disagreement\n" - ); - } - - if ($in_tabbing_disagreement) { - write_logfile_entry( -"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" - ); - } - else { - - if ($last_tabbing_disagreement) { - - write_logfile_entry( -"Last indentation disagreement seen at input line $last_tabbing_disagreement\n" - ); - } - else { - write_logfile_entry("No indentation disagreement seen\n"); - } - } - if ($first_tabbing_disagreement) { - write_logfile_entry( -"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" - ); - } - write_logfile_entry("\n"); - - $vertical_aligner_object->report_anything_unusual(); - - $file_writer_object->report_line_length_errors(); - - return; -} - -sub check_options { - - # This routine is called to check the Opts hash after it is defined - $rOpts = shift; - - make_static_block_comment_pattern(); - make_static_side_comment_pattern(); - make_closing_side_comment_prefix(); - make_closing_side_comment_list_pattern(); - $format_skipping_pattern_begin = - make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); - $format_skipping_pattern_end = - make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); - - # If closing side comments ARE selected, then we can safely - # delete old closing side comments unless closing side comment - # warnings are requested. This is a good idea because it will - # eliminate any old csc's which fall below the line count threshold. - # We cannot do this if warnings are turned on, though, because we - # might delete some text which has been added. So that must - # be handled when comments are created. - if ( $rOpts->{'closing-side-comments'} ) { - if ( !$rOpts->{'closing-side-comment-warnings'} ) { - $rOpts->{'delete-closing-side-comments'} = 1; - } - } - - # If closing side comments ARE NOT selected, but warnings ARE - # selected and we ARE DELETING csc's, then we will pretend to be - # adding with a huge interval. This will force the comments to be - # generated for comparison with the old comments, but not added. - elsif ( $rOpts->{'closing-side-comment-warnings'} ) { - if ( $rOpts->{'delete-closing-side-comments'} ) { - $rOpts->{'delete-closing-side-comments'} = 0; - $rOpts->{'closing-side-comments'} = 1; - $rOpts->{'closing-side-comment-interval'} = 100000000; - } - } - - make_bli_pattern(); - make_block_brace_vertical_tightness_pattern(); - make_blank_line_pattern(); - - prepare_cuddled_block_types(); - if ( $rOpts->{'dump-cuddled-block-list'} ) { - dump_cuddled_block_list(*STDOUT); - Perl::Tidy::Exit 0; - } - - if ( $rOpts->{'line-up-parentheses'} ) { - - if ( $rOpts->{'indent-only'} - || !$rOpts->{'add-newlines'} - || !$rOpts->{'delete-old-newlines'} ) - { - Perl::Tidy::Warn <{'line-up-parentheses'} = 0; - } - } - - # At present, tabs are not compatible with the line-up-parentheses style - # (it would be possible to entab the total leading whitespace - # just prior to writing the line, if desired). - if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { - Perl::Tidy::Warn <{'tabs'} = 0; - } - - # Likewise, tabs are not compatible with outdenting.. - if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { - Perl::Tidy::Warn <{'tabs'} = 0; - } - - if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { - Perl::Tidy::Warn <{'tabs'} = 0; - } - - if ( !$rOpts->{'space-for-semicolon'} ) { - $want_left_space{'f'} = -1; - } - - if ( $rOpts->{'space-terminal-semicolon'} ) { - $want_left_space{';'} = 1; - } - - # implement outdenting preferences for keywords - %outdent_keyword = (); - unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { - @_ = qw(next last redo goto return); # defaults - } - - # FUTURE: if not a keyword, assume that it is an identifier - foreach (@_) { - if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { - $outdent_keyword{$_} = 1; - } - else { - Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword"; - } - } - - # implement user whitespace preferences - if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { - @want_left_space{@_} = (1) x scalar(@_); - } - - if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { - @want_right_space{@_} = (1) x scalar(@_); - } - - if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { - @want_left_space{@_} = (-1) x scalar(@_); - } - - if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { - @want_right_space{@_} = (-1) x scalar(@_); - } - if ( $rOpts->{'dump-want-left-space'} ) { - dump_want_left_space(*STDOUT); - Perl::Tidy::Exit 0; - } - - if ( $rOpts->{'dump-want-right-space'} ) { - dump_want_right_space(*STDOUT); - Perl::Tidy::Exit 0; - } - - # default keywords for which space is introduced before an opening paren - # (at present, including them messes up vertical alignment) - @_ = qw(my local our and or err eq ne if else elsif until - unless while for foreach return switch case given when catch); - @space_after_keyword{@_} = (1) x scalar(@_); - - # first remove any or all of these if desired - if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { - - # -nsak='*' selects all the above keywords - if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) } - @space_after_keyword{@_} = (0) x scalar(@_); - } - - # then allow user to add to these defaults - if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { - @space_after_keyword{@_} = (1) x scalar(@_); - } - - # implement user break preferences - my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & - = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - . : ? && || and or err xor - ); - - my $break_after = sub { - foreach my $tok (@_) { - if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); - } - } - }; - - my $break_before = sub { - foreach my $tok (@_) { - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); - } - } - }; - - $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); - $break_before->(@all_operators) - if ( $rOpts->{'break-before-all-operators'} ); - - $break_after->( split_words( $rOpts->{'want-break-after'} ) ); - $break_before->( split_words( $rOpts->{'want-break-before'} ) ); - - # make note if breaks are before certain key types - %want_break_before = (); - foreach my $tok ( @all_operators, ',' ) { - $want_break_before{$tok} = - $left_bond_strength{$tok} < $right_bond_strength{$tok}; - } - - # Coordinate ?/: breaks, which must be similar - if ( !$want_break_before{':'} ) { - $want_break_before{'?'} = $want_break_before{':'}; - $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; - $left_bond_strength{'?'} = NO_BREAK; - } - - # Define here tokens which may follow the closing brace of a do statement - # on the same line, as in: - # } while ( $something); - @_ = qw(until while unless if ; : ); - push @_, ','; - @is_do_follower{@_} = (1) x scalar(@_); - - # These tokens may follow the closing brace of an if or elsif block. - # In other words, for cuddled else we want code to look like: - # } elsif ( $something) { - # } else { - if ( $rOpts->{'cuddled-else'} ) { - @_ = qw(else elsif); - @is_if_brace_follower{@_} = (1) x scalar(@_); - } - else { - %is_if_brace_follower = (); - } - - # nothing can follow the closing curly of an else { } block: - %is_else_brace_follower = (); - - # what can follow a multi-line anonymous sub definition closing curly: - @_ = qw# ; : => or and && || ~~ !~~ ) #; - push @_, ','; - @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); - - # what can follow a one-line anonymous sub closing curly: - # one-line anonymous subs also have ']' here... - # see tk3.t and PP.pm - @_ = qw# ; : => or and && || ) ] ~~ !~~ #; - push @_, ','; - @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); - - # What can follow a closing curly of a block - # which is not an if/elsif/else/do/sort/map/grep/eval/sub - # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' - @_ = qw# ; : => or and && || ) #; - push @_, ','; - - # allow cuddled continue if cuddled else is specified - if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } - - @is_other_brace_follower{@_} = (1) x scalar(@_); - - $right_bond_strength{'{'} = WEAK; - $left_bond_strength{'{'} = VERY_STRONG; - - # make -l=0 equal to -l=infinite - if ( !$rOpts->{'maximum-line-length'} ) { - $rOpts->{'maximum-line-length'} = 1000000; - } - - # make -lbl=0 equal to -lbl=infinite - if ( !$rOpts->{'long-block-line-count'} ) { - $rOpts->{'long-block-line-count'} = 1000000; - } - - my $enc = $rOpts->{'character-encoding'}; - if ( $enc && $enc !~ /^(none|utf8)$/i ) { - Perl::Tidy::Die <{'output-line-ending'}; - if ($ole) { - my %endings = ( - dos => "\015\012", - win => "\015\012", - mac => "\015", - unix => "\012", - ); - - # Patch for RT #99514, a memoization issue. - # Normally, the user enters one of 'dos', 'win', etc, and we change the - # value in the options parameter to be the corresponding line ending - # character. But, if we are using memoization, on later passes through - # here the option parameter will already have the desired ending - # character rather than the keyword 'dos', 'win', etc. So - # we must check to see if conversion has already been done and, if so, - # bypass the conversion step. - my %endings_inverted = ( - "\015\012" => 'dos', - "\015\012" => 'win', - "\015" => 'mac', - "\012" => 'unix', - ); - - if ( defined( $endings_inverted{$ole} ) ) { - - # we already have valid line ending, nothing more to do - } - else { - $ole = lc $ole; - unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { - my $str = join " ", keys %endings; - Perl::Tidy::Die <{'preserve-line-endings'} ) { - Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n"; - $rOpts->{'preserve-line-endings'} = undef; - } - } - } - - # hashes used to simplify setting whitespace - %tightness = ( - '{' => $rOpts->{'brace-tightness'}, - '}' => $rOpts->{'brace-tightness'}, - '(' => $rOpts->{'paren-tightness'}, - ')' => $rOpts->{'paren-tightness'}, - '[' => $rOpts->{'square-bracket-tightness'}, - ']' => $rOpts->{'square-bracket-tightness'}, - ); - %matching_token = ( - '{' => '}', - '(' => ')', - '[' => ']', - '?' => ':', - ); - - # frequently used parameters - $rOpts_add_newlines = $rOpts->{'add-newlines'}; - $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; - $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; - $rOpts_block_brace_vertical_tightness = - $rOpts->{'block-brace-vertical-tightness'}; - $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; - $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; - $rOpts_break_at_old_ternary_breakpoints = - $rOpts->{'break-at-old-ternary-breakpoints'}; - $rOpts_break_at_old_attribute_breakpoints = - $rOpts->{'break-at-old-attribute-breakpoints'}; - $rOpts_break_at_old_comma_breakpoints = - $rOpts->{'break-at-old-comma-breakpoints'}; - $rOpts_break_at_old_keyword_breakpoints = - $rOpts->{'break-at-old-keyword-breakpoints'}; - $rOpts_break_at_old_logical_breakpoints = - $rOpts->{'break-at-old-logical-breakpoints'}; - $rOpts_closing_side_comment_else_flag = - $rOpts->{'closing-side-comment-else-flag'}; - $rOpts_closing_side_comment_maximum_text = - $rOpts->{'closing-side-comment-maximum-text'}; - $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; - $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; - $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; - $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; - $rOpts_indent_columns = $rOpts->{'indent-columns'}; - $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; - $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; - $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; - $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; - - $rOpts_variable_maximum_line_length = - $rOpts->{'variable-maximum-line-length'}; - $rOpts_short_concatenation_item_length = - $rOpts->{'short-concatenation-item-length'}; - - $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; - $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; - $rOpts_format_skipping = $rOpts->{'format-skipping'}; - $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; - $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; - $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; - $rOpts_ignore_side_comment_lengths = - $rOpts->{'ignore-side-comment-lengths'}; - - # Note that both opening and closing tokens can access the opening - # and closing flags of their container types. - %opening_vertical_tightness = ( - '(' => $rOpts->{'paren-vertical-tightness'}, - '{' => $rOpts->{'brace-vertical-tightness'}, - '[' => $rOpts->{'square-bracket-vertical-tightness'}, - ')' => $rOpts->{'paren-vertical-tightness'}, - '}' => $rOpts->{'brace-vertical-tightness'}, - ']' => $rOpts->{'square-bracket-vertical-tightness'}, - ); - - %closing_vertical_tightness = ( - '(' => $rOpts->{'paren-vertical-tightness-closing'}, - '{' => $rOpts->{'brace-vertical-tightness-closing'}, - '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, - ')' => $rOpts->{'paren-vertical-tightness-closing'}, - '}' => $rOpts->{'brace-vertical-tightness-closing'}, - ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, - ); - - # assume flag for '>' same as ')' for closing qw quotes - %closing_token_indentation = ( - ')' => $rOpts->{'closing-paren-indentation'}, - '}' => $rOpts->{'closing-brace-indentation'}, - ']' => $rOpts->{'closing-square-bracket-indentation'}, - '>' => $rOpts->{'closing-paren-indentation'}, - ); - - # flag indicating if any closing tokens are indented - $some_closing_token_indentation = - $rOpts->{'closing-paren-indentation'} - || $rOpts->{'closing-brace-indentation'} - || $rOpts->{'closing-square-bracket-indentation'} - || $rOpts->{'indent-closing-brace'}; - - %opening_token_right = ( - '(' => $rOpts->{'opening-paren-right'}, - '{' => $rOpts->{'opening-hash-brace-right'}, - '[' => $rOpts->{'opening-square-bracket-right'}, - ); - - %stack_opening_token = ( - '(' => $rOpts->{'stack-opening-paren'}, - '{' => $rOpts->{'stack-opening-hash-brace'}, - '[' => $rOpts->{'stack-opening-square-bracket'}, - ); - - %stack_closing_token = ( - ')' => $rOpts->{'stack-closing-paren'}, - '}' => $rOpts->{'stack-closing-hash-brace'}, - ']' => $rOpts->{'stack-closing-square-bracket'}, - ); - $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; - $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'}; - return; -} - -sub bad_pattern { - - # See if a pattern will compile. We have to use a string eval here, - # but it should be safe because the pattern has been constructed - # by this program. - my ($pattern) = @_; - eval "'##'=~/$pattern/"; - return $@; -} - -sub prepare_cuddled_block_types { - - my $cuddled_string = $rOpts->{'cuddled-block-list'}; - $cuddled_string = "try-catch-finally" unless defined($cuddled_string); - - # we have a cuddled string of the form - # 'try-catch-finally' - - # we want to prepare a hash of the form - - # $rcuddled_block_types = { - # 'try' => { - # 'catch' => 1, - # 'finally' => 1 - # }, - # }; - - # use -dcbl to dump this hash - - # Multiple such strings are input as a space or comma separated list - - # If we get two lists with the same leading type, such as - # -cbl = "-try-catch-finally -try-catch-otherwise" - # then they will get merged as follows: - # $rcuddled_block_types = { - # 'try' => { - # 'catch' => 1, - # 'finally' => 2, - # 'otherwise' => 1, - # }, - # }; - # This will allow either type of chain to be followed. - - $cuddled_string =~ s/,/ /g; # allow space or comma separated lists - my @cuddled_strings = split /\s+/, $cuddled_string; - - $rcuddled_block_types = {}; - - # process each dash-separated string... - my $string_count = 0; - foreach my $string (@cuddled_strings) { - next unless $string; - my @words = split /-+/, $string; # allow multiple dashes - - # we could look for and report possible errors here... - next unless ( @words && @words > 0 ); - my $start = shift @words; - - # allow either '-continue' or *-continue' for arbitrary starting type - $start = '*' unless $start; - - # always make an entry for the leading word. If none follow, this - # will still prevent a wildcard from matching this word. - if ( !defined( $rcuddled_block_types->{$start} ) ) { - $rcuddled_block_types->{$start} = {}; - } - - # The count gives the original word order in case we ever want it. - $string_count++; - my $word_count = 0; - foreach my $word (@words) { - next unless $word; - $word_count++; - $rcuddled_block_types->{$start}->{$word} = - 1; #"$string_count.$word_count"; - } - } - - return; -} - -sub dump_cuddled_block_list { - my ($fh) = @_; - - # Here is the format of the cuddled block type hash - # which controls this routine - # my $rcuddled_block_types = { - # 'if' => { - # 'else' => 1, - # 'elsif' => 1 - # }, - # 'try' => { - # 'catch' => 1, - # 'finally' => 1 - # }, - # }; - #The numerical values are string.word, - #where string = string number and word = word number in that string - - my $cuddled_string = $rOpts->{'cuddled-block-list'}; - $cuddled_string = '' unless $cuddled_string; - $fh->print(<print( Dumper($rcuddled_block_types) ); - - $fh->print(<{'static-block-comment-prefix'} ) { - my $prefix = $rOpts->{'static-block-comment-prefix'}; - $prefix =~ s/^\s*//; - my $pattern = $prefix; - - # user may give leading caret to force matching left comments only - if ( $prefix !~ /^\^#/ ) { - if ( $prefix !~ /^#/ ) { - Perl::Tidy::Die -"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; - } - $pattern = '^\s*' . $prefix; - } - if ( bad_pattern($pattern) ) { - Perl::Tidy::Die -"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; - } - $static_block_comment_pattern = $pattern; - } - return; -} - -sub make_format_skipping_pattern { - my ( $opt_name, $default ) = @_; - my $param = $rOpts->{$opt_name}; - unless ($param) { $param = $default } - $param =~ s/^\s*//; - if ( $param !~ /^#/ ) { - Perl::Tidy::Die - "ERROR: the $opt_name parameter '$param' must begin with '#'\n"; - } - my $pattern = '^' . $param . '\s'; - if ( bad_pattern($pattern) ) { - Perl::Tidy::Die -"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"; - } - return $pattern; -} - -sub make_closing_side_comment_list_pattern { - - # turn any input list into a regex for recognizing selected block types - $closing_side_comment_list_pattern = '^\w+'; - if ( defined( $rOpts->{'closing-side-comment-list'} ) - && $rOpts->{'closing-side-comment-list'} ) - { - $closing_side_comment_list_pattern = - make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); - } - return; -} - -sub make_bli_pattern { - - if ( defined( $rOpts->{'brace-left-and-indent-list'} ) - && $rOpts->{'brace-left-and-indent-list'} ) - { - $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; - } - - $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); - return; -} - -sub make_block_brace_vertical_tightness_pattern { - - # turn any input list into a regex for recognizing selected block types - $block_brace_vertical_tightness_pattern = - '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; - if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) - && $rOpts->{'block-brace-vertical-tightness-list'} ) - { - $block_brace_vertical_tightness_pattern = - make_block_pattern( '-bbvtl', - $rOpts->{'block-brace-vertical-tightness-list'} ); - } - return; -} - -sub make_blank_line_pattern { - - $blank_lines_before_closing_block_pattern = $SUB_PATTERN; - my $key = 'blank-lines-before-closing-block-list'; - if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { - $blank_lines_before_closing_block_pattern = - make_block_pattern( '-blbcl', $rOpts->{$key} ); - } - - $blank_lines_after_opening_block_pattern = $SUB_PATTERN; - $key = 'blank-lines-after-opening-block-list'; - if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { - $blank_lines_after_opening_block_pattern = - make_block_pattern( '-blaol', $rOpts->{$key} ); - } - return; -} - -sub make_block_pattern { - - # given a string of block-type keywords, return a regex to match them - # The only tricky part is that labels are indicated with a single ':' - # and the 'sub' token text may have additional text after it (name of - # sub). - # - # Example: - # - # input string: "if else elsif unless while for foreach do : sub"; - # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; - - # Minor Update: - # - # To distinguish between anonymous subs and named subs, use 'sub' to - # indicate a named sub, and 'asub' to indicate an anonymous sub - - my ( $abbrev, $string ) = @_; - my @list = split_words($string); - my @words = (); - my %seen; - for my $i (@list) { - if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } - next if $seen{$i}; - $seen{$i} = 1; - if ( $i eq 'sub' ) { - } - elsif ( $i eq 'asub' ) { - } - elsif ( $i eq ';' ) { - push @words, ';'; - } - elsif ( $i eq '{' ) { - push @words, '\{'; - } - elsif ( $i eq ':' ) { - push @words, '\w+:'; - } - elsif ( $i =~ /^\w/ ) { - push @words, $i; - } - else { - Perl::Tidy::Warn - "unrecognized block type $i after $abbrev, ignoring\n"; - } - } - my $pattern = '(' . join( '|', @words ) . ')$'; - my $sub_patterns = ""; - if ( $seen{'sub'} ) { - $sub_patterns .= '|' . $SUB_PATTERN; - } - if ( $seen{'asub'} ) { - $sub_patterns .= '|' . $ASUB_PATTERN; - } - if ($sub_patterns) { - $pattern = '(' . $pattern . $sub_patterns . ')'; - } - $pattern = '^' . $pattern; - return $pattern; -} - -sub make_static_side_comment_pattern { - - # create the pattern used to identify static side comments - $static_side_comment_pattern = '^##'; - - # allow the user to change it - if ( $rOpts->{'static-side-comment-prefix'} ) { - my $prefix = $rOpts->{'static-side-comment-prefix'}; - $prefix =~ s/^\s*//; - my $pattern = '^' . $prefix; - if ( bad_pattern($pattern) ) { - Perl::Tidy::Die -"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"; - } - $static_side_comment_pattern = $pattern; - } - return; -} - -sub make_closing_side_comment_prefix { - - # Be sure we have a valid closing side comment prefix - my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; - my $csc_prefix_pattern; - if ( !defined($csc_prefix) ) { - $csc_prefix = '## end'; - $csc_prefix_pattern = '^##\s+end'; - } - else { - my $test_csc_prefix = $csc_prefix; - if ( $test_csc_prefix !~ /^#/ ) { - $test_csc_prefix = '#' . $test_csc_prefix; - } - - # make a regex to recognize the prefix - my $test_csc_prefix_pattern = $test_csc_prefix; - - # escape any special characters - $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; - - $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; - - # allow exact number of intermediate spaces to vary - $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; - - # make sure we have a good pattern - # if we fail this we probably have an error in escaping - # characters. - - if ( bad_pattern($test_csc_prefix_pattern) ) { - - # shouldn't happen..must have screwed up escaping, above - report_definite_bug(); - Perl::Tidy::Warn -"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"; - - # just warn and keep going with defaults - Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n"; - Perl::Tidy::Warn - "Using default -cscp instead; please check output\n"; - } - else { - $csc_prefix = $test_csc_prefix; - $csc_prefix_pattern = $test_csc_prefix_pattern; - } - } - $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; - $closing_side_comment_prefix_pattern = $csc_prefix_pattern; - return; -} - -sub dump_want_left_space { - my $fh = shift; - local $" = "\n"; - print $fh <1; - # $a = $b - III; - # and even this: - # $a = - III; - || ( ( $tokenl eq '-' ) - && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) - - # '= -' should not become =- or you will get a warning - # about reversed -= - # || ($tokenr eq '-') - - # keep a space between a quote and a bareword to prevent the - # bareword from becoming a quote modifier. - || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) - - # keep a space between a token ending in '$' and any word; - # this caused trouble: "die @$ if $@" - || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) - && ( $tokenr =~ /^[a-zA-Z_]/ ) ) - - # perl is very fussy about spaces before << - || ( $tokenr =~ /^\<\ [qw#= ( ) =#], #=( )= - 'Venus1' => [qw#0 +#], # 0+ - 'Venus2' => [qw#+ 0#], # +0 - 'Enterprise' => [qw#) x ! !#], # ()x!! - 'Kite1' => [qw#~ ~ <>#], # ~~<> - 'Kite2' => [qw#~~ <>#], # ~~<> - 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> - 'Bang bang ' => [qw#! !#], # !! - ); - - # The following operators and constants are not included because they - # are normally kept tight by perltidy: - # ~~ <~> - # - - # Make a lookup table indexed by the first token of each operator: - # first token => [list, list, ...] - foreach my $value ( values(%secret_operators) ) { - my $tok = $value->[0]; - push @{ $is_leading_secret_token{$tok} }, $value; - } - } - - sub new_secret_operator_whitespace { - - my ( $rlong_array, $rwhitespace_flags ) = @_; - - # Loop over all tokens in this line - my ( $token, $type ); - my $jmax = @{$rlong_array} - 1; - foreach my $j ( 0 .. $jmax ) { - - $token = $rlong_array->[$j]->[_TOKEN_]; - $type = $rlong_array->[$j]->[_TYPE_]; - - # Skip unless this token might start a secret operator - next if ( $type eq 'b' ); - next unless ( $is_leading_secret_token{$token} ); - - # Loop over all secret operators with this leading token - foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { - my $jend = $j - 1; - foreach my $tok ( @{$rpattern} ) { - $jend++; - $jend++ - - if ( $jend <= $jmax - && $rlong_array->[$jend]->[_TYPE_] eq 'b' ); - if ( $jend > $jmax - || $tok ne $rlong_array->[$jend]->[_TOKEN_] ) - { - $jend = undef; - last; - } - } - - if ($jend) { - - # set flags to prevent spaces within this operator - foreach my $jj ( $j + 1 .. $jend ) { - $rwhitespace_flags->[$jj] = WS_NO; - } - $j = $jend; - last; - } - } ## End Loop over all operators - } ## End loop over all tokens - return; - } # End sub -} - -{ # begin print_line_of_tokens - - my $rinput_token_array; # Current working array - my $rinput_K_array; # Future working array - - my $in_quote; - my $guessed_indentation_level; - - # This should be a return variable from extract_token - # These local token variables are stored by store_token_to_go: - my $rtoken_vars; - my $Ktoken_vars; - my $block_type; - my $ci_level; - my $container_environment; - my $container_type; - my $in_continued_quote; - my $level; - my $no_internal_newlines; - my $slevel; - my $token; - my $type; - my $type_sequence; - - # routine to pull the jth token from the line of tokens - sub extract_token { - my ( $self, $j ) = @_; - - my $rLL = $self->{rLL}; - $Ktoken_vars = $rinput_K_array->[$j]; - if ( !defined($Ktoken_vars) ) { - - # Shouldn't happen: an error here would be due to a recent program change - Fault("undefined index K for j=$j"); - } - $rtoken_vars = $rLL->[$Ktoken_vars]; - - if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) { - - # Shouldn't happen: an error here would be due to a recent program change - Fault(<[_TOKEN_]' ne '$rLL->[$Ktoken_vars]' -EOM - } - - ######################################################### - # these are now redundant and can eventually be eliminated - - $token = $rtoken_vars->[_TOKEN_]; - $type = $rtoken_vars->[_TYPE_]; - $block_type = $rtoken_vars->[_BLOCK_TYPE_]; - $container_type = $rtoken_vars->[_CONTAINER_TYPE_]; - $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_]; - $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - $level = $rtoken_vars->[_LEVEL_]; - $slevel = $rtoken_vars->[_SLEVEL_]; - $ci_level = $rtoken_vars->[_CI_LEVEL_]; - ######################################################### - - return; - } - - { - my @saved_token; - - sub save_current_token { - - @saved_token = ( - $block_type, $ci_level, - $container_environment, $container_type, - $in_continued_quote, $level, - $no_internal_newlines, $slevel, - $token, $type, - $type_sequence, $rtoken_vars, - $Ktoken_vars, - ); - return; - } - - sub restore_current_token { - ( - $block_type, $ci_level, - $container_environment, $container_type, - $in_continued_quote, $level, - $no_internal_newlines, $slevel, - $token, $type, - $type_sequence, $rtoken_vars, - $Ktoken_vars, - ) = @saved_token; - return; - } - } - - sub token_length { - - # Returns the length of a token, given: - # $token=text of the token - # $type = type - # $not_first_token = should be TRUE if this is not the first token of - # the line. It might the index of this token in an array. It is - # used to test for a side comment vs a block comment. - # Note: Eventually this should be the only routine determining the - # length of a token in this package. - my ( $token, $type, $not_first_token ) = @_; - my $token_length = length($token); - - # We mark lengths of side comments as just 1 if we are - # ignoring their lengths when setting line breaks. - $token_length = 1 - if ( $rOpts_ignore_side_comment_lengths - && $not_first_token - && $type eq '#' ); - return $token_length; - } - - sub rtoken_length { - - # return length of ith token in @{$rtokens} - my ($i) = @_; - return token_length( $rinput_token_array->[$i]->[_TOKEN_], - $rinput_token_array->[$i]->[_TYPE_], $i ); - } - - # Routine to place the current token into the output stream. - # Called once per output token. - sub store_token_to_go { - - my ( $self, $side_comment_follows ) = @_; - - my $flag = $side_comment_follows ? 1 : $no_internal_newlines; - - ++$max_index_to_go; - $K_to_go[$max_index_to_go] = $Ktoken_vars; - $rtoken_vars_to_go[$max_index_to_go] = $rtoken_vars; - $tokens_to_go[$max_index_to_go] = $token; - $types_to_go[$max_index_to_go] = $type; - $nobreak_to_go[$max_index_to_go] = $flag; - $old_breakpoint_to_go[$max_index_to_go] = 0; - $forced_breakpoint_to_go[$max_index_to_go] = 0; - $block_type_to_go[$max_index_to_go] = $block_type; - $type_sequence_to_go[$max_index_to_go] = $type_sequence; - $container_environment_to_go[$max_index_to_go] = $container_environment; - $ci_levels_to_go[$max_index_to_go] = $ci_level; - $mate_index_to_go[$max_index_to_go] = -1; - $matching_token_to_go[$max_index_to_go] = ''; - $bond_strength_to_go[$max_index_to_go] = 0; - - # Note: negative levels are currently retained as a diagnostic so that - # the 'final indentation level' is correctly reported for bad scripts. - # But this means that every use of $level as an index must be checked. - # If this becomes too much of a problem, we might give up and just clip - # them at zero. - ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; - $levels_to_go[$max_index_to_go] = $level; - $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; - - # link the non-blank tokens - my $iprev = $max_index_to_go - 1; - $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' ); - $iprev_to_go[$max_index_to_go] = $iprev; - $inext_to_go[$iprev] = $max_index_to_go - if ( $iprev >= 0 && $type ne 'b' ); - $inext_to_go[$max_index_to_go] = $max_index_to_go + 1; - - $token_lengths_to_go[$max_index_to_go] = - token_length( $token, $type, $max_index_to_go ); - - # We keep a running sum of token lengths from the start of this batch: - # summed_lengths_to_go[$i] = total length to just before token $i - # summed_lengths_to_go[$i+1] = total length to just after token $i - $summed_lengths_to_go[ $max_index_to_go + 1 ] = - $summed_lengths_to_go[$max_index_to_go] + - $token_lengths_to_go[$max_index_to_go]; - - # Define the indentation that this token would have if it started - # a new line. We have to do this now because we need to know this - # when considering one-line blocks. - set_leading_whitespace( $level, $ci_level, $in_continued_quote ); - - # remember previous nonblank tokens seen - if ( $type ne 'b' ) { - $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; - $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; - $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; - $last_nonblank_index_to_go = $max_index_to_go; - $last_nonblank_type_to_go = $type; - $last_nonblank_token_to_go = $token; - if ( $type eq ',' ) { - $comma_count_in_batch++; - } - } - - FORMATTER_DEBUG_FLAG_STORE && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; - }; - return; - } - - sub insert_new_token_to_go { - - # insert a new token into the output stream. use same level as - # previous token; assumes a character at max_index_to_go. - my $self = shift; - my @args = @_; - save_current_token(); - ( $token, $type, $slevel, $no_internal_newlines ) = @args; - - if ( $max_index_to_go == UNDEFINED_INDEX ) { - warning("code bug: bad call to insert_new_token_to_go\n"); - } - $level = $levels_to_go[$max_index_to_go]; - - # FIXME: it seems to be necessary to use the next, rather than - # previous, value of this variable when creating a new blank (align.t) - #my $slevel = $nesting_depth_to_go[$max_index_to_go]; - $ci_level = $ci_levels_to_go[$max_index_to_go]; - $container_environment = $container_environment_to_go[$max_index_to_go]; - $in_continued_quote = 0; - $block_type = ""; - $type_sequence = ""; - $self->store_token_to_go(); - restore_current_token(); - return; - } - - sub copy_hash { - my ($rold_token_hash) = @_; - my %new_token_hash = - map { $_, $rold_token_hash->{$_} } keys %{$rold_token_hash}; - return \%new_token_hash; - } - - sub copy_array { - my ($rold) = @_; - my @new = map { $_ } @{$rold}; - return \@new; - } - - sub copy_token_as_type { - my ( $rold_token, $type, $token ) = @_; - if ( $type eq 'b' ) { - $token = " " unless defined($token); - } - elsif ( $type eq 'q' ) { - $token = '' unless defined($token); - } - elsif ( $type eq '->' ) { - $token = '->' unless defined($token); - } - elsif ( $type eq ';' ) { - $token = ';' unless defined($token); - } - else { - Fault( -"Programming error: copy_token_as has type $type but should be 'b' or 'q'" - ); - } - my $rnew_token = copy_array($rold_token); - $rnew_token->[_TYPE_] = $type; - $rnew_token->[_TOKEN_] = $token; - $rnew_token->[_BLOCK_TYPE_] = ''; - $rnew_token->[_CONTAINER_TYPE_] = ''; - $rnew_token->[_CONTAINER_ENVIRONMENT_] = ''; - $rnew_token->[_TYPE_SEQUENCE_] = ''; - return $rnew_token; - } - - sub boolean_equals { - my ( $val1, $val2 ) = @_; - return ( $val1 && $val2 || !$val1 && !$val2 ); - } - - sub print_line_of_tokens { - - my ( $self, $line_of_tokens ) = @_; - - # This routine is called once per input line to process all of - # the tokens on that line. This is the first stage of - # beautification. - # - # Full-line comments and blank lines may be processed immediately. - # - # For normal lines of code, the tokens are stored one-by-one, - # via calls to 'sub store_token_to_go', until a known line break - # point is reached. Then, the batch of collected tokens is - # passed along to 'sub output_line_to_go' for further - # processing. This routine decides if there should be - # whitespace between each pair of non-white tokens, so later - # routines only need to decide on any additional line breaks. - # Any whitespace is initially a single space character. Later, - # the vertical aligner may expand that to be multiple space - # characters if necessary for alignment. - - $input_line_number = $line_of_tokens->{_line_number}; - my $input_line = $line_of_tokens->{_line_text}; - my $CODE_type = $line_of_tokens->{_code_type}; - - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $K_first, $K_last ) = @{$rK_range}; - - my $rLL = $self->{rLL}; - my $rbreak_container = $self->{rbreak_container}; - - if ( !defined($K_first) ) { - - # Unexpected blank line.. - # Calling routine was supposed to handle this - Perl::Tidy::Warn( -"Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring" - ); - return; - } - - $no_internal_newlines = 1 - $rOpts_add_newlines; - my $is_comment = - ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' ); - my $is_static_block_comment_without_leading_space = - $CODE_type eq 'SBCX'; - $is_static_block_comment = - $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space; - my $is_hanging_side_comment = $CODE_type eq 'HSC'; - my $is_VERSION_statement = $CODE_type eq 'VER'; - if ($is_VERSION_statement) { - $saw_VERSION_in_this_file = 1; - $no_internal_newlines = 1; - } - - # Add interline blank if any - my $last_old_nonblank_type = "b"; - my $first_new_nonblank_type = "b"; - my $first_new_nonblank_token = " "; - if ( $max_index_to_go >= 0 ) { - $last_old_nonblank_type = $types_to_go[$max_index_to_go]; - $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_]; - $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_]; - if ( !$is_comment - && $types_to_go[$max_index_to_go] ne 'b' - && $K_first > 0 - && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' ) - { - $K_first -= 1; - } - } - - # Copy the tokens into local arrays - $rinput_token_array = []; - $rinput_K_array = []; - $rinput_K_array = [ ( $K_first .. $K_last ) ]; - $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ]; - my $jmax = @{$rinput_K_array} - 1; - - $in_continued_quote = $starting_in_quote = - $line_of_tokens->{_starting_in_quote}; - $in_quote = $line_of_tokens->{_ending_in_quote}; - $ending_in_quote = $in_quote; - $guessed_indentation_level = - $line_of_tokens->{_guessed_indentation_level}; - - my $j_next; - my $next_nonblank_token; - my $next_nonblank_token_type; - - $block_type = ""; - $container_type = ""; - $container_environment = ""; - $type_sequence = ""; - - ###################################### - # Handle a block (full-line) comment.. - ###################################### - if ($is_comment) { - - if ( $rOpts->{'delete-block-comments'} ) { return } - - if ( $rOpts->{'tee-block-comments'} ) { - $file_writer_object->tee_on(); - } - - destroy_one_line_block(); - $self->output_line_to_go(); - - # output a blank line before block comments - if ( - # unless we follow a blank or comment line - $last_line_leading_type !~ /^[#b]$/ - - # only if allowed - && $rOpts->{'blanks-before-comments'} - - # if this is NOT an empty comment line - && $rinput_token_array->[0]->[_TOKEN_] ne '#' - - # not after a short line ending in an opening token - # because we already have space above this comment. - # Note that the first comment in this if block, after - # the 'if (', does not get a blank line because of this. - && !$last_output_short_opening_token - - # never before static block comments - && !$is_static_block_comment - ) - { - $self->flush(); # switching to new output stream - $file_writer_object->write_blank_code_line(); - $last_line_leading_type = 'b'; - } - - # TRIM COMMENTS -- This could be turned off as a option - $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end - - if ( - $rOpts->{'indent-block-comments'} - && ( !$rOpts->{'indent-spaced-block-comments'} - || $input_line =~ /^\s+/ ) - && !$is_static_block_comment_without_leading_space - ) - { - $self->extract_token(0); - $self->store_token_to_go(); - $self->output_line_to_go(); - } - else { - $self->flush(); # switching to new output stream - $file_writer_object->write_code_line( - $rinput_token_array->[0]->[_TOKEN_] . "\n" ); - $last_line_leading_type = '#'; - } - if ( $rOpts->{'tee-block-comments'} ) { - $file_writer_object->tee_off(); - } - return; - } - - # TODO: Move to sub scan_comments - # compare input/output indentation except for continuation lines - # (because they have an unknown amount of initial blank space) - # and lines which are quotes (because they may have been outdented) - # Note: this test is placed here because we know the continuation flag - # at this point, which allows us to avoid non-meaningful checks. - my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_]; - compare_indentation_levels( $guessed_indentation_level, - $structural_indentation_level ) - unless ( $is_hanging_side_comment - || $rinput_token_array->[0]->[_CI_LEVEL_] > 0 - || $guessed_indentation_level == 0 - && $rinput_token_array->[0]->[_TYPE_] eq 'Q' ); - - ########################## - # Handle indentation-only - ########################## - - # NOTE: In previous versions we sent all qw lines out immediately here. - # No longer doing this: also write a line which is entirely a 'qw' list - # to allow stacking of opening and closing tokens. Note that interior - # qw lines will still go out at the end of this routine. - ##if ( $rOpts->{'indent-only'} ) { - if ( $CODE_type eq 'IO' ) { - $self->flush(); - my $line = $input_line; - - # delete side comments if requested with -io, but - # we will not allow deleting of closing side comments with -io - # because the coding would be more complex - if ( $rOpts->{'delete-side-comments'} - && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' ) - { - - $line = ""; - foreach my $jj ( 0 .. $jmax - 1 ) { - $line .= $rinput_token_array->[$jj]->[_TOKEN_]; - } - } - $line = trim($line); - - $self->extract_token(0); - $token = $line; - $type = 'q'; - $block_type = ""; - $container_type = ""; - $container_environment = ""; - $type_sequence = ""; - $self->store_token_to_go(); - $self->output_line_to_go(); - return; - } - - ############################ - # Handle all other lines ... - ############################ - - ####################################################### - # FIXME: this should become unnecessary - # making $j+2 valid simplifies coding - my $rnew_blank = - copy_token_as_type( $rinput_token_array->[$jmax], 'b' ); - push @{$rinput_token_array}, $rnew_blank; - push @{$rinput_token_array}, $rnew_blank; - ####################################################### - - # If we just saw the end of an elsif block, write nag message - # if we do not see another elseif or an else. - if ($looking_for_else) { - - unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) { - write_logfile_entry("(No else block)\n"); - } - $looking_for_else = 0; - } - - # This is a good place to kill incomplete one-line blocks - if ( - ( - ( $semicolons_before_block_self_destruct == 0 ) - && ( $max_index_to_go >= 0 ) - && ( $last_old_nonblank_type eq ';' ) - && ( $first_new_nonblank_token ne '}' ) - ) - - # Patch for RT #98902. Honor request to break at old commas. - || ( $rOpts_break_at_old_comma_breakpoints - && $max_index_to_go >= 0 - && $last_old_nonblank_type eq ',' ) - ) - { - $forced_breakpoint_to_go[$max_index_to_go] = 1 - if ($rOpts_break_at_old_comma_breakpoints); - destroy_one_line_block(); - $self->output_line_to_go(); - } - - # loop to process the tokens one-by-one - $type = 'b'; - $token = ""; - - # We do not want a leading blank if the previous batch just got output - my $jmin = 0; - if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) { - $jmin = 1; - } - - foreach my $j ( $jmin .. $jmax ) { - - # pull out the local values for this token - $self->extract_token($j); - - if ( $type eq '#' ) { - - # trim trailing whitespace - # (there is no option at present to prevent this) - $token =~ s/\s*$//; - - if ( - $rOpts->{'delete-side-comments'} - - # delete closing side comments if necessary - || ( $rOpts->{'delete-closing-side-comments'} - && $token =~ /$closing_side_comment_prefix_pattern/o - && $last_nonblank_block_type =~ - /$closing_side_comment_list_pattern/o ) - ) - { - if ( $types_to_go[$max_index_to_go] eq 'b' ) { - unstore_token_to_go(); - } - last; - } - } - - # If we are continuing after seeing a right curly brace, flush - # buffer unless we see what we are looking for, as in - # } else ... - if ( $rbrace_follower && $type ne 'b' ) { - - unless ( $rbrace_follower->{$token} ) { - $self->output_line_to_go(); - } - $rbrace_follower = undef; - } - - $j_next = - ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' ) - ? $j + 2 - : $j + 1; - $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_]; - $next_nonblank_token_type = - $rinput_token_array->[$j_next]->[_TYPE_]; - - ###################### - # MAYBE MOVE ELSEWHERE? - ###################### - if ( $type eq 'Q' ) { - note_embedded_tab() if ( $token =~ "\t" ); - - # make note of something like '$var = s/xxx/yyy/;' - # in case it should have been '$var =~ s/xxx/yyy/;' - if ( - $token =~ /^(s|tr|y|m|\/)/ - && $last_nonblank_token =~ /^(=|==|!=)$/ - - # preceded by simple scalar - && $last_last_nonblank_type eq 'i' - && $last_last_nonblank_token =~ /^\$/ - - # followed by some kind of termination - # (but give complaint if we can's see far enough ahead) - && $next_nonblank_token =~ /^[; \)\}]$/ - - # scalar is not declared - && !( - $types_to_go[0] eq 'k' - && $tokens_to_go[0] =~ /^(my|our|local)$/ - ) - ) - { - my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; - complain( -"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" - ); - } - } - - # Do not allow breaks which would promote a side comment to a - # block comment. In order to allow a break before an opening - # or closing BLOCK, followed by a side comment, those sections - # of code will handle this flag separately. - my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); - my $is_opening_BLOCK = - ( $type eq '{' - && $token eq '{' - && $block_type - && $block_type ne 't' ); - my $is_closing_BLOCK = - ( $type eq '}' - && $token eq '}' - && $block_type - && $block_type ne 't' ); - - if ( $side_comment_follows - && !$is_opening_BLOCK - && !$is_closing_BLOCK ) - { - $no_internal_newlines = 1; - } - - # We're only going to handle breaking for code BLOCKS at this - # (top) level. Other indentation breaks will be handled by - # sub scan_list, which is better suited to dealing with them. - if ($is_opening_BLOCK) { - - # Tentatively output this token. This is required before - # calling starting_one_line_block. We may have to unstore - # it, though, if we have to break before it. - $self->store_token_to_go($side_comment_follows); - - # Look ahead to see if we might form a one-line block.. - my $too_long = 0; - - # But obey any flag set for cuddled blocks - if ( $rbreak_container->{$type_sequence} ) { - destroy_one_line_block(); - } - else { - $too_long = - starting_one_line_block( $j, $jmax, $level, $slevel, - $ci_level, $rinput_token_array ); - } - clear_breakpoint_undo_stack(); - - # to simplify the logic below, set a flag to indicate if - # this opening brace is far from the keyword which introduces it - my $keyword_on_same_line = 1; - if ( ( $max_index_to_go >= 0 ) - && ( $last_nonblank_type eq ')' ) ) - { - if ( $block_type =~ /^(if|else|elsif)$/ - && ( $tokens_to_go[0] eq '}' ) - && $rOpts_cuddled_else ) - { - $keyword_on_same_line = 1; - } - elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) - { - $keyword_on_same_line = 0; - } - } - - # decide if user requested break before '{' - my $want_break = - - # use -bl flag if not a sub block of any type - $block_type !~ /^sub\b/ - ? $rOpts->{'opening-brace-on-new-line'} - - # use -sbl flag for a named sub block - : $block_type !~ /$ASUB_PATTERN/ - ? $rOpts->{'opening-sub-brace-on-new-line'} - - # use -asbl flag for an anonymous sub block - : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; - - # Do not break if this token is welded to the left - if ( weld_len_left( $type_sequence, $token ) ) { - $want_break = 0; - } - - # Break before an opening '{' ... - if ( - - # if requested - $want_break - - # and we were unable to start looking for a block, - && $index_start_one_line_block == UNDEFINED_INDEX - - # or if it will not be on same line as its keyword, so that - # it will be outdented (eval.t, overload.t), and the user - # has not insisted on keeping it on the right - || ( !$keyword_on_same_line - && !$rOpts->{'opening-brace-always-on-right'} ) - - ) - { - - # but only if allowed - unless ($no_internal_newlines) { - - # since we already stored this token, we must unstore it - $self->unstore_token_to_go(); - - # then output the line - $self->output_line_to_go(); - - # and now store this token at the start of a new line - $self->store_token_to_go($side_comment_follows); - } - } - - # Now update for side comment - if ($side_comment_follows) { $no_internal_newlines = 1 } - - # now output this line - unless ($no_internal_newlines) { - $self->output_line_to_go(); - } - } - - elsif ($is_closing_BLOCK) { - - # If there is a pending one-line block .. - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { - - # we have to terminate it if.. - if ( - - # it is too long (final length may be different from - # initial estimate). note: must allow 1 space for this - # token - excess_line_length( $index_start_one_line_block, - $max_index_to_go ) >= 0 - - # or if it has too many semicolons - || ( $semicolons_before_block_self_destruct == 0 - && $last_nonblank_type ne ';' ) - ) - { - destroy_one_line_block(); - } - } - - # put a break before this closing curly brace if appropriate - unless ( $no_internal_newlines - || $index_start_one_line_block != UNDEFINED_INDEX ) - { - - # write out everything before this closing curly brace - $self->output_line_to_go(); - } - - # Now update for side comment - if ($side_comment_follows) { $no_internal_newlines = 1 } - - # store the closing curly brace - $self->store_token_to_go(); - - # ok, we just stored a closing curly brace. Often, but - # not always, we want to end the line immediately. - # So now we have to check for special cases. - - # if this '}' successfully ends a one-line block.. - my $is_one_line_block = 0; - my $keep_going = 0; - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { - - # Remember the type of token just before the - # opening brace. It would be more general to use - # a stack, but this will work for one-line blocks. - $is_one_line_block = - $types_to_go[$index_start_one_line_block]; - - # we have to actually make it by removing tentative - # breaks that were set within it - undo_forced_breakpoint_stack(0); - set_nobreaks( $index_start_one_line_block, - $max_index_to_go - 1 ); - - # then re-initialize for the next one-line block - destroy_one_line_block(); - - # then decide if we want to break after the '}' .. - # We will keep going to allow certain brace followers as in: - # do { $ifclosed = 1; last } unless $losing; - # - # But make a line break if the curly ends a - # significant block: - if ( - ( - $is_block_without_semicolon{$block_type} - - # Follow users break point for - # one line block types U & G, such as a 'try' block - || $is_one_line_block =~ /^[UG]$/ && $j == $jmax - ) - - # if needless semicolon follows we handle it later - && $next_nonblank_token ne ';' - ) - { - $self->output_line_to_go() - unless ($no_internal_newlines); - } - } - - # set string indicating what we need to look for brace follower - # tokens - if ( $block_type eq 'do' ) { - $rbrace_follower = \%is_do_follower; - } - elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { - $rbrace_follower = \%is_if_brace_follower; - } - elsif ( $block_type eq 'else' ) { - $rbrace_follower = \%is_else_brace_follower; - } - - # added eval for borris.t - elsif ($is_sort_map_grep_eval{$block_type} - || $is_one_line_block eq 'G' ) - { - $rbrace_follower = undef; - $keep_going = 1; - } - - # anonymous sub - elsif ( $block_type =~ /$ASUB_PATTERN/ ) { - - if ($is_one_line_block) { - $rbrace_follower = \%is_anon_sub_1_brace_follower; - } - else { - $rbrace_follower = \%is_anon_sub_brace_follower; - } - } - - # None of the above: specify what can follow a closing - # brace of a block which is not an - # if/elsif/else/do/sort/map/grep/eval - # Testfiles: - # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t - else { - $rbrace_follower = \%is_other_brace_follower; - } - - # See if an elsif block is followed by another elsif or else; - # complain if not. - if ( $block_type eq 'elsif' ) { - - if ( $next_nonblank_token_type eq 'b' ) { # end of line? - $looking_for_else = 1; # ok, check on next line - } - else { - - unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { - write_logfile_entry("No else block :(\n"); - } - } - } - - # keep going after certain block types (map,sort,grep,eval) - # added eval for borris.t - if ($keep_going) { - - # keep going - } - - # if no more tokens, postpone decision until re-entring - elsif ( ( $next_nonblank_token_type eq 'b' ) - && $rOpts_add_newlines ) - { - unless ($rbrace_follower) { - $self->output_line_to_go() - unless ($no_internal_newlines); - } - } - - elsif ($rbrace_follower) { - - unless ( $rbrace_follower->{$next_nonblank_token} ) { - $self->output_line_to_go() - unless ($no_internal_newlines); - } - $rbrace_follower = undef; - } - - else { - $self->output_line_to_go() unless ($no_internal_newlines); - } - - } # end treatment of closing block token - - # handle semicolon - elsif ( $type eq ';' ) { - - # kill one-line blocks with too many semicolons - $semicolons_before_block_self_destruct--; - if ( - ( $semicolons_before_block_self_destruct < 0 ) - || ( $semicolons_before_block_self_destruct == 0 - && $next_nonblank_token_type !~ /^[b\}]$/ ) - ) - { - destroy_one_line_block(); - } - - # Remove unnecessary semicolons, but not after bare - # blocks, where it could be unsafe if the brace is - # mistokenized. - if ( - ( - $last_nonblank_token eq '}' - && ( - $is_block_without_semicolon{ - $last_nonblank_block_type} - || $last_nonblank_block_type =~ /$SUB_PATTERN/ - || $last_nonblank_block_type =~ /^\w+:$/ ) - ) - || $last_nonblank_type eq ';' - ) - { - - if ( - $rOpts->{'delete-semicolons'} - - # don't delete ; before a # because it would promote it - # to a block comment - && ( $next_nonblank_token_type ne '#' ) - ) - { - note_deleted_semicolon(); - $self->output_line_to_go() - unless ( $no_internal_newlines - || $index_start_one_line_block != UNDEFINED_INDEX ); - next; - } - else { - write_logfile_entry("Extra ';'\n"); - } - } - $self->store_token_to_go(); - - $self->output_line_to_go() - unless ( $no_internal_newlines - || ( $rOpts_keep_interior_semicolons && $j < $jmax ) - || ( $next_nonblank_token eq '}' ) ); - - } - - # handle here_doc target string - elsif ( $type eq 'h' ) { - - # no newlines after seeing here-target - $no_internal_newlines = 1; - destroy_one_line_block(); - $self->store_token_to_go(); - } - - # handle all other token types - else { - - $self->store_token_to_go(); - } - - # remember two previous nonblank OUTPUT tokens - if ( $type ne '#' && $type ne 'b' ) { - $last_last_nonblank_token = $last_nonblank_token; - $last_last_nonblank_type = $last_nonblank_type; - $last_nonblank_token = $token; - $last_nonblank_type = $type; - $last_nonblank_block_type = $block_type; - } - - # unset the continued-quote flag since it only applies to the - # first token, and we want to resume normal formatting if - # there are additional tokens on the line - $in_continued_quote = 0; - - } # end of loop over all tokens in this 'line_of_tokens' - - # we have to flush .. - if ( - - # if there is a side comment - ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) - - # if this line ends in a quote - # NOTE: This is critically important for insuring that quoted lines - # do not get processed by things like -sot and -sct - || $in_quote - - # if this is a VERSION statement - || $is_VERSION_statement - - # to keep a label at the end of a line - || $type eq 'J' - - # if we are instructed to keep all old line breaks - || !$rOpts->{'delete-old-newlines'} - ) - { - destroy_one_line_block(); - $self->output_line_to_go(); - } - - # mark old line breakpoints in current output stream - if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { - my $jobp = $max_index_to_go; - if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 ) - { - $jobp--; - } - $old_breakpoint_to_go[$jobp] = 1; - } - return; - } ## end sub print_line_of_tokens -} ## end block print_line_of_tokens - -# sub output_line_to_go sends one logical line of tokens on down the -# pipeline to the VerticalAligner package, breaking the line into continuation -# lines as necessary. The line of tokens is ready to go in the "to_go" -# arrays. -sub output_line_to_go { - - my $self = shift; - - # debug stuff; this routine can be called from many points - FORMATTER_DEBUG_FLAG_OUTPUT && do { - my ( $a, $b, $c ) = caller; - write_diagnostics( -"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" - ); - my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; - write_diagnostics("$output_str\n"); - }; - - # Do not end line in a weld - # TODO: Move this fix into the routine? - #my $jnb = $max_index_to_go; - #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- } - return if ( weld_len_right_to_go($max_index_to_go) ); - - # just set a tentative breakpoint if we might be in a one-line block - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { - set_forced_breakpoint($max_index_to_go); - return; - } - - my $cscw_block_comment; - $cscw_block_comment = $self->add_closing_side_comment() - if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); - - my $comma_arrow_count_contained = match_opening_and_closing_tokens(); - - # tell the -lp option we are outputting a batch so it can close - # any unfinished items in its stack - finish_lp_batch(); - - # If this line ends in a code block brace, set breaks at any - # previous closing code block braces to breakup a chain of code - # blocks on one line. This is very rare but can happen for - # user-defined subs. For example we might be looking at this: - # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { - my $saw_good_break = 0; # flag to force breaks even if short line - if ( - - # looking for opening or closing block brace - $block_type_to_go[$max_index_to_go] - - # but not one of these which are never duplicated on a line: - # until|while|for|if|elsif|else - && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } - ) - { - my $lev = $nesting_depth_to_go[$max_index_to_go]; - - # Walk backwards from the end and - # set break at any closing block braces at the same level. - # But quit if we are not in a chain of blocks. - for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { - last if ( $levels_to_go[$i] < $lev ); # stop at a lower level - next if ( $levels_to_go[$i] > $lev ); # skip past higher level - - if ( $block_type_to_go[$i] ) { - if ( $tokens_to_go[$i] eq '}' ) { - set_forced_breakpoint($i); - $saw_good_break = 1; - } - } - - # quit if we see anything besides words, function, blanks - # at this level - elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } - } - } - - my $imin = 0; - my $imax = $max_index_to_go; - - # trim any blank tokens - if ( $max_index_to_go >= 0 ) { - if ( $types_to_go[$imin] eq 'b' ) { $imin++ } - if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - } - - # anything left to write? - if ( $imin <= $imax ) { - - # add a blank line before certain key types but not after a comment - if ( $last_line_leading_type !~ /^[#]/ ) { - my $want_blank = 0; - my $leading_token = $tokens_to_go[$imin]; - my $leading_type = $types_to_go[$imin]; - - # blank lines before subs except declarations and one-liners - # MCONVERSION LOCATION - for sub tokenization change - if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { - $want_blank = $rOpts->{'blank-lines-before-subs'} - if ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) !~ /^[\;\}]$/ - ); - } - - # break before all package declarations - # MCONVERSION LOCATION - for tokenizaton change - elsif ($leading_token =~ /^(package\s)/ - && $leading_type eq 'i' ) - { - $want_blank = $rOpts->{'blank-lines-before-packages'}; - } - - # break before certain key blocks except one-liners - if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { - $want_blank = $rOpts->{'blank-lines-before-subs'} - if ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) ne '}' - ); - } - - # Break before certain block types if we haven't had a - # break at this level for a while. This is the - # difficult decision.. - elsif ($leading_type eq 'k' - && $last_line_leading_type ne 'b' - && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ ) - { - my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; - if ( !defined($lc) ) { $lc = 0 } - - $want_blank = - $rOpts->{'blanks-before-blocks'} - && $lc >= $rOpts->{'long-block-line-count'} - && $file_writer_object->get_consecutive_nonblank_lines() >= - $rOpts->{'long-block-line-count'} - && ( - terminal_type( \@types_to_go, \@block_type_to_go, $imin, - $imax ) ne '}' - ); - } - - # Check for blank lines wanted before a closing brace - if ( $leading_token eq '}' ) { - if ( $rOpts->{'blank-lines-before-closing-block'} - && $block_type_to_go[$imin] - && $block_type_to_go[$imin] =~ - /$blank_lines_before_closing_block_pattern/ ) - { - my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; - if ( $nblanks > $want_blank ) { - $want_blank = $nblanks; - } - } - } - - if ($want_blank) { - - # future: send blank line down normal path to VerticalAligner - Perl::Tidy::VerticalAligner::flush(); - $file_writer_object->require_blank_code_lines($want_blank); - } - } - - # update blank line variables and count number of consecutive - # non-blank, non-comment lines at this level - $last_last_line_leading_level = $last_line_leading_level; - $last_line_leading_level = $levels_to_go[$imin]; - if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } - $last_line_leading_type = $types_to_go[$imin]; - if ( $last_line_leading_level == $last_last_line_leading_level - && $last_line_leading_type ne 'b' - && $last_line_leading_type ne '#' - && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) - { - $nonblank_lines_at_depth[$last_line_leading_level]++; - } - else { - $nonblank_lines_at_depth[$last_line_leading_level] = 1; - } - - FORMATTER_DEBUG_FLAG_FLUSH && do { - my ( $package, $file, $line ) = caller; - print STDOUT -"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; - }; - - # add a couple of extra terminal blank tokens - pad_array_to_go(); - - # set all forced breakpoints for good list formatting - my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; - - my $old_line_count_in_batch = - $rtoken_vars_to_go[$max_index_to_go]->[_LINE_INDEX_] - - $rtoken_vars_to_go[0]->[_LINE_INDEX_] + 1; - - if ( - $is_long_line - || $old_line_count_in_batch > 1 - - # must always call scan_list() with unbalanced batches because it - # is maintaining some stacks - || is_unbalanced_batch() - - # call scan_list if we might want to break at commas - || ( - $comma_count_in_batch - && ( $rOpts_maximum_fields_per_table > 0 - || $rOpts_comma_arrow_breakpoints == 0 ) - ) - - # call scan_list if user may want to break open some one-line - # hash references - || ( $comma_arrow_count_contained - && $rOpts_comma_arrow_breakpoints != 3 ) - ) - { - ## This caused problems in one version of perl for unknown reasons: - ## $saw_good_break ||= scan_list(); - my $sgb = scan_list(); - $saw_good_break ||= $sgb; - } - - # let $ri_first and $ri_last be references to lists of - # first and last tokens of line fragments to output.. - my ( $ri_first, $ri_last ); - - # write a single line if.. - if ( - - # we aren't allowed to add any newlines - !$rOpts_add_newlines - - # or, we don't already have an interior breakpoint - # and we didn't see a good breakpoint - || ( - !$forced_breakpoint_count - && !$saw_good_break - - # and this line is 'short' - && !$is_long_line - ) - ) - { - @{$ri_first} = ($imin); - @{$ri_last} = ($imax); - } - - # otherwise use multiple lines - else { - - ( $ri_first, $ri_last, my $colon_count ) = - set_continuation_breaks($saw_good_break); - - break_all_chain_tokens( $ri_first, $ri_last ); - - break_equals( $ri_first, $ri_last ); - - # now we do a correction step to clean this up a bit - # (The only time we would not do this is for debugging) - if ( $rOpts->{'recombine'} ) { - ( $ri_first, $ri_last ) = - recombine_breakpoints( $ri_first, $ri_last ); - } - - insert_final_breaks( $ri_first, $ri_last ) if $colon_count; - } - - # do corrector step if -lp option is used - my $do_not_pad = 0; - if ($rOpts_line_up_parentheses) { - $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); - } - $self->unmask_phantom_semicolons( $ri_first, $ri_last ); - $self->send_lines_to_vertical_aligner( $ri_first, $ri_last, - $do_not_pad ); - - # Insert any requested blank lines after an opening brace. We have to - # skip back before any side comment to find the terminal token - my $iterm; - for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) { - next if $types_to_go[$iterm] eq '#'; - next if $types_to_go[$iterm] eq 'b'; - last; - } - - # write requested number of blank lines after an opening block brace - if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) { - if ( $rOpts->{'blank-lines-after-opening-block'} - && $block_type_to_go[$iterm] - && $block_type_to_go[$iterm] =~ - /$blank_lines_after_opening_block_pattern/ ) - { - my $nblanks = $rOpts->{'blank-lines-after-opening-block'}; - Perl::Tidy::VerticalAligner::flush(); - $file_writer_object->require_blank_code_lines($nblanks); - } - } - } - - prepare_for_new_input_lines(); - - # output any new -cscw block comment - if ($cscw_block_comment) { - $self->flush(); - $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); - } - return; -} - -sub note_added_semicolon { - my ($line_number) = @_; - $last_added_semicolon_at = $line_number; - if ( $added_semicolon_count == 0 ) { - $first_added_semicolon_at = $last_added_semicolon_at; - } - $added_semicolon_count++; - write_logfile_entry("Added ';' here\n"); - return; -} - -sub note_deleted_semicolon { - $last_deleted_semicolon_at = $input_line_number; - if ( $deleted_semicolon_count == 0 ) { - $first_deleted_semicolon_at = $last_deleted_semicolon_at; - } - $deleted_semicolon_count++; - write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) - return; -} - -sub note_embedded_tab { - $embedded_tab_count++; - $last_embedded_tab_at = $input_line_number; - if ( !$first_embedded_tab_at ) { - $first_embedded_tab_at = $last_embedded_tab_at; - } - - if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { - write_logfile_entry("Embedded tabs in quote or pattern\n"); - } - return; -} - -sub starting_one_line_block { - - # after seeing an opening curly brace, look for the closing brace - # and see if the entire block will fit on a line. This routine is - # not always right because it uses the old whitespace, so a check - # is made later (at the closing brace) to make sure we really - # have a one-line block. We have to do this preliminary check, - # though, because otherwise we would always break at a semicolon - # within a one-line block if the block contains multiple statements. - - my ( $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_; - - my $jmax_check = @{$rtoken_array}; - if ( $jmax_check < $jmax ) { - print STDERR "jmax=$jmax > $jmax_check\n"; - } - - # kill any current block - we can only go 1 deep - destroy_one_line_block(); - - # return value: - # 1=distance from start of block to opening brace exceeds line length - # 0=otherwise - - my $i_start = 0; - - # shouldn't happen: there must have been a prior call to - # store_token_to_go to put the opening brace in the output stream - if ( $max_index_to_go < 0 ) { - warning("program bug: store_token_to_go called incorrectly\n"); - report_definite_bug(); - } - else { - - # cannot use one-line blocks with cuddled else/elsif lines - if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { - return 0; - } - } - - my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_]; - - # find the starting keyword for this block (such as 'if', 'else', ...) - - if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) { - $i_start = $max_index_to_go; - } - - # the previous nonblank token should start these block types - elsif (( $last_last_nonblank_token_to_go eq $block_type ) - || ( $block_type =~ /^sub\b/ ) - || $block_type =~ /\(\)/ ) - { - $i_start = $last_last_nonblank_index_to_go; - - # For signatures and extended syntax ... - # If this brace follows a parenthesized list, we should look back to - # find the keyword before the opening paren because otherwise we might - # form a one line block which stays intack, and cause the parenthesized - # expression to break open. That looks bad. However, actually - # searching for the opening paren is slow and tedius. - # The actual keyword is often at the start of a line, but might not be. - # For example, we might have an anonymous sub with signature list - # following a =>. It is safe to mark the start anywhere before the - # opening paren, so we just go back to the prevoious break (or start of - # the line) if that is before the opening paren. The minor downside is - # that we may very occasionally break open a block unnecessarily. - if ( $tokens_to_go[$i_start] eq ')' ) { - $i_start = $index_max_forced_break + 1; - if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; } - my $lev = $levels_to_go[$i_start]; - if ( $lev > $level ) { return 0 } - } - } - - elsif ( $last_last_nonblank_token_to_go eq ')' ) { - - # For something like "if (xxx) {", the keyword "if" will be - # just after the most recent break. This will be 0 unless - # we have just killed a one-line block and are starting another. - # (doif.t) - # Note: cannot use inext_index_to_go[] here because that array - # is still being constructed. - $i_start = $index_max_forced_break + 1; - if ( $types_to_go[$i_start] eq 'b' ) { - $i_start++; - } - - # Patch to avoid breaking short blocks defined with extended_syntax: - # Strip off any trailing () which was added in the parser to mark - # the opening keyword. For example, in the following - # create( TypeFoo $e) {$bubba} - # the blocktype would be marked as create() - my $stripped_block_type = $block_type; - $stripped_block_type =~ s/\(\)$//; - - unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) { - return 0; - } - } - - # patch for SWITCH/CASE to retain one-line case/when blocks - elsif ( $block_type eq 'case' || $block_type eq 'when' ) { - - # Note: cannot use inext_index_to_go[] here because that array - # is still being constructed. - $i_start = $index_max_forced_break + 1; - if ( $types_to_go[$i_start] eq 'b' ) { - $i_start++; - } - unless ( $tokens_to_go[$i_start] eq $block_type ) { - return 0; - } - } - - else { - return 1; - } - - my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; - - # see if length is too long to even start - if ( $pos > maximum_line_length($i_start) ) { - return 1; - } - - foreach my $i ( $j + 1 .. $jmax ) { - - # old whitespace could be arbitrarily large, so don't use it - if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 } - else { $pos += rtoken_length($i) } - - # Return false result if we exceed the maximum line length, - if ( $pos > maximum_line_length($i_start) ) { - return 0; - } - - # or encounter another opening brace before finding the closing brace. - elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{' - && $rtoken_array->[$i]->[_TYPE_] eq '{' - && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) - { - return 0; - } - - # if we find our closing brace.. - elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}' - && $rtoken_array->[$i]->[_TYPE_] eq '}' - && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) - { - - # be sure any trailing comment also fits on the line - my $i_nonblank = - ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1; - - # Patch for one-line sort/map/grep/eval blocks with side comments: - # We will ignore the side comment length for sort/map/grep/eval - # because this can lead to statements which change every time - # perltidy is run. Here is an example from Denis Moskowitz which - # oscillates between these two states without this patch: - -## -------- -## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf -## @baz; -## -## grep { -## $_->foo ne 'bar' -## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf -## @baz; -## -------- - - # When the first line is input it gets broken apart by the main - # line break logic in sub print_line_of_tokens. - # When the second line is input it gets recombined by - # print_line_of_tokens and passed to the output routines. The - # output routines (set_continuation_breaks) do not break it apart - # because the bond strengths are set to the highest possible value - # for grep/map/eval/sort blocks, so the first version gets output. - # It would be possible to fix this by changing bond strengths, - # but they are high to prevent errors in older versions of perl. - - if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#' - && !$is_sort_map_grep{$block_type} ) - { - - $pos += rtoken_length($i_nonblank); - - if ( $i_nonblank > $i + 1 ) { - - # source whitespace could be anything, assume - # at least one space before the hash on output - if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) { - $pos += 1; - } - else { $pos += rtoken_length( $i + 1 ) } - } - - if ( $pos >= maximum_line_length($i_start) ) { - return 0; - } - } - - # ok, it's a one-line block - create_one_line_block( $i_start, 20 ); - return 0; - } - - # just keep going for other characters - else { - } - } - - # Allow certain types of new one-line blocks to form by joining - # input lines. These can be safely done, but for other block types, - # we keep old one-line blocks but do not form new ones. It is not - # always a good idea to make as many one-line blocks as possible, - # so other types are not done. The user can always use -mangle. - if ( $is_sort_map_grep_eval{$block_type} ) { - create_one_line_block( $i_start, 1 ); - } - return 0; -} - -sub unstore_token_to_go { - - # remove most recent token from output stream - my $self = shift; - if ( $max_index_to_go > 0 ) { - $max_index_to_go--; - } - else { - $max_index_to_go = UNDEFINED_INDEX; - } - return; -} - -sub want_blank_line { - my $self = shift; - $self->flush(); - $file_writer_object->want_blank_line(); - return; -} - -sub write_unindented_line { - my ( $self, $line ) = @_; - $self->flush(); - $file_writer_object->write_line($line); - return; -} - -sub undo_ci { - - # Undo continuation indentation in certain sequences - # For example, we can undo continuation indentation in sort/map/grep chains - # my $dat1 = pack( "n*", - # map { $_, $lookup->{$_} } - # sort { $a <=> $b } - # grep { $lookup->{$_} ne $default } keys %$lookup ); - # To align the map/sort/grep keywords like this: - # my $dat1 = pack( "n*", - # map { $_, $lookup->{$_} } - # sort { $a <=> $b } - # grep { $lookup->{$_} ne $default } keys %$lookup ); - my ( $ri_first, $ri_last ) = @_; - my ( $line_1, $line_2, $lev_last ); - my $this_line_is_semicolon_terminated; - my $max_line = @{$ri_first} - 1; - - # looking at each line of this batch.. - # We are looking at leading tokens and looking for a sequence - # all at the same level and higher level than enclosing lines. - foreach my $line ( 0 .. $max_line ) { - - my $ibeg = $ri_first->[$line]; - my $lev = $levels_to_go[$ibeg]; - if ( $line > 0 ) { - - # if we have started a chain.. - if ($line_1) { - - # see if it continues.. - if ( $lev == $lev_last ) { - if ( $types_to_go[$ibeg] eq 'k' - && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) - { - - # chain continues... - # check for chain ending at end of a statement - if ( $line == $max_line ) { - - # see of this line ends a statement - my $iend = $ri_last->[$line]; - $this_line_is_semicolon_terminated = - $types_to_go[$iend] eq ';' - - # with possible side comment - || ( $types_to_go[$iend] eq '#' - && $iend - $ibeg >= 2 - && $types_to_go[ $iend - 2 ] eq ';' - && $types_to_go[ $iend - 1 ] eq 'b' ); - } - $line_2 = $line if ($this_line_is_semicolon_terminated); - } - else { - - # kill chain - $line_1 = undef; - } - } - elsif ( $lev < $lev_last ) { - - # chain ends with previous line - $line_2 = $line - 1; - } - elsif ( $lev > $lev_last ) { - - # kill chain - $line_1 = undef; - } - - # undo the continuation indentation if a chain ends - if ( defined($line_2) && defined($line_1) ) { - my $continuation_line_count = $line_2 - $line_1 + 1; - @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] = - (0) x ($continuation_line_count) - if ( $continuation_line_count >= 0 ); - @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] - = @reduced_spaces_to_go[ @{$ri_first} - [ $line_1 .. $line_2 ] ]; - $line_1 = undef; - } - } - - # not in a chain yet.. - else { - - # look for start of a new sort/map/grep chain - if ( $lev > $lev_last ) { - if ( $types_to_go[$ibeg] eq 'k' - && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) - { - $line_1 = $line; - } - } - } - } - $lev_last = $lev; - } - return; -} - -sub undo_lp_ci { - - # If there is a single, long parameter within parens, like this: - # - # $self->command( "/msg " - # . $infoline->chan - # . " You said $1, but did you know that it's square was " - # . $1 * $1 . " ?" ); - # - # we can remove the continuation indentation of the 2nd and higher lines - # to achieve this effect, which is more pleasing: - # - # $self->command("/msg " - # . $infoline->chan - # . " You said $1, but did you know that it's square was " - # . $1 * $1 . " ?"); - - my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; - my $max_line = @{$ri_first} - 1; - - # must be multiple lines - return unless $max_line > $line_open; - - my $lev_start = $levels_to_go[$i_start]; - my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; - - # see if all additional lines in this container have continuation - # indentation - my $n; - my $line_1 = 1 + $line_open; - for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { - my $ibeg = $ri_first->[$n]; - my $iend = $ri_last->[$n]; - if ( $ibeg eq $closing_index ) { $n--; last } - return if ( $lev_start != $levels_to_go[$ibeg] ); - return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); - last if ( $closing_index <= $iend ); - } - - # we can reduce the indentation of all continuation lines - my $continuation_line_count = $n - $line_open; - @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = - (0) x ($continuation_line_count); - @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = - @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ]; - return; -} - -sub pad_token { - - # insert $pad_spaces before token number $ipad - my ( $ipad, $pad_spaces ) = @_; - if ( $pad_spaces > 0 ) { - $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; - } - elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) { - $tokens_to_go[$ipad] = ""; - } - else { - - # shouldn't happen - return; - } - - $token_lengths_to_go[$ipad] += $pad_spaces; - foreach my $i ( $ipad .. $max_index_to_go ) { - $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; - } - return; -} - -{ - my %is_math_op; - - BEGIN { - - my @q = qw( + - * / ); - @is_math_op{@q} = (1) x scalar(@q); - } - - sub set_logical_padding { - - # Look at a batch of lines and see if extra padding can improve the - # alignment when there are certain leading operators. Here is an - # example, in which some extra space is introduced before - # '( $year' to make it line up with the subsequent lines: - # - # if ( ( $Year < 1601 ) - # || ( $Year > 2899 ) - # || ( $EndYear < 1601 ) - # || ( $EndYear > 2899 ) ) - # { - # &Error_OutOfRange; - # } - # - my ( $ri_first, $ri_last ) = @_; - my $max_line = @{$ri_first} - 1; - - # FIXME: move these declarations below - my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces, - $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); - - # looking at each line of this batch.. - foreach my $line ( 0 .. $max_line - 1 ) { - - # see if the next line begins with a logical operator - $ibeg = $ri_first->[$line]; - $iend = $ri_last->[$line]; - $ibeg_next = $ri_first->[ $line + 1 ]; - $tok_next = $tokens_to_go[$ibeg_next]; - $type_next = $types_to_go[$ibeg_next]; - - $has_leading_op_next = ( $tok_next =~ /^\w/ ) - ? $is_chain_operator{$tok_next} # + - * / : ? && || - : $is_chain_operator{$type_next}; # and, or - - next unless ($has_leading_op_next); - - # next line must not be at lesser depth - next - if ( $nesting_depth_to_go[$ibeg] > - $nesting_depth_to_go[$ibeg_next] ); - - # identify the token in this line to be padded on the left - $ipad = undef; - - # handle lines at same depth... - if ( $nesting_depth_to_go[$ibeg] == - $nesting_depth_to_go[$ibeg_next] ) - { - - # if this is not first line of the batch ... - if ( $line > 0 ) { - - # and we have leading operator.. - next if $has_leading_op; - - # Introduce padding if.. - # 1. the previous line is at lesser depth, or - # 2. the previous line ends in an assignment - # 3. the previous line ends in a 'return' - # 4. the previous line ends in a comma - # Example 1: previous line at lesser depth - # if ( ( $Year < 1601 ) # <- we are here but - # || ( $Year > 2899 ) # list has not yet - # || ( $EndYear < 1601 ) # collapsed vertically - # || ( $EndYear > 2899 ) ) - # { - # - # Example 2: previous line ending in assignment: - # $leapyear = - # $year % 4 ? 0 # <- We are here - # : $year % 100 ? 1 - # : $year % 400 ? 0 - # : 1; - # - # Example 3: previous line ending in comma: - # push @expr, - # /test/ ? undef - # : eval($_) ? 1 - # : eval($_) ? 1 - # : 0; - - # be sure levels agree (do not indent after an indented 'if') - next - if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); - - # allow padding on first line after a comma but only if: - # (1) this is line 2 and - # (2) there are at more than three lines and - # (3) lines 3 and 4 have the same leading operator - # These rules try to prevent padding within a long - # comma-separated list. - my $ok_comma; - if ( $types_to_go[$iendm] eq ',' - && $line == 1 - && $max_line > 2 ) - { - my $ibeg_next_next = $ri_first->[ $line + 2 ]; - my $tok_next_next = $tokens_to_go[$ibeg_next_next]; - $ok_comma = $tok_next_next eq $tok_next; - } - - next - unless ( - $is_assignment{ $types_to_go[$iendm] } - || $ok_comma - || ( $nesting_depth_to_go[$ibegm] < - $nesting_depth_to_go[$ibeg] ) - || ( $types_to_go[$iendm] eq 'k' - && $tokens_to_go[$iendm] eq 'return' ) - ); - - # we will add padding before the first token - $ipad = $ibeg; - } - - # for first line of the batch.. - else { - - # WARNING: Never indent if first line is starting in a - # continued quote, which would change the quote. - next if $starting_in_quote; - - # if this is text after closing '}' - # then look for an interior token to pad - if ( $types_to_go[$ibeg] eq '}' ) { - - } - - # otherwise, we might pad if it looks really good - else { - - # we might pad token $ibeg, so be sure that it - # is at the same depth as the next line. - next - if ( $nesting_depth_to_go[$ibeg] != - $nesting_depth_to_go[$ibeg_next] ); - - # We can pad on line 1 of a statement if at least 3 - # lines will be aligned. Otherwise, it - # can look very confusing. - - # We have to be careful not to pad if there are too few - # lines. The current rule is: - # (1) in general we require at least 3 consecutive lines - # with the same leading chain operator token, - # (2) but an exception is that we only require two lines - # with leading colons if there are no more lines. For example, - # the first $i in the following snippet would get padding - # by the second rule: - # - # $i == 1 ? ( "First", "Color" ) - # : $i == 2 ? ( "Then", "Rarity" ) - # : ( "Then", "Name" ); - - if ( $max_line > 1 ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $tokens_differ; - - # never indent line 1 of a '.' series because - # previous line is most likely at same level. - # TODO: we should also look at the leasing_spaces - # of the last output line and skip if it is same - # as this line. - next if ( $leading_token eq '.' ); - - my $count = 1; - foreach my $l ( 2 .. 3 ) { - last if ( $line + $l > $max_line ); - my $ibeg_next_next = $ri_first->[ $line + $l ]; - if ( $tokens_to_go[$ibeg_next_next] ne - $leading_token ) - { - $tokens_differ = 1; - last; - } - $count++; - } - next if ($tokens_differ); - next if ( $count < 3 && $leading_token ne ':' ); - $ipad = $ibeg; - } - else { - next; - } - } - } - } - - # find interior token to pad if necessary - if ( !defined($ipad) ) { - - for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { - - # find any unclosed container - next - unless ( $type_sequence_to_go[$i] - && $mate_index_to_go[$i] > $iend ); - - # find next nonblank token to pad - $ipad = $inext_to_go[$i]; - last if ( $ipad > $iend ); - } - last unless $ipad; - } - - # We cannot pad the first leading token of a file because - # it could cause a bug in which the starting indentation - # level is guessed incorrectly each time the code is run - # though perltidy, thus causing the code to march off to - # the right. For example, the following snippet would have - # this problem: - -## ov_method mycan( $package, '(""' ), $package -## or ov_method mycan( $package, '(0+' ), $package -## or ov_method mycan( $package, '(bool' ), $package -## or ov_method mycan( $package, '(nomethod' ), $package; - - # If this snippet is within a block this won't happen - # unless the user just processes the snippet alone within - # an editor. In that case either the user will see and - # fix the problem or it will be corrected next time the - # entire file is processed with perltidy. - ##next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); - next if ( $ipad == 0 && $peak_batch_size <= 1 ); - -## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT -## IT DID MORE HARM THAN GOOD -## ceil( -## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 -## / $upem -## ), -##? # do not put leading padding for just 2 lines of math -##? if ( $ipad == $ibeg -##? && $line > 0 -##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] -##? && $is_math_op{$type_next} -##? && $line + 2 <= $max_line ) -##? { -##? my $ibeg_next_next = $ri_first->[ $line + 2 ]; -##? my $type_next_next = $types_to_go[$ibeg_next_next]; -##? next if !$is_math_op{$type_next_next}; -##? } - - # next line must not be at greater depth - my $iend_next = $ri_last->[ $line + 1 ]; - next - if ( $nesting_depth_to_go[ $iend_next + 1 ] > - $nesting_depth_to_go[$ipad] ); - - # lines must be somewhat similar to be padded.. - my $inext_next = $inext_to_go[$ibeg_next]; - my $type = $types_to_go[$ipad]; - my $type_next = $types_to_go[ $ipad + 1 ]; - - # see if there are multiple continuation lines - my $logical_continuation_lines = 1; - if ( $line + 2 <= $max_line ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $ibeg_next_next = $ri_first->[ $line + 2 ]; - if ( $tokens_to_go[$ibeg_next_next] eq $leading_token - && $nesting_depth_to_go[$ibeg_next] eq - $nesting_depth_to_go[$ibeg_next_next] ) - { - $logical_continuation_lines++; - } - } - - # see if leading types match - my $types_match = $types_to_go[$inext_next] eq $type; - my $matches_without_bang; - - # if first line has leading ! then compare the following token - if ( !$types_match && $type eq '!' ) { - $types_match = $matches_without_bang = - $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; - } - - if ( - - # either we have multiple continuation lines to follow - # and we are not padding the first token - ( $logical_continuation_lines > 1 && $ipad > 0 ) - - # or.. - || ( - - # types must match - $types_match - - # and keywords must match if keyword - && !( - $type eq 'k' - && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] - ) - ) - ) - { - - #----------------------begin special checks-------------- - # - # SPECIAL CHECK 1: - # A check is needed before we can make the pad. - # If we are in a list with some long items, we want each - # item to stand out. So in the following example, the - # first line beginning with '$casefold->' would look good - # padded to align with the next line, but then it - # would be indented more than the last line, so we - # won't do it. - # - # ok( - # $casefold->{code} eq '0041' - # && $casefold->{status} eq 'C' - # && $casefold->{mapping} eq '0061', - # 'casefold 0x41' - # ); - # - # Note: - # It would be faster, and almost as good, to use a comma - # count, and not pad if comma_count > 1 and the previous - # line did not end with a comma. - # - my $ok_to_pad = 1; - - my $ibg = $ri_first->[ $line + 1 ]; - my $depth = $nesting_depth_to_go[ $ibg + 1 ]; - - # just use simplified formula for leading spaces to avoid - # needless sub calls - my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; - - # look at each line beyond the next .. - my $l = $line + 1; - foreach my $ltest ( $line + 2 .. $max_line ) { - $l = $ltest; - my $ibg = $ri_first->[$l]; - - # quit looking at the end of this container - last - if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) - || ( $nesting_depth_to_go[$ibg] < $depth ); - - # cannot do the pad if a later line would be - # outdented more - if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { - $ok_to_pad = 0; - last; - } - } - - # don't pad if we end in a broken list - if ( $l == $max_line ) { - my $i2 = $ri_last->[$l]; - if ( $types_to_go[$i2] eq '#' ) { - my $i1 = $ri_first->[$l]; - next - if ( - terminal_type( \@types_to_go, \@block_type_to_go, - $i1, $i2 ) eq ',' - ); - } - } - - # SPECIAL CHECK 2: - # a minus may introduce a quoted variable, and we will - # add the pad only if this line begins with a bare word, - # such as for the word 'Button' here: - # [ - # Button => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - # -accelerator => "Meta+$_" - # ]; - # - # On the other hand, if 'Button' is quoted, it looks best - # not to pad: - # [ - # 'Button' => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - # -accelerator => "Meta+$_" - # ]; - if ( $types_to_go[$ibeg_next] eq 'm' ) { - $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; - } - - next unless $ok_to_pad; - - #----------------------end special check--------------- - - my $length_1 = total_line_length( $ibeg, $ipad - 1 ); - my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); - $pad_spaces = $length_2 - $length_1; - - # If the first line has a leading ! and the second does - # not, then remove one space to try to align the next - # leading characters, which are often the same. For example: - # if ( !$ts - # || $ts == $self->Holder - # || $self->Holder->Type eq "Arena" ) - # - # This usually helps readability, but if there are subsequent - # ! operators things will still get messed up. For example: - # - # if ( !exists $Net::DNS::typesbyname{$qtype} - # && exists $Net::DNS::classesbyname{$qtype} - # && !exists $Net::DNS::classesbyname{$qclass} - # && exists $Net::DNS::typesbyname{$qclass} ) - # We can't fix that. - if ($matches_without_bang) { $pad_spaces-- } - - # make sure this won't change if -lp is used - my $indentation_1 = $leading_spaces_to_go[$ibeg]; - if ( ref($indentation_1) ) { - if ( $indentation_1->get_recoverable_spaces() == 0 ) { - my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; - unless ( $indentation_2->get_recoverable_spaces() == 0 ) - { - $pad_spaces = 0; - } - } - } - - # we might be able to handle a pad of -1 by removing a blank - # token - if ( $pad_spaces < 0 ) { - - if ( $pad_spaces == -1 ) { - if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) - { - pad_token( $ipad - 1, $pad_spaces ); - } - } - $pad_spaces = 0; - } - - # now apply any padding for alignment - if ( $ipad >= 0 && $pad_spaces ) { - - my $length_t = total_line_length( $ibeg, $iend ); - if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) - { - pad_token( $ipad, $pad_spaces ); - } - } - } - } - continue { - $iendm = $iend; - $ibegm = $ibeg; - $has_leading_op = $has_leading_op_next; - } # end of loop over lines - return; - } -} - -sub correct_lp_indentation { - - # When the -lp option is used, we need to make a last pass through - # each line to correct the indentation positions in case they differ - # from the predictions. This is necessary because perltidy uses a - # predictor/corrector method for aligning with opening parens. The - # predictor is usually good, but sometimes stumbles. The corrector - # tries to patch things up once the actual opening paren locations - # are known. - my ( $ri_first, $ri_last ) = @_; - my $do_not_pad = 0; - - # Note on flag '$do_not_pad': - # We want to avoid a situation like this, where the aligner inserts - # whitespace before the '=' to align it with a previous '=', because - # otherwise the parens might become mis-aligned in a situation like - # this, where the '=' has become aligned with the previous line, - # pushing the opening '(' forward beyond where we want it. - # - # $mkFloor::currentRoom = ''; - # $mkFloor::c_entry = $c->Entry( - # -width => '10', - # -relief => 'sunken', - # ... - # ); - # - # We leave it to the aligner to decide how to do this. - - # first remove continuation indentation if appropriate - my $max_line = @{$ri_first} - 1; - - # looking at each line of this batch.. - my ( $ibeg, $iend ); - foreach my $line ( 0 .. $max_line ) { - $ibeg = $ri_first->[$line]; - $iend = $ri_last->[$line]; - - # looking at each token in this output line.. - foreach my $i ( $ibeg .. $iend ) { - - # How many space characters to place before this token - # for special alignment. Actual padding is done in the - # continue block. - - # looking for next unvisited indentation item - my $indentation = $leading_spaces_to_go[$i]; - if ( !$indentation->get_marked() ) { - $indentation->set_marked(1); - - # looking for indentation item for which we are aligning - # with parens, braces, and brackets - next unless ( $indentation->get_align_paren() ); - - # skip closed container on this line - if ( $i > $ibeg ) { - my $im = max( $ibeg, $iprev_to_go[$i] ); - if ( $type_sequence_to_go[$im] - && $mate_index_to_go[$im] <= $iend ) - { - next; - } - } - - if ( $line == 1 && $i == $ibeg ) { - $do_not_pad = 1; - } - - # Ok, let's see what the error is and try to fix it - my $actual_pos; - my $predicted_pos = $indentation->get_spaces(); - if ( $i > $ibeg ) { - - # token is mid-line - use length to previous token - $actual_pos = total_line_length( $ibeg, $i - 1 ); - - # for mid-line token, we must check to see if all - # additional lines have continuation indentation, - # and remove it if so. Otherwise, we do not get - # good alignment. - my $closing_index = $indentation->get_closed(); - if ( $closing_index > $iend ) { - my $ibeg_next = $ri_first->[ $line + 1 ]; - if ( $ci_levels_to_go[$ibeg_next] > 0 ) { - undo_lp_ci( $line, $i, $closing_index, $ri_first, - $ri_last ); - } - } - } - elsif ( $line > 0 ) { - - # handle case where token starts a new line; - # use length of previous line - my $ibegm = $ri_first->[ $line - 1 ]; - my $iendm = $ri_last->[ $line - 1 ]; - $actual_pos = total_line_length( $ibegm, $iendm ); - - # follow -pt style - ++$actual_pos - if ( $types_to_go[ $iendm + 1 ] eq 'b' ); - } - else { - - # token is first character of first line of batch - $actual_pos = $predicted_pos; - } - - my $move_right = $actual_pos - $predicted_pos; - - # done if no error to correct (gnu2.t) - if ( $move_right == 0 ) { - $indentation->set_recoverable_spaces($move_right); - next; - } - - # if we have not seen closure for this indentation in - # this batch, we can only pass on a request to the - # vertical aligner - my $closing_index = $indentation->get_closed(); - - if ( $closing_index < 0 ) { - $indentation->set_recoverable_spaces($move_right); - next; - } - - # If necessary, look ahead to see if there is really any - # leading whitespace dependent on this whitespace, and - # also find the longest line using this whitespace. - # Since it is always safe to move left if there are no - # dependents, we only need to do this if we may have - # dependent nodes or need to move right. - - my $right_margin = 0; - my $have_child = $indentation->get_have_child(); - - my %saw_indentation; - my $line_count = 1; - $saw_indentation{$indentation} = $indentation; - - if ( $have_child || $move_right > 0 ) { - $have_child = 0; - my $max_length = 0; - if ( $i == $ibeg ) { - $max_length = total_line_length( $ibeg, $iend ); - } - - # look ahead at the rest of the lines of this batch.. - foreach my $line_t ( $line + 1 .. $max_line ) { - my $ibeg_t = $ri_first->[$line_t]; - my $iend_t = $ri_last->[$line_t]; - last if ( $closing_index <= $ibeg_t ); - - # remember all different indentation objects - my $indentation_t = $leading_spaces_to_go[$ibeg_t]; - $saw_indentation{$indentation_t} = $indentation_t; - $line_count++; - - # remember longest line in the group - my $length_t = total_line_length( $ibeg_t, $iend_t ); - if ( $length_t > $max_length ) { - $max_length = $length_t; - } - } - $right_margin = maximum_line_length($ibeg) - $max_length; - if ( $right_margin < 0 ) { $right_margin = 0 } - } - - my $first_line_comma_count = - grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; - my $comma_count = $indentation->get_comma_count(); - my $arrow_count = $indentation->get_arrow_count(); - - # This is a simple approximate test for vertical alignment: - # if we broke just after an opening paren, brace, bracket, - # and there are 2 or more commas in the first line, - # and there are no '=>'s, - # then we are probably vertically aligned. We could set - # an exact flag in sub scan_list, but this is good - # enough. - my $indentation_count = keys %saw_indentation; - my $is_vertically_aligned = - ( $i == $ibeg - && $first_line_comma_count > 1 - && $indentation_count == 1 - && ( $arrow_count == 0 || $arrow_count == $line_count ) ); - - # Make the move if possible .. - if ( - - # we can always move left - $move_right < 0 - - # but we should only move right if we are sure it will - # not spoil vertical alignment - || ( $comma_count == 0 ) - || ( $comma_count > 0 && !$is_vertically_aligned ) - ) - { - my $move = - ( $move_right <= $right_margin ) - ? $move_right - : $right_margin; - - foreach ( keys %saw_indentation ) { - $saw_indentation{$_} - ->permanently_decrease_available_spaces( -$move ); - } - } - - # Otherwise, record what we want and the vertical aligner - # will try to recover it. - else { - $indentation->set_recoverable_spaces($move_right); - } - } - } - } - return $do_not_pad; -} - -# flush is called to output any tokens in the pipeline, so that -# an alternate source of lines can be written in the correct order - -sub flush { - my $self = shift; - destroy_one_line_block(); - $self->output_line_to_go(); - Perl::Tidy::VerticalAligner::flush(); - return; -} - -sub reset_block_text_accumulator { - - # save text after 'if' and 'elsif' to append after 'else' - if ($accumulating_text_for_block) { - - if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { - push @{$rleading_block_if_elsif_text}, $leading_block_text; - } - } - $accumulating_text_for_block = ""; - $leading_block_text = ""; - $leading_block_text_level = 0; - $leading_block_text_length_exceeded = 0; - $leading_block_text_line_number = 0; - $leading_block_text_line_length = 0; - return; -} - -sub set_block_text_accumulator { - my $i = shift; - $accumulating_text_for_block = $tokens_to_go[$i]; - if ( $accumulating_text_for_block !~ /^els/ ) { - $rleading_block_if_elsif_text = []; - } - $leading_block_text = ""; - $leading_block_text_level = $levels_to_go[$i]; - $leading_block_text_line_number = get_output_line_number(); - ##$vertical_aligner_object->get_output_line_number(); - $leading_block_text_length_exceeded = 0; - - # this will contain the column number of the last character - # of the closing side comment - $leading_block_text_line_length = - length($csc_last_label) + - length($accumulating_text_for_block) + - length( $rOpts->{'closing-side-comment-prefix'} ) + - $leading_block_text_level * $rOpts_indent_columns + 3; - return; -} - -sub accumulate_block_text { - my $i = shift; - - # accumulate leading text for -csc, ignoring any side comments - if ( $accumulating_text_for_block - && !$leading_block_text_length_exceeded - && $types_to_go[$i] ne '#' ) - { - - my $added_length = $token_lengths_to_go[$i]; - $added_length += 1 if $i == 0; - my $new_line_length = $leading_block_text_line_length + $added_length; - - # we can add this text if we don't exceed some limits.. - if ( - - # we must not have already exceeded the text length limit - length($leading_block_text) < - $rOpts_closing_side_comment_maximum_text - - # and either: - # the new total line length must be below the line length limit - # or the new length must be below the text length limit - # (ie, we may allow one token to exceed the text length limit) - && ( - $new_line_length < - maximum_line_length_for_level($leading_block_text_level) - - || length($leading_block_text) + $added_length < - $rOpts_closing_side_comment_maximum_text - ) - - # UNLESS: we are adding a closing paren before the brace we seek. - # This is an attempt to avoid situations where the ... to be - # added are longer than the omitted right paren, as in: - - # foreach my $item (@a_rather_long_variable_name_here) { - # &whatever; - # } ## end foreach my $item (@a_rather_long_variable_name_here... - - || ( - $tokens_to_go[$i] eq ')' - && ( - ( - $i + 1 <= $max_index_to_go - && $block_type_to_go[ $i + 1 ] eq - $accumulating_text_for_block - ) - || ( $i + 2 <= $max_index_to_go - && $block_type_to_go[ $i + 2 ] eq - $accumulating_text_for_block ) - ) - ) - ) - { - - # add an extra space at each newline - if ( $i == 0 ) { $leading_block_text .= ' ' } - - # add the token text - $leading_block_text .= $tokens_to_go[$i]; - $leading_block_text_line_length = $new_line_length; - } - - # show that text was truncated if necessary - elsif ( $types_to_go[$i] ne 'b' ) { - $leading_block_text_length_exceeded = 1; - $leading_block_text .= '...'; - } - } - return; -} - -{ - my %is_if_elsif_else_unless_while_until_for_foreach; - - BEGIN { - - # These block types may have text between the keyword and opening - # curly. Note: 'else' does not, but must be included to allow trailing - # if/elsif text to be appended. - # patch for SWITCH/CASE: added 'case' and 'when' - my @q = - qw(if elsif else unless while until for foreach case when catch); - @is_if_elsif_else_unless_while_until_for_foreach{@q} = - (1) x scalar(@q); - } - - sub accumulate_csc_text { - - # called once per output buffer when -csc is used. Accumulates - # the text placed after certain closing block braces. - # Defines and returns the following for this buffer: - - my $block_leading_text = ""; # the leading text of the last '}' - my $rblock_leading_if_elsif_text; - my $i_block_leading_text = - -1; # index of token owning block_leading_text - my $block_line_count = 100; # how many lines the block spans - my $terminal_type = 'b'; # type of last nonblank token - my $i_terminal = 0; # index of last nonblank token - my $terminal_block_type = ""; - - # update most recent statement label - $csc_last_label = "" unless ($csc_last_label); - if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } - my $block_label = $csc_last_label; - - # Loop over all tokens of this batch - for my $i ( 0 .. $max_index_to_go ) { - my $type = $types_to_go[$i]; - my $block_type = $block_type_to_go[$i]; - my $token = $tokens_to_go[$i]; - - # remember last nonblank token type - if ( $type ne '#' && $type ne 'b' ) { - $terminal_type = $type; - $terminal_block_type = $block_type; - $i_terminal = $i; - } - - my $type_sequence = $type_sequence_to_go[$i]; - if ( $block_type && $type_sequence ) { - - if ( $token eq '}' ) { - - # restore any leading text saved when we entered this block - if ( defined( $block_leading_text{$type_sequence} ) ) { - ( $block_leading_text, $rblock_leading_if_elsif_text ) - = @{ $block_leading_text{$type_sequence} }; - $i_block_leading_text = $i; - delete $block_leading_text{$type_sequence}; - $rleading_block_if_elsif_text = - $rblock_leading_if_elsif_text; - } - - if ( defined( $csc_block_label{$type_sequence} ) ) { - $block_label = $csc_block_label{$type_sequence}; - delete $csc_block_label{$type_sequence}; - } - - # if we run into a '}' then we probably started accumulating - # at something like a trailing 'if' clause..no harm done. - if ( $accumulating_text_for_block - && $levels_to_go[$i] <= $leading_block_text_level ) - { - my $lev = $levels_to_go[$i]; - reset_block_text_accumulator(); - } - - if ( defined( $block_opening_line_number{$type_sequence} ) ) - { - my $output_line_number = get_output_line_number(); - ##$vertical_aligner_object->get_output_line_number(); - $block_line_count = - $output_line_number - - $block_opening_line_number{$type_sequence} + 1; - delete $block_opening_line_number{$type_sequence}; - } - else { - - # Error: block opening line undefined for this line.. - # This shouldn't be possible, but it is not a - # significant problem. - } - } - - elsif ( $token eq '{' ) { - - my $line_number = get_output_line_number(); - ##$vertical_aligner_object->get_output_line_number(); - $block_opening_line_number{$type_sequence} = $line_number; - - # set a label for this block, except for - # a bare block which already has the label - # A label can only be used on the next { - if ( $block_type =~ /:$/ ) { $csc_last_label = "" } - $csc_block_label{$type_sequence} = $csc_last_label; - $csc_last_label = ""; - - if ( $accumulating_text_for_block - && $levels_to_go[$i] == $leading_block_text_level ) - { - - if ( $accumulating_text_for_block eq $block_type ) { - - # save any leading text before we enter this block - $block_leading_text{$type_sequence} = [ - $leading_block_text, - $rleading_block_if_elsif_text - ]; - $block_opening_line_number{$type_sequence} = - $leading_block_text_line_number; - reset_block_text_accumulator(); - } - else { - - # shouldn't happen, but not a serious error. - # We were accumulating -csc text for block type - # $accumulating_text_for_block and unexpectedly - # encountered a '{' for block type $block_type. - } - } - } - } - - if ( $type eq 'k' - && $csc_new_statement_ok - && $is_if_elsif_else_unless_while_until_for_foreach{$token} - && $token =~ /$closing_side_comment_list_pattern/o ) - { - set_block_text_accumulator($i); - } - else { - - # note: ignoring type 'q' because of tricks being played - # with 'q' for hanging side comments - if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { - $csc_new_statement_ok = - ( $block_type || $type eq 'J' || $type eq ';' ); - } - if ( $type eq ';' - && $accumulating_text_for_block - && $levels_to_go[$i] == $leading_block_text_level ) - { - reset_block_text_accumulator(); - } - else { - accumulate_block_text($i); - } - } - } - - # Treat an 'else' block specially by adding preceding 'if' and - # 'elsif' text. Otherwise, the 'end else' is not helpful, - # especially for cuddled-else formatting. - if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { - $block_leading_text = - make_else_csc_text( $i_terminal, $terminal_block_type, - $block_leading_text, $rblock_leading_if_elsif_text ); - } - - # if this line ends in a label then remember it for the next pass - $csc_last_label = ""; - if ( $terminal_type eq 'J' ) { - $csc_last_label = $tokens_to_go[$i_terminal]; - } - - return ( $terminal_type, $i_terminal, $i_block_leading_text, - $block_leading_text, $block_line_count, $block_label ); - } -} - -sub make_else_csc_text { - - # create additional -csc text for an 'else' and optionally 'elsif', - # depending on the value of switch - # $rOpts_closing_side_comment_else_flag: - # - # = 0 add 'if' text to trailing else - # = 1 same as 0 plus: - # add 'if' to 'elsif's if can fit in line length - # add last 'elsif' to trailing else if can fit in one line - # = 2 same as 1 but do not check if exceed line length - # - # $rif_elsif_text = a reference to a list of all previous closing - # side comments created for this if block - # - my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; - my $csc_text = $block_leading_text; - - if ( $block_type eq 'elsif' - && $rOpts_closing_side_comment_else_flag == 0 ) - { - return $csc_text; - } - - my $count = @{$rif_elsif_text}; - return $csc_text unless ($count); - - my $if_text = '[ if' . $rif_elsif_text->[0]; - - # always show the leading 'if' text on 'else' - if ( $block_type eq 'else' ) { - $csc_text .= $if_text; - } - - # see if that's all - if ( $rOpts_closing_side_comment_else_flag == 0 ) { - return $csc_text; - } - - my $last_elsif_text = ""; - if ( $count > 1 ) { - $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; - if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } - } - - # tentatively append one more item - my $saved_text = $csc_text; - if ( $block_type eq 'else' ) { - $csc_text .= $last_elsif_text; - } - else { - $csc_text .= ' ' . $if_text; - } - - # all done if no length checks requested - if ( $rOpts_closing_side_comment_else_flag == 2 ) { - return $csc_text; - } - - # undo it if line length exceeded - my $length = - length($csc_text) + - length($block_type) + - length( $rOpts->{'closing-side-comment-prefix'} ) + - $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; - if ( $length > maximum_line_length_for_level($leading_block_text_level) ) { - $csc_text = $saved_text; - } - return $csc_text; -} - -{ # sub balance_csc_text - - my %matching_char; - - BEGIN { - %matching_char = ( - '{' => '}', - '(' => ')', - '[' => ']', - '}' => '{', - ')' => '(', - ']' => '[', - ); - } - - sub balance_csc_text { - - # Append characters to balance a closing side comment so that editors - # such as vim can correctly jump through code. - # Simple Example: - # input = ## end foreach my $foo ( sort { $b ... - # output = ## end foreach my $foo ( sort { $b ...}) - - # NOTE: This routine does not currently filter out structures within - # quoted text because the bounce algorithms in text editors do not - # necessarily do this either (a version of vim was checked and - # did not do this). - - # Some complex examples which will cause trouble for some editors: - # while ( $mask_string =~ /\{[^{]*?\}/g ) { - # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { - # if ( $1 eq '{' ) { - # test file test1/braces.pl has many such examples. - - my ($csc) = @_; - - # loop to examine characters one-by-one, RIGHT to LEFT and - # build a balancing ending, LEFT to RIGHT. - for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { - - my $char = substr( $csc, $pos, 1 ); - - # ignore everything except structural characters - next unless ( $matching_char{$char} ); - - # pop most recently appended character - my $top = chop($csc); - - # push it back plus the mate to the newest character - # unless they balance each other. - $csc = $csc . $top . $matching_char{$char} unless $top eq $char; - } - - # return the balanced string - return $csc; - } -} - -sub add_closing_side_comment { - - my $self = shift; - - # add closing side comments after closing block braces if -csc used - my $cscw_block_comment; - - #--------------------------------------------------------------- - # Step 1: loop through all tokens of this line to accumulate - # the text needed to create the closing side comments. Also see - # how the line ends. - #--------------------------------------------------------------- - - my ( $terminal_type, $i_terminal, $i_block_leading_text, - $block_leading_text, $block_line_count, $block_label ) - = accumulate_csc_text(); - - #--------------------------------------------------------------- - # Step 2: make the closing side comment if this ends a block - #--------------------------------------------------------------- - ##my $have_side_comment = $i_terminal != $max_index_to_go; - my $have_side_comment = $types_to_go[$max_index_to_go] eq '#'; - - # if this line might end in a block closure.. - if ( - $terminal_type eq '}' - - # ..and either - && ( - - # the block is long enough - ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) - - # or there is an existing comment to check - || ( $have_side_comment - && $rOpts->{'closing-side-comment-warnings'} ) - ) - - # .. and if this is one of the types of interest - && $block_type_to_go[$i_terminal] =~ - /$closing_side_comment_list_pattern/o - - # .. but not an anonymous sub - # These are not normally of interest, and their closing braces are - # often followed by commas or semicolons anyway. This also avoids - # possible erratic output due to line numbering inconsistencies - # in the cases where their closing braces terminate a line. - && $block_type_to_go[$i_terminal] ne 'sub' - - # ..and the corresponding opening brace must is not in this batch - # (because we do not need to tag one-line blocks, although this - # should also be caught with a positive -csci value) - && $mate_index_to_go[$i_terminal] < 0 - - # ..and either - && ( - - # this is the last token (line doesn't have a side comment) - !$have_side_comment - - # or the old side comment is a closing side comment - || $tokens_to_go[$max_index_to_go] =~ - /$closing_side_comment_prefix_pattern/o - ) - ) - { - - # then make the closing side comment text - if ($block_label) { $block_label .= " " } - my $token = -"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; - - # append any extra descriptive text collected above - if ( $i_block_leading_text == $i_terminal ) { - $token .= $block_leading_text; - } - - $token = balance_csc_text($token) - if $rOpts->{'closing-side-comments-balanced'}; - - $token =~ s/\s*$//; # trim any trailing whitespace - - # handle case of existing closing side comment - if ($have_side_comment) { - - # warn if requested and tokens differ significantly - if ( $rOpts->{'closing-side-comment-warnings'} ) { - my $old_csc = $tokens_to_go[$max_index_to_go]; - my $new_csc = $token; - $new_csc =~ s/\s+//g; # trim all whitespace - $old_csc =~ s/\s+//g; # trim all whitespace - $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures - $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures - $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' - my $new_trailing_dots = $1; - $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' - - # Patch to handle multiple closing side comments at - # else and elsif's. These have become too complicated - # to check, so if we see an indication of - # '[ if' or '[ # elsif', then assume they were made - # by perltidy. - if ( $block_type_to_go[$i_terminal] eq 'else' ) { - if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } - } - elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { - if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } - } - - # if old comment is contained in new comment, - # only compare the common part. - if ( length($new_csc) > length($old_csc) ) { - $new_csc = substr( $new_csc, 0, length($old_csc) ); - } - - # if the new comment is shorter and has been limited, - # only compare the common part. - if ( length($new_csc) < length($old_csc) - && $new_trailing_dots ) - { - $old_csc = substr( $old_csc, 0, length($new_csc) ); - } - - # any remaining difference? - if ( $new_csc ne $old_csc ) { - - # just leave the old comment if we are below the threshold - # for creating side comments - if ( $block_line_count < - $rOpts->{'closing-side-comment-interval'} ) - { - $token = undef; - } - - # otherwise we'll make a note of it - else { - - warning( -"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" - ); - - # save the old side comment in a new trailing block comment - my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; - $year += 1900; - $month += 1; - $cscw_block_comment = -"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; - } - } - else { - - # No differences.. we can safely delete old comment if we - # are below the threshold - if ( $block_line_count < - $rOpts->{'closing-side-comment-interval'} ) - { - $token = undef; - $self->unstore_token_to_go() - if ( $types_to_go[$max_index_to_go] eq '#' ); - $self->unstore_token_to_go() - if ( $types_to_go[$max_index_to_go] eq 'b' ); - } - } - } - - # switch to the new csc (unless we deleted it!) - $tokens_to_go[$max_index_to_go] = $token if $token; - } - - # handle case of NO existing closing side comment - else { - - # Remove any existing blank and add another below. - # This is a tricky point. A side comment needs to have the same level - # as the preceding closing brace or else the line will not get the right - # indentation. So even if we have a blank, we are going to replace it. - if ( $types_to_go[$max_index_to_go] eq 'b' ) { - unstore_token_to_go(); - } - - # insert the new side comment into the output token stream - my $type = '#'; - my $block_type = ''; - my $type_sequence = ''; - my $container_environment = - $container_environment_to_go[$max_index_to_go]; - my $level = $levels_to_go[$max_index_to_go]; - my $slevel = $nesting_depth_to_go[$max_index_to_go]; - my $no_internal_newlines = 0; - - my $ci_level = $ci_levels_to_go[$max_index_to_go]; - my $in_continued_quote = 0; - - # insert a blank token - $self->insert_new_token_to_go( ' ', 'b', $slevel, - $no_internal_newlines ); - - # then the side comment - $self->insert_new_token_to_go( $token, $type, $slevel, - $no_internal_newlines ); - } - } - return $cscw_block_comment; -} - -sub previous_nonblank_token { - my ($i) = @_; - my $name = ""; - my $im = $i - 1; - return "" if ( $im < 0 ); - if ( $types_to_go[$im] eq 'b' ) { $im--; } - return "" if ( $im < 0 ); - $name = $tokens_to_go[$im]; - - # prepend any sub name to an isolated -> to avoid unwanted alignments - # [test case is test8/penco.pl] - if ( $name eq '->' ) { - $im--; - if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { - $name = $tokens_to_go[$im] . $name; - } - } - return $name; -} - -sub send_lines_to_vertical_aligner { - - my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_; - - my $rindentation_list = [0]; # ref to indentations for each line - - # define the array @matching_token_to_go for the output tokens - # which will be non-blank for each special token (such as =>) - # for which alignment is required. - set_vertical_alignment_markers( $ri_first, $ri_last ); - - # flush if necessary to avoid unwanted alignment - my $must_flush = 0; - if ( @{$ri_first} > 1 ) { - - # flush before a long if statement - if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { - $must_flush = 1; - } - } - if ($must_flush) { - Perl::Tidy::VerticalAligner::flush(); - } - - undo_ci( $ri_first, $ri_last ); - - set_logical_padding( $ri_first, $ri_last ); - - # loop to prepare each line for shipment - my $n_last_line = @{$ri_first} - 1; - my $in_comma_list; - for my $n ( 0 .. $n_last_line ) { - my $ibeg = $ri_first->[$n]; - my $iend = $ri_last->[$n]; - - my ( $rtokens, $rfields, $rpatterns ) = - make_alignment_patterns( $ibeg, $iend ); - - # Set flag to show how much level changes between this line - # and the next line, if we have it. - my $ljump = 0; - if ( $n < $n_last_line ) { - my $ibegp = $ri_first->[ $n + 1 ]; - $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; - } - - my ( $indentation, $lev, $level_end, $terminal_type, - $is_semicolon_terminated, $is_outdented_line ) - = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, - $ri_first, $ri_last, $rindentation_list, $ljump ); - - # we will allow outdenting of long lines.. - my $outdent_long_lines = ( - - # which are long quotes, if allowed - ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) - - # which are long block comments, if allowed - || ( - $types_to_go[$ibeg] eq '#' - && $rOpts->{'outdent-long-comments'} - - # but not if this is a static block comment - && !$is_static_block_comment - ) - ); - - my $level_jump = - $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; - - my $rvertical_tightness_flags = - set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, - $ri_first, $ri_last ); - - # flush an outdented line to avoid any unwanted vertical alignment - Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); - - # Set a flag at the final ':' of a ternary chain to request - # vertical alignment of the final term. Here is a - # slightly complex example: - # - # $self->{_text} = ( - # !$section ? '' - # : $type eq 'item' ? "the $section entry" - # : "the section on $section" - # ) - # . ( - # $page - # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" - # : ' elsewhere in this document' - # ); - # - my $is_terminal_ternary = 0; - if ( $tokens_to_go[$ibeg] eq ':' - || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' ) - { - my $last_leading_type = ":"; - if ( $n > 0 ) { - my $iprev = $ri_first->[ $n - 1 ]; - $last_leading_type = $types_to_go[$iprev]; - } - if ( $terminal_type ne ';' - && $n_last_line > $n - && $level_end == $lev ) - { - my $inext = $ri_first->[ $n + 1 ]; - $level_end = $levels_to_go[$inext]; - $terminal_type = $types_to_go[$inext]; - } - - $is_terminal_ternary = $last_leading_type eq ':' - && ( ( $terminal_type eq ';' && $level_end <= $lev ) - || ( $terminal_type ne ':' && $level_end < $lev ) ) - - # the terminal term must not contain any ternary terms, as in - # my $ECHO = ( - # $Is_MSWin32 ? ".\\echo$$" - # : $Is_MacOS ? ":echo$$" - # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) - # ); - && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ]; - } - - # send this new line down the pipe - my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; - Perl::Tidy::VerticalAligner::valign_input( - $lev, - $level_end, - $indentation, - $rfields, - $rtokens, - $rpatterns, - $forced_breakpoint_to_go[$iend] || $in_comma_list, - $outdent_long_lines, - $is_terminal_ternary, - $is_semicolon_terminated, - $do_not_pad, - $rvertical_tightness_flags, - $level_jump, - ); - $in_comma_list = - $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; - - # flush an outdented line to avoid any unwanted vertical alignment - Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); - - $do_not_pad = 0; - - # Set flag indicating if this line ends in an opening - # token and is very short, so that a blank line is not - # needed if the subsequent line is a comment. - # Examples of what we are looking for: - # { - # && ( - # BEGIN { - # default { - # sub { - $last_output_short_opening_token - - # line ends in opening token - = $types_to_go[$iend] =~ /^[\{\(\[L]$/ - - # and either - && ( - # line has either single opening token - $iend == $ibeg - - # or is a single token followed by opening token. - # Note that sub identifiers have blanks like 'sub doit' - || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) - ) - - # and limit total to 10 character widths - && token_sequence_length( $ibeg, $iend ) <= 10; - - } # end of loop to output each line - - # remember indentation of lines containing opening containers for - # later use by sub set_adjusted_indentation - save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); - return; -} - -{ # begin make_alignment_patterns - - my %block_type_map; - my %keyword_map; - - BEGIN { - - # map related block names into a common name to - # allow alignment - %block_type_map = ( - 'unless' => 'if', - 'else' => 'if', - 'elsif' => 'if', - 'when' => 'if', - 'default' => 'if', - 'case' => 'if', - 'sort' => 'map', - 'grep' => 'map', - ); - - # map certain keywords to the same 'if' class to align - # long if/elsif sequences. [elsif.pl] - %keyword_map = ( - 'unless' => 'if', - 'else' => 'if', - 'elsif' => 'if', - 'when' => 'given', - 'default' => 'given', - 'case' => 'switch', - - # treat an 'undef' similar to numbers and quotes - 'undef' => 'Q', - ); - } - - sub make_alignment_patterns { - - # Here we do some important preliminary work for the - # vertical aligner. We create three arrays for one - # output line. These arrays contain strings that can - # be tested by the vertical aligner to see if - # consecutive lines can be aligned vertically. - # - # The three arrays are indexed on the vertical - # alignment fields and are: - # @tokens - a list of any vertical alignment tokens for this line. - # These are tokens, such as '=' '&&' '#' etc which - # we want to might align vertically. These are - # decorated with various information such as - # nesting depth to prevent unwanted vertical - # alignment matches. - # @fields - the actual text of the line between the vertical alignment - # tokens. - # @patterns - a modified list of token types, one for each alignment - # field. These should normally each match before alignment is - # allowed, even when the alignment tokens match. - my ( $ibeg, $iend ) = @_; - my @tokens = (); - my @fields = (); - my @patterns = (); - my $i_start = $ibeg; - - my $depth = 0; - my @container_name = (""); - my @multiple_comma_arrows = (undef); - - my $j = 0; # field index - - $patterns[0] = ""; - for my $i ( $ibeg .. $iend ) { - - # Keep track of containers balanced on this line only. - # These are used below to prevent unwanted cross-line alignments. - # Unbalanced containers already avoid aligning across - # container boundaries. - if ( $tokens_to_go[$i] eq '(' ) { - - # if container is balanced on this line... - my $i_mate = $mate_index_to_go[$i]; - if ( $i_mate > $i && $i_mate <= $iend ) { - $depth++; - my $seqno = $type_sequence_to_go[$i]; - my $count = comma_arrow_count($seqno); - $multiple_comma_arrows[$depth] = $count && $count > 1; - - # Append the previous token name to make the container name - # more unique. This name will also be given to any commas - # within this container, and it helps avoid undesirable - # alignments of different types of containers. - my $name = previous_nonblank_token($i); - $name =~ s/^->//; - $container_name[$depth] = "+" . $name; - - # Make the container name even more unique if necessary. - # If we are not vertically aligning this opening paren, - # append a character count to avoid bad alignment because - # it usually looks bad to align commas within containers - # for which the opening parens do not align. Here - # is an example very BAD alignment of commas (because - # the atan2 functions are not all aligned): - # $XY = - # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + - # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - - # $X * atan2( $X, 1 ) - - # $Y * atan2( $Y, 1 ); - # - # On the other hand, it is usually okay to align commas if - # opening parens align, such as: - # glVertex3d( $cx + $s * $xs, $cy, $z ); - # glVertex3d( $cx, $cy + $s * $ys, $z ); - # glVertex3d( $cx - $s * $xs, $cy, $z ); - # glVertex3d( $cx, $cy - $s * $ys, $z ); - # - # To distinguish between these situations, we will - # append the length of the line from the previous matching - # token, or beginning of line, to the function name. This - # will allow the vertical aligner to reject undesirable - # matches. - - # if we are not aligning on this paren... - if ( $matching_token_to_go[$i] eq '' ) { - - # Sum length from previous alignment, or start of line. - my $len = - ( $i_start == $ibeg ) - ? total_line_length( $i_start, $i - 1 ) - : token_sequence_length( $i_start, $i - 1 ); - - # tack length onto the container name to make unique - $container_name[$depth] .= "-" . $len; - } - } - } - elsif ( $tokens_to_go[$i] eq ')' ) { - $depth-- if $depth > 0; - } - - # if we find a new synchronization token, we are done with - # a field - if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { - - my $tok = my $raw_tok = $matching_token_to_go[$i]; - - # make separators in different nesting depths unique - # by appending the nesting depth digit. - if ( $raw_tok ne '#' ) { - $tok .= "$nesting_depth_to_go[$i]"; - } - - # also decorate commas with any container name to avoid - # unwanted cross-line alignments. - if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { - if ( $container_name[$depth] ) { - $tok .= $container_name[$depth]; - } - } - - # Patch to avoid aligning leading and trailing if, unless. - # Mark trailing if, unless statements with container names. - # This makes them different from leading if, unless which - # are not so marked at present. If we ever need to name - # them too, we could use ci to distinguish them. - # Example problem to avoid: - # return ( 2, "DBERROR" ) - # if ( $retval == 2 ); - # if ( scalar @_ ) { - # my ( $a, $b, $c, $d, $e, $f ) = @_; - # } - if ( $raw_tok eq '(' ) { - my $ci = $ci_levels_to_go[$ibeg]; - if ( $container_name[$depth] =~ /^\+(if|unless)/ - && $ci ) - { - $tok .= $container_name[$depth]; - } - } - - # Decorate block braces with block types to avoid - # unwanted alignments such as the following: - # foreach ( @{$routput_array} ) { $fh->print($_) } - # eval { $fh->close() }; - if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { - my $block_type = $block_type_to_go[$i]; - - # map certain related block types to allow - # else blocks to align - $block_type = $block_type_map{$block_type} - if ( defined( $block_type_map{$block_type} ) ); - - # remove sub names to allow one-line sub braces to align - # regardless of name - #if ( $block_type =~ /^sub / ) { $block_type = 'sub' } - if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' } - - # allow all control-type blocks to align - if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } - - $tok .= $block_type; - } - - # concatenate the text of the consecutive tokens to form - # the field - push( @fields, - join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); - - # store the alignment token for this field - push( @tokens, $tok ); - - # get ready for the next batch - $i_start = $i; - $j++; - $patterns[$j] = ""; - } - - # continue accumulating tokens - # handle non-keywords.. - if ( $types_to_go[$i] ne 'k' ) { - my $type = $types_to_go[$i]; - - # Mark most things before arrows as a quote to - # get them to line up. Testfile: mixed.pl. - if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { - my $next_type = $types_to_go[ $i + 1 ]; - my $i_next_nonblank = - ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); - - if ( $types_to_go[$i_next_nonblank] eq '=>' ) { - $type = 'Q'; - - # Patch to ignore leading minus before words, - # by changing pattern 'mQ' into just 'Q', - # so that we can align things like this: - # Button => "Print letter \"~$_\"", - # -command => [ sub { print "$_[0]\n" }, $_ ], - if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } - } - } - - # patch to make numbers and quotes align - if ( $type eq 'n' ) { $type = 'Q' } - - # patch to ignore any ! in patterns - if ( $type eq '!' ) { $type = '' } - - $patterns[$j] .= $type; - } - - # for keywords we have to use the actual text - else { - - my $tok = $tokens_to_go[$i]; - - # but map certain keywords to a common string to allow - # alignment. - $tok = $keyword_map{$tok} - if ( defined( $keyword_map{$tok} ) ); - $patterns[$j] .= $tok; - } - } - - # done with this line .. join text of tokens to make the last field - push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); - return ( \@tokens, \@fields, \@patterns ); - } - -} # end make_alignment_patterns - -{ # begin unmatched_indexes - - # closure to keep track of unbalanced containers. - # arrays shared by the routines in this block: - my @unmatched_opening_indexes_in_this_batch; - my @unmatched_closing_indexes_in_this_batch; - my %comma_arrow_count; - - sub is_unbalanced_batch { - return @unmatched_opening_indexes_in_this_batch + - @unmatched_closing_indexes_in_this_batch; - } - - sub comma_arrow_count { - my $seqno = shift; - return $comma_arrow_count{$seqno}; - } - - sub match_opening_and_closing_tokens { - - # Match up indexes of opening and closing braces, etc, in this batch. - # This has to be done after all tokens are stored because unstoring - # of tokens would otherwise cause trouble. - - @unmatched_opening_indexes_in_this_batch = (); - @unmatched_closing_indexes_in_this_batch = (); - %comma_arrow_count = (); - my $comma_arrow_count_contained = 0; - - foreach my $i ( 0 .. $max_index_to_go ) { - if ( $type_sequence_to_go[$i] ) { - my $token = $tokens_to_go[$i]; - if ( $token =~ /^[\(\[\{\?]$/ ) { - push @unmatched_opening_indexes_in_this_batch, $i; - } - elsif ( $token =~ /^[\)\]\}\:]$/ ) { - - my $i_mate = pop @unmatched_opening_indexes_in_this_batch; - if ( defined($i_mate) && $i_mate >= 0 ) { - if ( $type_sequence_to_go[$i_mate] == - $type_sequence_to_go[$i] ) - { - $mate_index_to_go[$i] = $i_mate; - $mate_index_to_go[$i_mate] = $i; - my $seqno = $type_sequence_to_go[$i]; - if ( $comma_arrow_count{$seqno} ) { - $comma_arrow_count_contained += - $comma_arrow_count{$seqno}; - } - } - else { - push @unmatched_opening_indexes_in_this_batch, - $i_mate; - push @unmatched_closing_indexes_in_this_batch, $i; - } - } - else { - push @unmatched_closing_indexes_in_this_batch, $i; - } - } - } - elsif ( $tokens_to_go[$i] eq '=>' ) { - if (@unmatched_opening_indexes_in_this_batch) { - my $j = $unmatched_opening_indexes_in_this_batch[-1]; - my $seqno = $type_sequence_to_go[$j]; - $comma_arrow_count{$seqno}++; - } - } - } - return $comma_arrow_count_contained; - } - - sub save_opening_indentation { - - # This should be called after each batch of tokens is output. It - # saves indentations of lines of all unmatched opening tokens. - # These will be used by sub get_opening_indentation. - - my ( $ri_first, $ri_last, $rindentation_list ) = @_; - - # we no longer need indentations of any saved indentations which - # are unmatched closing tokens in this batch, because we will - # never encounter them again. So we can delete them to keep - # the hash size down. - foreach (@unmatched_closing_indexes_in_this_batch) { - my $seqno = $type_sequence_to_go[$_]; - delete $saved_opening_indentation{$seqno}; - } - - # we need to save indentations of any unmatched opening tokens - # in this batch because we may need them in a subsequent batch. - foreach (@unmatched_opening_indexes_in_this_batch) { - my $seqno = $type_sequence_to_go[$_]; - $saved_opening_indentation{$seqno} = [ - lookup_opening_indentation( - $_, $ri_first, $ri_last, $rindentation_list - ) - ]; - } - return; - } -} # end unmatched_indexes - -sub get_opening_indentation { - - # get the indentation of the line which output the opening token - # corresponding to a given closing token in the current output batch. - # - # given: - # $i_closing - index in this line of a closing token ')' '}' or ']' - # - # $ri_first - reference to list of the first index $i for each output - # line in this batch - # $ri_last - reference to list of the last index $i for each output line - # in this batch - # $rindentation_list - reference to a list containing the indentation - # used for each line. - # - # return: - # -the indentation of the line which contained the opening token - # which matches the token at index $i_opening - # -and its offset (number of columns) from the start of the line - # - my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; - - # first, see if the opening token is in the current batch - my $i_opening = $mate_index_to_go[$i_closing]; - my ( $indent, $offset, $is_leading, $exists ); - $exists = 1; - if ( $i_opening >= 0 ) { - - # it is..look up the indentation - ( $indent, $offset, $is_leading ) = - lookup_opening_indentation( $i_opening, $ri_first, $ri_last, - $rindentation_list ); - } - - # if not, it should have been stored in the hash by a previous batch - else { - my $seqno = $type_sequence_to_go[$i_closing]; - if ($seqno) { - if ( $saved_opening_indentation{$seqno} ) { - ( $indent, $offset, $is_leading ) = - @{ $saved_opening_indentation{$seqno} }; - } - - # some kind of serious error - # (example is badfile.t) - else { - $indent = 0; - $offset = 0; - $is_leading = 0; - $exists = 0; - } - } - - # if no sequence number it must be an unbalanced container - else { - $indent = 0; - $offset = 0; - $is_leading = 0; - $exists = 0; - } - } - return ( $indent, $offset, $is_leading, $exists ); -} - -sub lookup_opening_indentation { - - # get the indentation of the line in the current output batch - # which output a selected opening token - # - # given: - # $i_opening - index of an opening token in the current output batch - # whose line indentation we need - # $ri_first - reference to list of the first index $i for each output - # line in this batch - # $ri_last - reference to list of the last index $i for each output line - # in this batch - # $rindentation_list - reference to a list containing the indentation - # used for each line. (NOTE: the first slot in - # this list is the last returned line number, and this is - # followed by the list of indentations). - # - # return - # -the indentation of the line which contained token $i_opening - # -and its offset (number of columns) from the start of the line - - my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; - - my $nline = $rindentation_list->[0]; # line number of previous lookup - - # reset line location if necessary - $nline = 0 if ( $i_opening < $ri_start->[$nline] ); - - # find the correct line - unless ( $i_opening > $ri_last->[-1] ) { - while ( $i_opening > $ri_last->[$nline] ) { $nline++; } - } - - # error - token index is out of bounds - shouldn't happen - else { - warning( -"non-fatal program bug in lookup_opening_indentation - index out of range\n" - ); - report_definite_bug(); - $nline = $#{$ri_last}; - } - - $rindentation_list->[0] = - $nline; # save line number to start looking next call - my $ibeg = $ri_start->[$nline]; - my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; - my $is_leading = ( $ibeg == $i_opening ); - return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); -} - -{ - my %is_if_elsif_else_unless_while_until_for_foreach; - - BEGIN { - - # These block types may have text between the keyword and opening - # curly. Note: 'else' does not, but must be included to allow trailing - # if/elsif text to be appended. - # patch for SWITCH/CASE: added 'case' and 'when' - my @q = qw(if elsif else unless while until for foreach case when); - @is_if_elsif_else_unless_while_until_for_foreach{@q} = - (1) x scalar(@q); - } - - sub set_adjusted_indentation { - - # This routine has the final say regarding the actual indentation of - # a line. It starts with the basic indentation which has been - # defined for the leading token, and then takes into account any - # options that the user has set regarding special indenting and - # outdenting. - - my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, - $rindentation_list, $level_jump ) - = @_; - - # we need to know the last token of this line - my ( $terminal_type, $i_terminal ) = - terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); - - my $is_outdented_line = 0; - - my $is_semicolon_terminated = $terminal_type eq ';' - && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; - - # NOTE: A future improvement would be to make it semicolon terminated - # even if it does not have a semicolon but is followed by a closing - # block brace. This would undo ci even for something like the - # following, in which the final paren does not have a semicolon because - # it is a possible weld location: - - # if ($BOLD_MATH) { - # ( - # $labels, $comment, - # join( '', '', &make_math( $mode, '', '', $_ ), '' ) - # ) - # } - # - - # MOJO: Set a flag if this lines begins with ')->' - my $leading_paren_arrow = ( - $types_to_go[$ibeg] eq '}' - && $tokens_to_go[$ibeg] eq ')' - && ( - ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' ) - || ( $ibeg < $i_terminal - 1 - && $types_to_go[ $ibeg + 1 ] eq 'b' - && $types_to_go[ $ibeg + 2 ] eq '->' ) - ) - ); - - ########################################################## - # Section 1: set a flag and a default indentation - # - # Most lines are indented according to the initial token. - # But it is common to outdent to the level just after the - # terminal token in certain cases... - # adjust_indentation flag: - # 0 - do not adjust - # 1 - outdent - # 2 - vertically align with opening token - # 3 - indent - ########################################################## - my $adjust_indentation = 0; - my $default_adjust_indentation = $adjust_indentation; - - my ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ); - - # if we are at a closing token of some type.. - if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) { - - # get the indentation of the line containing the corresponding - # opening token - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - - # First set the default behavior: - if ( - - # default behavior is to outdent closing lines - # of the form: "); }; ]; )->xxx;" - $is_semicolon_terminated - - # and 'cuddled parens' of the form: ")->pack(" - # Bug fix for RT #123749]: the types here were - # incorrectly '(' and ')'. Corrected to be '{' and '}' - || ( - $terminal_type eq '{' - && $types_to_go[$ibeg] eq '}' - && ( $nesting_depth_to_go[$iend] + 1 == - $nesting_depth_to_go[$ibeg] ) - ) - - # remove continuation indentation for any line like - # } ... { - # or without ending '{' and unbalanced, such as - # such as '}->{$operator}' - || ( - $types_to_go[$ibeg] eq '}' - - && ( $types_to_go[$iend] eq '{' - || $levels_to_go[$iend] < $levels_to_go[$ibeg] ) - ) - - # and when the next line is at a lower indentation level - # PATCH: and only if the style allows undoing continuation - # for all closing token types. We should really wait until - # the indentation of the next line is known and then make - # a decision, but that would require another pass. - || ( $level_jump < 0 && !$some_closing_token_indentation ) - - # Patch for -wn=2, multiple welded closing tokens - || ( $i_terminal > $ibeg - && $types_to_go[$iend] =~ /^[\)\}\]R]$/ ) - - ) - { - $adjust_indentation = 1; - } - - # outdent something like '),' - if ( - $terminal_type eq ',' - - # Removed this constraint for -wn - # OLD: allow just one character before the comma - # && $i_terminal == $ibeg + 1 - - # require LIST environment; otherwise, we may outdent too much - - # this can happen in calls without parentheses (overload.t); - && $container_environment_to_go[$i_terminal] eq 'LIST' - ) - { - $adjust_indentation = 1; - } - - # undo continuation indentation of a terminal closing token if - # it is the last token before a level decrease. This will allow - # a closing token to line up with its opening counterpart, and - # avoids a indentation jump larger than 1 level. - if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ - && $i_terminal == $ibeg ) - { - my $ci = $ci_levels_to_go[$ibeg]; - my $lev = $levels_to_go[$ibeg]; - my $next_type = $types_to_go[ $ibeg + 1 ]; - my $i_next_nonblank = - ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 ); - if ( $i_next_nonblank <= $max_index_to_go - && $levels_to_go[$i_next_nonblank] < $lev ) - { - $adjust_indentation = 1; - } - - # Patch for RT #96101, in which closing brace of anonymous subs - # was not outdented. We should look ahead and see if there is - # a level decrease at the next token (i.e., a closing token), - # but right now we do not have that information. For now - # we see if we are in a list, and this works well. - # See test files 'sub*.t' for good test cases. - if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/ - && $container_environment_to_go[$i_terminal] eq 'LIST' - && !$rOpts->{'indent-closing-brace'} ) - { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - my $indentation = $leading_spaces_to_go[$ibeg]; - if ( defined($opening_indentation) - && get_spaces($indentation) > - get_spaces($opening_indentation) ) - { - $adjust_indentation = 1; - } - } - } - - # YVES patch 1 of 2: - # Undo ci of line with leading closing eval brace, - # but not beyond the indention of the line with - # the opening brace. - if ( $block_type_to_go[$ibeg] eq 'eval' - && !$rOpts->{'line-up-parentheses'} - && !$rOpts->{'indent-closing-brace'} ) - { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - my $indentation = $leading_spaces_to_go[$ibeg]; - if ( defined($opening_indentation) - && get_spaces($indentation) > - get_spaces($opening_indentation) ) - { - $adjust_indentation = 1; - } - } - - $default_adjust_indentation = $adjust_indentation; - - # Now modify default behavior according to user request: - # handle option to indent non-blocks of the form ); }; ]; - # But don't do special indentation to something like ')->pack(' - if ( !$block_type_to_go[$ibeg] ) { - my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; - if ( $cti == 1 ) { - if ( $i_terminal <= $ibeg + 1 - || $is_semicolon_terminated ) - { - $adjust_indentation = 2; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 2 ) { - if ($is_semicolon_terminated) { - $adjust_indentation = 3; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 3 ) { - $adjust_indentation = 3; - } - } - - # handle option to indent blocks - else { - if ( - $rOpts->{'indent-closing-brace'} - && ( - $i_terminal == $ibeg # isolated terminal '}' - || $is_semicolon_terminated - ) - ) # } xxxx ; - { - $adjust_indentation = 3; - } - } - } - - # if at ');', '};', '>;', and '];' of a terminal qw quote - elsif ($rpatterns->[0] =~ /^qb*;$/ - && $rfields->[0] =~ /^([\)\}\]\>]);$/ ) - { - if ( $closing_token_indentation{$1} == 0 ) { - $adjust_indentation = 1; - } - else { - $adjust_indentation = 3; - } - } - - # if line begins with a ':', align it with any - # previous line leading with corresponding ? - elsif ( $types_to_go[$ibeg] eq ':' ) { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - if ($is_leading) { $adjust_indentation = 2; } - } - - ########################################################## - # Section 2: set indentation according to flag set above - # - # Select the indentation object to define leading - # whitespace. If we are outdenting something like '} } );' - # then we want to use one level below the last token - # ($i_terminal) in order to get it to fully outdent through - # all levels. - ########################################################## - my $indentation; - my $lev; - my $level_end = $levels_to_go[$iend]; - - if ( $adjust_indentation == 0 ) { - $indentation = $leading_spaces_to_go[$ibeg]; - $lev = $levels_to_go[$ibeg]; - } - elsif ( $adjust_indentation == 1 ) { - - # Change the indentation to be that of a different token on the line - # Previously, the indentation of the terminal token was used: - # OLD CODING: - # $indentation = $reduced_spaces_to_go[$i_terminal]; - # $lev = $levels_to_go[$i_terminal]; - - # Generalization for MOJO: - # Use the lowest level indentation of the tokens on the line. - # For example, here we can use the indentation of the ending ';': - # } until ($selection > 0 and $selection < 10); # ok to use ';' - # But this will not outdent if we use the terminal indentation: - # )->then( sub { # use indentation of the ->, not the { - # Warning: reduced_spaces_to_go[] may be a reference, do not - # do numerical checks with it - - my $i_ind = $ibeg; - $indentation = $reduced_spaces_to_go[$i_ind]; - $lev = $levels_to_go[$i_ind]; - while ( $i_ind < $i_terminal ) { - $i_ind++; - if ( $levels_to_go[$i_ind] < $lev ) { - $indentation = $reduced_spaces_to_go[$i_ind]; - $lev = $levels_to_go[$i_ind]; - } - } - } - - # handle indented closing token which aligns with opening token - elsif ( $adjust_indentation == 2 ) { - - # handle option to align closing token with opening token - $lev = $levels_to_go[$ibeg]; - - # calculate spaces needed to align with opening token - my $space_count = - get_spaces($opening_indentation) + $opening_offset; - - # Indent less than the previous line. - # - # Problem: For -lp we don't exactly know what it was if there - # were recoverable spaces sent to the aligner. A good solution - # would be to force a flush of the vertical alignment buffer, so - # that we would know. For now, this rule is used for -lp: - # - # When the last line did not start with a closing token we will - # be optimistic that the aligner will recover everything wanted. - # - # This rule will prevent us from breaking a hierarchy of closing - # tokens, and in a worst case will leave a closing paren too far - # indented, but this is better than frequently leaving it not - # indented enough. - my $last_spaces = get_spaces($last_indentation_written); - if ( $last_leading_token !~ /^[\}\]\)]$/ ) { - $last_spaces += - get_recoverable_spaces($last_indentation_written); - } - - # reset the indentation to the new space count if it works - # only options are all or none: nothing in-between looks good - $lev = $levels_to_go[$ibeg]; - if ( $space_count < $last_spaces ) { - if ($rOpts_line_up_parentheses) { - my $lev = $levels_to_go[$ibeg]; - $indentation = - new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); - } - else { - $indentation = $space_count; - } - } - - # revert to default if it doesn't work - else { - $space_count = leading_spaces_to_go($ibeg); - if ( $default_adjust_indentation == 0 ) { - $indentation = $leading_spaces_to_go[$ibeg]; - } - elsif ( $default_adjust_indentation == 1 ) { - $indentation = $reduced_spaces_to_go[$i_terminal]; - $lev = $levels_to_go[$i_terminal]; - } - } - } - - # Full indentaion of closing tokens (-icb and -icp or -cti=2) - else { - - # handle -icb (indented closing code block braces) - # Updated method for indented block braces: indent one full level if - # there is no continuation indentation. This will occur for major - # structures such as sub, if, else, but not for things like map - # blocks. - # - # Note: only code blocks without continuation indentation are - # handled here (if, else, unless, ..). In the following snippet, - # the terminal brace of the sort block will have continuation - # indentation as shown so it will not be handled by the coding - # here. We would have to undo the continuation indentation to do - # this, but it probably looks ok as is. This is a possible future - # update for semicolon terminated lines. - # - # if ($sortby eq 'date' or $sortby eq 'size') { - # @files = sort { - # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} - # or $a cmp $b - # } @files; - # } - # - if ( $block_type_to_go[$ibeg] - && $ci_levels_to_go[$i_terminal] == 0 ) - { - my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); - $indentation = $spaces + $rOpts_indent_columns; - - # NOTE: for -lp we could create a new indentation object, but - # there is probably no need to do it - } - - # handle -icp and any -icb block braces which fall through above - # test such as the 'sort' block mentioned above. - else { - - # There are currently two ways to handle -icp... - # One way is to use the indentation of the previous line: - # $indentation = $last_indentation_written; - - # The other way is to use the indentation that the previous line - # would have had if it hadn't been adjusted: - $indentation = $last_unadjusted_indentation; - - # Current method: use the minimum of the two. This avoids - # inconsistent indentation. - if ( get_spaces($last_indentation_written) < - get_spaces($indentation) ) - { - $indentation = $last_indentation_written; - } - } - - # use previous indentation but use own level - # to cause list to be flushed properly - $lev = $levels_to_go[$ibeg]; - } - - # remember indentation except for multi-line quotes, which get - # no indentation - unless ( $ibeg == 0 && $starting_in_quote ) { - $last_indentation_written = $indentation; - $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; - $last_leading_token = $tokens_to_go[$ibeg]; - } - - # be sure lines with leading closing tokens are not outdented more - # than the line which contained the corresponding opening token. - - ############################################################# - # updated per bug report in alex_bug.pl: we must not - # mess with the indentation of closing logical braces so - # we must treat something like '} else {' as if it were - # an isolated brace my $is_isolated_block_brace = ( - # $iend == $ibeg ) && $block_type_to_go[$ibeg]; - ############################################################# - my $is_isolated_block_brace = $block_type_to_go[$ibeg] - && ( $iend == $ibeg - || $is_if_elsif_else_unless_while_until_for_foreach{ - $block_type_to_go[$ibeg] - } ); - - # only do this for a ':; which is aligned with its leading '?' - my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; - - if ( - defined($opening_indentation) - && !$leading_paren_arrow # MOJO - && !$is_isolated_block_brace - && !$is_unaligned_colon - ) - { - if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { - $indentation = $opening_indentation; - } - } - - # remember the indentation of each line of this batch - push @{$rindentation_list}, $indentation; - - # outdent lines with certain leading tokens... - if ( - - # must be first word of this batch - $ibeg == 0 - - # and ... - && ( - - # certain leading keywords if requested - ( - $rOpts->{'outdent-keywords'} - && $types_to_go[$ibeg] eq 'k' - && $outdent_keyword{ $tokens_to_go[$ibeg] } - ) - - # or labels if requested - || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) - - # or static block comments if requested - || ( $types_to_go[$ibeg] eq '#' - && $rOpts->{'outdent-static-block-comments'} - && $is_static_block_comment ) - ) - ) - - { - my $space_count = leading_spaces_to_go($ibeg); - if ( $space_count > 0 ) { - $space_count -= $rOpts_continuation_indentation; - $is_outdented_line = 1; - if ( $space_count < 0 ) { $space_count = 0 } - - # do not promote a spaced static block comment to non-spaced; - # this is not normally necessary but could be for some - # unusual user inputs (such as -ci = -i) - if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { - $space_count = 1; - } - - if ($rOpts_line_up_parentheses) { - $indentation = - new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); - } - else { - $indentation = $space_count; - } - } - } - - return ( $indentation, $lev, $level_end, $terminal_type, - $is_semicolon_terminated, $is_outdented_line ); - } -} - -sub set_vertical_tightness_flags { - - my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; - - # Define vertical tightness controls for the nth line of a batch. - # We create an array of parameters which tell the vertical aligner - # if we should combine this line with the next line to achieve the - # desired vertical tightness. The array of parameters contains: - # - # [0] type: 1=opening non-block 2=closing non-block - # 3=opening block brace 4=closing block brace - # - # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok - # if closing: spaces of padding to use - # [2] sequence number of container - # [3] valid flag: do not append if this flag is false. Will be - # true if appropriate -vt flag is set. Otherwise, Will be - # made true only for 2 line container in parens with -lp - # - # These flags are used by sub set_leading_whitespace in - # the vertical aligner - - my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; - - #-------------------------------------------------------------- - # Vertical Tightness Flags Section 1: - # Handle Lines 1 .. n-1 but not the last line - # For non-BLOCK tokens, we will need to examine the next line - # too, so we won't consider the last line. - #-------------------------------------------------------------- - if ( $n < $n_last_line ) { - - #-------------------------------------------------------------- - # Vertical Tightness Flags Section 1a: - # Look for Type 1, last token of this line is a non-block opening token - #-------------------------------------------------------------- - my $ibeg_next = $ri_first->[ $n + 1 ]; - my $token_end = $tokens_to_go[$iend]; - my $iend_next = $ri_last->[ $n + 1 ]; - if ( - $type_sequence_to_go[$iend] - && !$block_type_to_go[$iend] - && $is_opening_token{$token_end} - && ( - $opening_vertical_tightness{$token_end} > 0 - - # allow 2-line method call to be closed up - || ( $rOpts_line_up_parentheses - && $token_end eq '(' - && $iend > $ibeg - && $types_to_go[ $iend - 1 ] ne 'b' ) - ) - ) - { - - # avoid multiple jumps in nesting depth in one line if - # requested - my $ovt = $opening_vertical_tightness{$token_end}; - my $iend_next = $ri_last->[ $n + 1 ]; - unless ( - $ovt < 2 - && ( $nesting_depth_to_go[ $iend_next + 1 ] != - $nesting_depth_to_go[$ibeg_next] ) - ) - { - - # If -vt flag has not been set, mark this as invalid - # and aligner will validate it if it sees the closing paren - # within 2 lines. - my $valid_flag = $ovt; - @{$rvertical_tightness_flags} = - ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); - } - } - - #-------------------------------------------------------------- - # Vertical Tightness Flags Section 1b: - # Look for Type 2, first token of next line is a non-block closing - # token .. and be sure this line does not have a side comment - #-------------------------------------------------------------- - my $token_next = $tokens_to_go[$ibeg_next]; - if ( $type_sequence_to_go[$ibeg_next] - && !$block_type_to_go[$ibeg_next] - && $is_closing_token{$token_next} - && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! - { - my $ovt = $opening_vertical_tightness{$token_next}; - my $cvt = $closing_vertical_tightness{$token_next}; - if ( - - # never append a trailing line like )->pack( - # because it will throw off later alignment - ( - $nesting_depth_to_go[$ibeg_next] == - $nesting_depth_to_go[ $iend_next + 1 ] + 1 - ) - && ( - $cvt == 2 - || ( - $container_environment_to_go[$ibeg_next] ne 'LIST' - && ( - $cvt == 1 - - # allow closing up 2-line method calls - || ( $rOpts_line_up_parentheses - && $token_next eq ')' ) - ) - ) - ) - ) - { - - # decide which trailing closing tokens to append.. - my $ok = 0; - if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } - else { - my $str = join( '', - @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); - - # append closing token if followed by comment or ';' - if ( $str =~ /^b?[#;]/ ) { $ok = 1 } - } - - if ($ok) { - my $valid_flag = $cvt; - @{$rvertical_tightness_flags} = ( - 2, - $tightness{$token_next} == 2 ? 0 : 1, - $type_sequence_to_go[$ibeg_next], $valid_flag, - ); - } - } - } - - #-------------------------------------------------------------- - # Vertical Tightness Flags Section 1c: - # Implement the Opening Token Right flag (Type 2).. - # If requested, move an isolated trailing opening token to the end of - # the previous line which ended in a comma. We could do this - # in sub recombine_breakpoints but that would cause problems - # with -lp formatting. The problem is that indentation will - # quickly move far to the right in nested expressions. By - # doing it after indentation has been set, we avoid changes - # to the indentation. Actual movement of the token takes place - # in sub valign_output_step_B. - #-------------------------------------------------------------- - if ( - $opening_token_right{ $tokens_to_go[$ibeg_next] } - - # previous line is not opening - # (use -sot to combine with it) - && !$is_opening_token{$token_end} - - # previous line ended in one of these - # (add other cases if necessary; '=>' and '.' are not necessary - && !$block_type_to_go[$ibeg_next] - - # this is a line with just an opening token - && ( $iend_next == $ibeg_next - || $iend_next == $ibeg_next + 2 - && $types_to_go[$iend_next] eq '#' ) - - # looks bad if we align vertically with the wrong container - && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] - ) - { - my $valid_flag = 1; - my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; - @{$rvertical_tightness_flags} = - ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); - } - - #-------------------------------------------------------------- - # Vertical Tightness Flags Section 1d: - # Stacking of opening and closing tokens (Type 2) - #-------------------------------------------------------------- - my $stackable; - my $token_beg_next = $tokens_to_go[$ibeg_next]; - - # patch to make something like 'qw(' behave like an opening paren - # (aran.t) - if ( $types_to_go[$ibeg_next] eq 'q' ) { - if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { - $token_beg_next = $1; - } - } - - if ( $is_closing_token{$token_end} - && $is_closing_token{$token_beg_next} ) - { - $stackable = $stack_closing_token{$token_beg_next} - unless ( $block_type_to_go[$ibeg_next] ) - ; # shouldn't happen; just checking - } - elsif ($is_opening_token{$token_end} - && $is_opening_token{$token_beg_next} ) - { - $stackable = $stack_opening_token{$token_beg_next} - unless ( $block_type_to_go[$ibeg_next] ) - ; # shouldn't happen; just checking - } - - if ($stackable) { - - my $is_semicolon_terminated; - if ( $n + 1 == $n_last_line ) { - my ( $terminal_type, $i_terminal ) = terminal_type( - \@types_to_go, \@block_type_to_go, - $ibeg_next, $iend_next - ); - $is_semicolon_terminated = $terminal_type eq ';' - && $nesting_depth_to_go[$iend_next] < - $nesting_depth_to_go[$ibeg_next]; - } - - # this must be a line with just an opening token - # or end in a semicolon - if ( - $is_semicolon_terminated - || ( $iend_next == $ibeg_next - || $iend_next == $ibeg_next + 2 - && $types_to_go[$iend_next] eq '#' ) - ) - { - my $valid_flag = 1; - my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; - @{$rvertical_tightness_flags} = - ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, - ); - } - } - } - - #-------------------------------------------------------------- - # Vertical Tightness Flags Section 2: - # Handle type 3, opening block braces on last line of the batch - # Check for a last line with isolated opening BLOCK curly - #-------------------------------------------------------------- - elsif ($rOpts_block_brace_vertical_tightness - && $ibeg eq $iend - && $types_to_go[$iend] eq '{' - && $block_type_to_go[$iend] =~ - /$block_brace_vertical_tightness_pattern/o ) - { - @{$rvertical_tightness_flags} = - ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); - } - - #-------------------------------------------------------------- - # Vertical Tightness Flags Section 3: - # Handle type 4, a closing block brace on the last line of the batch Check - # for a last line with isolated closing BLOCK curly - #-------------------------------------------------------------- - elsif ($rOpts_stack_closing_block_brace - && $ibeg eq $iend - && $block_type_to_go[$iend] - && $types_to_go[$iend] eq '}' ) - { - my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; - @{$rvertical_tightness_flags} = - ( 4, $spaces, $type_sequence_to_go[$iend], 1 ); - } - - # pack in the sequence numbers of the ends of this line - $rvertical_tightness_flags->[4] = get_seqno($ibeg); - $rvertical_tightness_flags->[5] = get_seqno($iend); - return $rvertical_tightness_flags; -} - -sub get_seqno { - - # get opening and closing sequence numbers of a token for the vertical - # aligner. Assign qw quotes a value to allow qw opening and closing tokens - # to be treated somewhat like opening and closing tokens for stacking - # tokens by the vertical aligner. - my ($ii) = @_; - my $seqno = $type_sequence_to_go[$ii]; - if ( $types_to_go[$ii] eq 'q' ) { - my $SEQ_QW = -1; - if ( $ii > 0 ) { - $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); - } - else { - if ( !$ending_in_quote ) { - $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); - } - } - } - return ($seqno); -} - -{ - my %is_vertical_alignment_type; - my %is_vertical_alignment_keyword; - my %is_terminal_alignment_type; - - BEGIN { - - my @q; - - # Removed =~ from list to improve chances of alignment - # Removed // from list to improve chances of alignment (RT# 119588) - @q = qw# - = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - { ? : => && || ~~ !~~ - #; - @is_vertical_alignment_type{@q} = (1) x scalar(@q); - - # only align these at end of line - @q = qw(&& ||); - @is_terminal_alignment_type{@q} = (1) x scalar(@q); - - # eq and ne were removed from this list to improve alignment chances - @q = qw(if unless and or err for foreach while until); - @is_vertical_alignment_keyword{@q} = (1) x scalar(@q); - } - - sub set_vertical_alignment_markers { - - # This routine takes the first step toward vertical alignment of the - # lines of output text. It looks for certain tokens which can serve as - # vertical alignment markers (such as an '='). - # - # Method: We look at each token $i in this output batch and set - # $matching_token_to_go[$i] equal to those tokens at which we would - # accept vertical alignment. - - my ( $ri_first, $ri_last ) = @_; - - # nothing to do if we aren't allowed to change whitespace - if ( !$rOpts_add_whitespace ) { - for my $i ( 0 .. $max_index_to_go ) { - $matching_token_to_go[$i] = ''; - } - return; - } - - # remember the index of last nonblank token before any sidecomment - my $i_terminal = $max_index_to_go; - if ( $types_to_go[$i_terminal] eq '#' ) { - if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { - if ( $i_terminal > 0 ) { --$i_terminal } - } - } - - # look at each line of this batch.. - my $last_vertical_alignment_before_index; - my $vert_last_nonblank_type; - my $vert_last_nonblank_token; - my $vert_last_nonblank_block_type; - my $max_line = @{$ri_first} - 1; - - foreach my $line ( 0 .. $max_line ) { - my $ibeg = $ri_first->[$line]; - my $iend = $ri_last->[$line]; - $last_vertical_alignment_before_index = -1; - $vert_last_nonblank_type = ''; - $vert_last_nonblank_token = ''; - $vert_last_nonblank_block_type = ''; - - # look at each token in this output line.. - foreach my $i ( $ibeg .. $iend ) { - my $alignment_type = ''; - my $type = $types_to_go[$i]; - my $block_type = $block_type_to_go[$i]; - my $token = $tokens_to_go[$i]; - - # check for flag indicating that we should not align - # this token - if ( $matching_token_to_go[$i] ) { - $matching_token_to_go[$i] = ''; - next; - } - - #-------------------------------------------------------- - # First see if we want to align BEFORE this token - #-------------------------------------------------------- - - # The first possible token that we can align before - # is index 2 because: 1) it doesn't normally make sense to - # align before the first token and 2) the second - # token must be a blank if we are to align before - # the third - if ( $i < $ibeg + 2 ) { } - - # must follow a blank token - elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } - - # align a side comment -- - elsif ( $type eq '#' ) { - - unless ( - - # it is a static side comment - ( - $rOpts->{'static-side-comments'} - && $token =~ /$static_side_comment_pattern/o - ) - - # or a closing side comment - || ( $vert_last_nonblank_block_type - && $token =~ - /$closing_side_comment_prefix_pattern/o ) - ) - { - $alignment_type = $type; - } ## Example of a static side comment - } - - # otherwise, do not align two in a row to create a - # blank field - elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } - - # align before one of these keywords - # (within a line, since $i>1) - elsif ( $type eq 'k' ) { - - # /^(if|unless|and|or|eq|ne)$/ - if ( $is_vertical_alignment_keyword{$token} ) { - $alignment_type = $token; - } - } - - # align before one of these types.. - # Note: add '.' after new vertical aligner is operational - elsif ( $is_vertical_alignment_type{$type} ) { - $alignment_type = $token; - - # Do not align a terminal token. Although it might - # occasionally look ok to do this, this has been found to be - # a good general rule. The main problems are: - # (1) that the terminal token (such as an = or :) might get - # moved far to the right where it is hard to see because - # nothing follows it, and - # (2) doing so may prevent other good alignments. - # Current exceptions are && and || - if ( $i == $iend || $i >= $i_terminal ) { - $alignment_type = "" - unless ( $is_terminal_alignment_type{$type} ); - } - - # Do not align leading ': (' or '. ('. This would prevent - # alignment in something like the following: - # $extra_space .= - # ( $input_line_number < 10 ) ? " " - # : ( $input_line_number < 100 ) ? " " - # : ""; - # or - # $code = - # ( $case_matters ? $accessor : " lc($accessor) " ) - # . ( $yesno ? " eq " : " ne " ) - if ( $i == $ibeg + 2 - && $types_to_go[$ibeg] =~ /^[\.\:]$/ - && $types_to_go[ $i - 1 ] eq 'b' ) - { - $alignment_type = ""; - } - - # For a paren after keyword, only align something like this: - # if ( $a ) { &a } - # elsif ( $b ) { &b } - if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { - $alignment_type = "" - unless $vert_last_nonblank_token =~ - /^(if|unless|elsif)$/; - } - - # be sure the alignment tokens are unique - # This didn't work well: reason not determined - # if ($token ne $type) {$alignment_type .= $type} - } - - # NOTE: This is deactivated because it causes the previous - # if/elsif alignment to fail - #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) - #{ $alignment_type = $type; } - - if ($alignment_type) { - $last_vertical_alignment_before_index = $i; - } - - #-------------------------------------------------------- - # Next see if we want to align AFTER the previous nonblank - #-------------------------------------------------------- - - # We want to line up ',' and interior ';' tokens, with the added - # space AFTER these tokens. (Note: interior ';' is included - # because it may occur in short blocks). - if ( - - # we haven't already set it - !$alignment_type - - # and its not the first token of the line - && ( $i > $ibeg ) - - # and it follows a blank - && $types_to_go[ $i - 1 ] eq 'b' - - # and previous token IS one of these: - && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) - - # and it's NOT one of these - && ( $type !~ /^[b\#\)\]\}]$/ ) - - # then go ahead and align - ) - - { - $alignment_type = $vert_last_nonblank_type; - } - - #-------------------------------------------------------- - # then store the value - #-------------------------------------------------------- - $matching_token_to_go[$i] = $alignment_type; - if ( $type ne 'b' ) { - $vert_last_nonblank_type = $type; - $vert_last_nonblank_token = $token; - $vert_last_nonblank_block_type = $block_type; - } - } - } - return; - } -} - -sub terminal_type { - - # returns type of last token on this line (terminal token), as follows: - # returns # for a full-line comment - # returns ' ' for a blank line - # otherwise returns final token type - - my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; - - # check for full-line comment.. - if ( $rtype->[$ibeg] eq '#' ) { - return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg]; - } - else { - - # start at end and walk backwards.. - for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { - - # skip past any side comment and blanks - next if ( $rtype->[$i] eq 'b' ); - next if ( $rtype->[$i] eq '#' ); - - # found it..make sure it is a BLOCK termination, - # but hide a terminal } after sort/grep/map because it is not - # necessarily the end of the line. (terminal.t) - my $terminal_type = $rtype->[$i]; - if ( - $terminal_type eq '}' - && ( !$rblock_type->[$i] - || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) ) - ) - { - $terminal_type = 'b'; - } - return wantarray ? ( $terminal_type, $i ) : $terminal_type; - } - - # empty line - return wantarray ? ( ' ', $ibeg ) : ' '; - } -} - -{ # set_bond_strengths - - my %is_good_keyword_breakpoint; - my %is_lt_gt_le_ge; - - my %binary_bond_strength; - my %nobreak_lhs; - my %nobreak_rhs; - - my @bias_tokens; - my $delta_bias; - - sub bias_table_key { - my ( $type, $token ) = @_; - my $bias_table_key = $type; - if ( $type eq 'k' ) { - $bias_table_key = $token; - if ( $token eq 'err' ) { $bias_table_key = 'or' } - } - return $bias_table_key; - } - - sub set_bond_strengths { - - BEGIN { - - my @q; - @q = qw(if unless while until for foreach); - @is_good_keyword_breakpoint{@q} = (1) x scalar(@q); - - @q = qw(lt gt le ge); - @is_lt_gt_le_ge{@q} = (1) x scalar(@q); - # - # The decision about where to break a line depends upon a "bond - # strength" between tokens. The LOWER the bond strength, the MORE - # likely a break. A bond strength may be any value but to simplify - # things there are several pre-defined strength levels: - - # NO_BREAK => 10000; - # VERY_STRONG => 100; - # STRONG => 2.1; - # NOMINAL => 1.1; - # WEAK => 0.8; - # VERY_WEAK => 0.55; - - # The strength values are based on trial-and-error, and need to be - # tweaked occasionally to get desired results. Some comments: - # - # 1. Only relative strengths are important. small differences - # in strengths can make big formatting differences. - # 2. Each indentation level adds one unit of bond strength. - # 3. A value of NO_BREAK makes an unbreakable bond - # 4. A value of VERY_WEAK is the strength of a ',' - # 5. Values below NOMINAL are considered ok break points. - # 6. Values above NOMINAL are considered poor break points. - # - # The bond strengths should roughly follow precedence order where - # possible. If you make changes, please check the results very - # carefully on a variety of scripts. Testing with the -extrude - # options is particularly helpful in exercising all of the rules. - - # Wherever possible, bond strengths are defined in the following - # tables. There are two main stages to setting bond strengths and - # two types of tables: - # - # The first stage involves looking at each token individually and - # defining left and right bond strengths, according to if we want - # to break to the left or right side, and how good a break point it - # is. For example tokens like =, ||, && make good break points and - # will have low strengths, but one might want to break on either - # side to put them at the end of one line or beginning of the next. - # - # The second stage involves looking at certain pairs of tokens and - # defining a bond strength for that particular pair. This second - # stage has priority. - - #--------------------------------------------------------------- - # Bond Strength BEGIN Section 1. - # Set left and right bond strengths of individual tokens. - #--------------------------------------------------------------- - - # NOTE: NO_BREAK's set in this section first are HINTS which will - # probably not be honored. Essential NO_BREAKS's should be set in - # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end - # of this subroutine. - - # Note that we are setting defaults in this section. The user - # cannot change bond strengths but can cause the left and right - # bond strengths of any token type to be swapped through the use of - # the -wba and -wbb flags. In this way the user can determine if a - # breakpoint token should appear at the end of one line or the - # beginning of the next line. - - # The hash keys in this section are token types, plus the text of - # certain keywords like 'or', 'and'. - - # no break around possible filehandle - $left_bond_strength{'Z'} = NO_BREAK; - $right_bond_strength{'Z'} = NO_BREAK; - - # never put a bare word on a new line: - # example print (STDERR, "bla"); will fail with break after ( - $left_bond_strength{'w'} = NO_BREAK; - - # blanks always have infinite strength to force breaks after - # real tokens - $right_bond_strength{'b'} = NO_BREAK; - - # try not to break on exponentation - @q = qw" ** .. ... <=> "; - @left_bond_strength{@q} = (STRONG) x scalar(@q); - @right_bond_strength{@q} = (STRONG) x scalar(@q); - - # The comma-arrow has very low precedence but not a good break point - $left_bond_strength{'=>'} = NO_BREAK; - $right_bond_strength{'=>'} = NOMINAL; - - # ok to break after label - $left_bond_strength{'J'} = NO_BREAK; - $right_bond_strength{'J'} = NOMINAL; - $left_bond_strength{'j'} = STRONG; - $right_bond_strength{'j'} = STRONG; - $left_bond_strength{'A'} = STRONG; - $right_bond_strength{'A'} = STRONG; - - $left_bond_strength{'->'} = STRONG; - $right_bond_strength{'->'} = VERY_STRONG; - - $left_bond_strength{'CORE::'} = NOMINAL; - $right_bond_strength{'CORE::'} = NO_BREAK; - - # breaking AFTER modulus operator is ok: - @q = qw" % "; - @left_bond_strength{@q} = (STRONG) x scalar(@q); - @right_bond_strength{@q} = - ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q); - - # Break AFTER math operators * and / - @q = qw" * / x "; - @left_bond_strength{@q} = (STRONG) x scalar(@q); - @right_bond_strength{@q} = (NOMINAL) x scalar(@q); - - # Break AFTER weakest math operators + and - - # Make them weaker than * but a bit stronger than '.' - @q = qw" + - "; - @left_bond_strength{@q} = (STRONG) x scalar(@q); - @right_bond_strength{@q} = - ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q); - - # breaking BEFORE these is just ok: - @q = qw" >> << "; - @right_bond_strength{@q} = (STRONG) x scalar(@q); - @left_bond_strength{@q} = (NOMINAL) x scalar(@q); - - # breaking before the string concatenation operator seems best - # because it can be hard to see at the end of a line - $right_bond_strength{'.'} = STRONG; - $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; - - @q = qw"} ] ) R"; - @left_bond_strength{@q} = (STRONG) x scalar(@q); - @right_bond_strength{@q} = (NOMINAL) x scalar(@q); - - # make these a little weaker than nominal so that they get - # favored for end-of-line characters - @q = qw"!= == =~ !~ ~~ !~~"; - @left_bond_strength{@q} = (STRONG) x scalar(@q); - @right_bond_strength{@q} = - ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q); - - # break AFTER these - @q = qw" < > | & >= <="; - @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q); - @right_bond_strength{@q} = - ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q); - - # breaking either before or after a quote is ok - # but bias for breaking before a quote - $left_bond_strength{'Q'} = NOMINAL; - $right_bond_strength{'Q'} = NOMINAL + 0.02; - $left_bond_strength{'q'} = NOMINAL; - $right_bond_strength{'q'} = NOMINAL; - - # starting a line with a keyword is usually ok - $left_bond_strength{'k'} = NOMINAL; - - # we usually want to bond a keyword strongly to what immediately - # follows, rather than leaving it stranded at the end of a line - $right_bond_strength{'k'} = STRONG; - - $left_bond_strength{'G'} = NOMINAL; - $right_bond_strength{'G'} = STRONG; - - # assignment operators - @q = qw( - = **= += *= &= <<= &&= - -= /= |= >>= ||= //= - .= %= ^= - x= - ); - - # Default is to break AFTER various assignment operators - @left_bond_strength{@q} = (STRONG) x scalar(@q); - @right_bond_strength{@q} = - ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q); - - # Default is to break BEFORE '&&' and '||' and '//' - # set strength of '||' to same as '=' so that chains like - # $a = $b || $c || $d will break before the first '||' - $right_bond_strength{'||'} = NOMINAL; - $left_bond_strength{'||'} = $right_bond_strength{'='}; - - # same thing for '//' - $right_bond_strength{'//'} = NOMINAL; - $left_bond_strength{'//'} = $right_bond_strength{'='}; - - # set strength of && a little higher than || - $right_bond_strength{'&&'} = NOMINAL; - $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; - - $left_bond_strength{';'} = VERY_STRONG; - $right_bond_strength{';'} = VERY_WEAK; - $left_bond_strength{'f'} = VERY_STRONG; - - # make right strength of for ';' a little less than '=' - # to make for contents break after the ';' to avoid this: - # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += - # $number_of_fields ) - # and make it weaker than ',' and 'and' too - $right_bond_strength{'f'} = VERY_WEAK - 0.03; - - # The strengths of ?/: should be somewhere between - # an '=' and a quote (NOMINAL), - # make strength of ':' slightly less than '?' to help - # break long chains of ? : after the colons - $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; - $right_bond_strength{':'} = NO_BREAK; - $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; - $right_bond_strength{'?'} = NO_BREAK; - - $left_bond_strength{','} = VERY_STRONG; - $right_bond_strength{','} = VERY_WEAK; - - # remaining digraphs and trigraphs not defined above - @q = qw( :: <> ++ --); - @left_bond_strength{@q} = (WEAK) x scalar(@q); - @right_bond_strength{@q} = (STRONG) x scalar(@q); - - # Set bond strengths of certain keywords - # make 'or', 'err', 'and' slightly weaker than a ',' - $left_bond_strength{'and'} = VERY_WEAK - 0.01; - $left_bond_strength{'or'} = VERY_WEAK - 0.02; - $left_bond_strength{'err'} = VERY_WEAK - 0.02; - $left_bond_strength{'xor'} = NOMINAL; - $right_bond_strength{'and'} = NOMINAL; - $right_bond_strength{'or'} = NOMINAL; - $right_bond_strength{'err'} = NOMINAL; - $right_bond_strength{'xor'} = STRONG; - - #--------------------------------------------------------------- - # Bond Strength BEGIN Section 2. - # Set binary rules for bond strengths between certain token types. - #--------------------------------------------------------------- - - # We have a little problem making tables which apply to the - # container tokens. Here is a list of container tokens and - # their types: - # - # type tokens // meaning - # { {, [, ( // indent - # } }, ], ) // outdent - # [ [ // left non-structural [ (enclosing an array index) - # ] ] // right non-structural square bracket - # ( ( // left non-structural paren - # ) ) // right non-structural paren - # L { // left non-structural curly brace (enclosing a key) - # R } // right non-structural curly brace - # - # Some rules apply to token types and some to just the token - # itself. We solve the problem by combining type and token into a - # new hash key for the container types. - # - # If a rule applies to a token 'type' then we need to make rules - # for each of these 'type.token' combinations: - # Type Type.Token - # { {{, {[, {( - # [ [[ - # ( (( - # L L{ - # } }}, }], }) - # ] ]] - # ) )) - # R R} - # - # If a rule applies to a token then we need to make rules for - # these 'type.token' combinations: - # Token Type.Token - # { {{, L{ - # [ {[, [[ - # ( {(, (( - # } }}, R} - # ] }], ]] - # ) }), )) - - # allow long lines before final { in an if statement, as in: - # if (.......... - # ..........) - # { - # - # Otherwise, the line before the { tends to be too short. - - $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; - $binary_bond_strength{'(('}{'{{'} = NOMINAL; - - # break on something like '} (', but keep this stronger than a ',' - # example is in 'howe.pl' - $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; - $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; - - # keep matrix and hash indices together - # but make them a little below STRONG to allow breaking open - # something like {'some-word'}{'some-very-long-word'} at the }{ - # (bracebrk.t) - $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; - $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; - $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; - $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; - - # increase strength to the point where a break in the following - # will be after the opening paren rather than at the arrow: - # $a->$b($c); - $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; - - $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; - $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; - $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; - $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; - $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; - $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; - - $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; - $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; - $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; - $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; - - #--------------------------------------------------------------- - # Binary NO_BREAK rules - #--------------------------------------------------------------- - - # use strict requires that bare word and => not be separated - $binary_bond_strength{'C'}{'=>'} = NO_BREAK; - $binary_bond_strength{'U'}{'=>'} = NO_BREAK; - - # Never break between a bareword and a following paren because - # perl may give an error. For example, if a break is placed - # between 'to_filehandle' and its '(' the following line will - # give a syntax error [Carp.pm]: my( $no) =fileno( - # to_filehandle( $in)) ; - $binary_bond_strength{'C'}{'(('} = NO_BREAK; - $binary_bond_strength{'C'}{'{('} = NO_BREAK; - $binary_bond_strength{'U'}{'(('} = NO_BREAK; - $binary_bond_strength{'U'}{'{('} = NO_BREAK; - - # use strict requires that bare word within braces not start new - # line - $binary_bond_strength{'L{'}{'w'} = NO_BREAK; - - $binary_bond_strength{'w'}{'R}'} = NO_BREAK; - - # use strict requires that bare word and => not be separated - $binary_bond_strength{'w'}{'=>'} = NO_BREAK; - - # use strict does not allow separating type info from trailing { } - # testfile is readmail.pl - $binary_bond_strength{'t'}{'L{'} = NO_BREAK; - $binary_bond_strength{'i'}{'L{'} = NO_BREAK; - - # As a defensive measure, do not break between a '(' and a - # filehandle. In some cases, this can cause an error. For - # example, the following program works: - # my $msg="hi!\n"; - # print - # ( STDOUT - # $msg - # ); - # - # But this program fails: - # my $msg="hi!\n"; - # print - # ( - # STDOUT - # $msg - # ); - # - # This is normally only a problem with the 'extrude' option - $binary_bond_strength{'(('}{'Y'} = NO_BREAK; - $binary_bond_strength{'{('}{'Y'} = NO_BREAK; - - # never break between sub name and opening paren - $binary_bond_strength{'w'}{'(('} = NO_BREAK; - $binary_bond_strength{'w'}{'{('} = NO_BREAK; - - # keep '}' together with ';' - $binary_bond_strength{'}}'}{';'} = NO_BREAK; - - # Breaking before a ++ can cause perl to guess wrong. For - # example the following line will cause a syntax error - # with -extrude if we break between '$i' and '++' [fixstyle2] - # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); - $nobreak_lhs{'++'} = NO_BREAK; - - # Do not break before a possible file handle - $nobreak_lhs{'Z'} = NO_BREAK; - - # use strict hates bare words on any new line. For - # example, a break before the underscore here provokes the - # wrath of use strict: - # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { - $nobreak_rhs{'F'} = NO_BREAK; - $nobreak_rhs{'CORE::'} = NO_BREAK; - - #--------------------------------------------------------------- - # Bond Strength BEGIN Section 3. - # Define tables and values for applying a small bias to the above - # values. - #--------------------------------------------------------------- - # Adding a small 'bias' to strengths is a simple way to make a line - # break at the first of a sequence of identical terms. For - # example, to force long string of conditional operators to break - # with each line ending in a ':', we can add a small number to the - # bond strength of each ':' (colon.t) - @bias_tokens = qw( : && || f and or . ); # tokens which get bias - $delta_bias = 0.0001; # a very small strength level - - } ## end BEGIN - - # patch-its always ok to break at end of line - $nobreak_to_go[$max_index_to_go] = 0; - - # we start a new set of bias values for each line - my %bias; - @bias{@bias_tokens} = (0) x scalar(@bias_tokens); - my $code_bias = -.01; # bias for closing block braces - - my $type = 'b'; - my $token = ' '; - my $last_type; - my $last_nonblank_type = $type; - my $last_nonblank_token = $token; - my $list_str = $left_bond_strength{'?'}; - - my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, - $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, - ); - - # main loop to compute bond strengths between each pair of tokens - foreach my $i ( 0 .. $max_index_to_go ) { - $last_type = $type; - if ( $type ne 'b' ) { - $last_nonblank_type = $type; - $last_nonblank_token = $token; - } - $type = $types_to_go[$i]; - - # strength on both sides of a blank is the same - if ( $type eq 'b' && $last_type ne 'b' ) { - $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; - next; - } - - $token = $tokens_to_go[$i]; - $block_type = $block_type_to_go[$i]; - $i_next = $i + 1; - $next_type = $types_to_go[$i_next]; - $next_token = $tokens_to_go[$i_next]; - $total_nesting_depth = $nesting_depth_to_go[$i_next]; - $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - # We are computing the strength of the bond between the current - # token and the NEXT token. - - #--------------------------------------------------------------- - # Bond Strength Section 1: - # First Approximation. - # Use minimum of individual left and right tabulated bond - # strengths. - #--------------------------------------------------------------- - my $bsr = $right_bond_strength{$type}; - my $bsl = $left_bond_strength{$next_nonblank_type}; - - # define right bond strengths of certain keywords - if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { - $bsr = $right_bond_strength{$token}; - } - elsif ( $token eq 'ne' or $token eq 'eq' ) { - $bsr = NOMINAL; - } - - # set terminal bond strength to the nominal value - # this will cause good preceding breaks to be retained - if ( $i_next_nonblank > $max_index_to_go ) { - $bsl = NOMINAL; - } - - # define right bond strengths of certain keywords - if ( $next_nonblank_type eq 'k' - && defined( $left_bond_strength{$next_nonblank_token} ) ) - { - $bsl = $left_bond_strength{$next_nonblank_token}; - } - elsif ($next_nonblank_token eq 'ne' - or $next_nonblank_token eq 'eq' ) - { - $bsl = NOMINAL; - } - elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { - $bsl = 0.9 * NOMINAL + 0.1 * STRONG; - } - - # Use the minimum of the left and right strengths. Note: it might - # seem that we would want to keep a NO_BREAK if either token has - # this value. This didn't work, for example because in an arrow - # list, it prevents the comma from separating from the following - # bare word (which is probably quoted by its arrow). So necessary - # NO_BREAK's have to be handled as special cases in the final - # section. - if ( !defined($bsr) ) { $bsr = VERY_STRONG } - if ( !defined($bsl) ) { $bsl = VERY_STRONG } - my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; - my $bond_str_1 = $bond_str; - - #--------------------------------------------------------------- - # Bond Strength Section 2: - # Apply hardwired rules.. - #--------------------------------------------------------------- - - # Patch to put terminal or clauses on a new line: Weaken the bond - # at an || followed by die or similar keyword to make the terminal - # or clause fall on a new line, like this: - # - # my $class = shift - # || die "Cannot add broadcast: No class identifier found"; - # - # Otherwise the break will be at the previous '=' since the || and - # = have the same starting strength and the or is biased, like - # this: - # - # my $class = - # shift || die "Cannot add broadcast: No class identifier found"; - # - # In any case if the user places a break at either the = or the || - # it should remain there. - if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { - if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) { - if ( $want_break_before{$token} && $i > 0 ) { - $bond_strength_to_go[ $i - 1 ] -= $delta_bias; - } - else { - $bond_str -= $delta_bias; - } - } - } - - # good to break after end of code blocks - if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { - - $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; - $code_bias += $delta_bias; - } - - if ( $type eq 'k' ) { - - # allow certain control keywords to stand out - if ( $next_nonblank_type eq 'k' - && $is_last_next_redo_return{$token} ) - { - $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; - } - - # Don't break after keyword my. This is a quick fix for a - # rare problem with perl. An example is this line from file - # Container.pm: - - # foreach my $question( Debian::DebConf::ConfigDb::gettree( - # $this->{'question'} ) ) - - if ( $token eq 'my' ) { - $bond_str = NO_BREAK; - } - - } - - # good to break before 'if', 'unless', etc - if ( $is_if_brace_follower{$next_nonblank_token} ) { - $bond_str = VERY_WEAK; - } - - if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { - - # FIXME: needs more testing - if ( $is_keyword_returning_list{$next_nonblank_token} ) { - $bond_str = $list_str if ( $bond_str > $list_str ); - } - - # keywords like 'unless', 'if', etc, within statements - # make good breaks - if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { - $bond_str = VERY_WEAK / 1.05; - } - } - - # try not to break before a comma-arrow - elsif ( $next_nonblank_type eq '=>' ) { - if ( $bond_str < STRONG ) { $bond_str = STRONG } - } - - #--------------------------------------------------------------- - # Additional hardwired NOBREAK rules - #--------------------------------------------------------------- - - # map1.t -- correct for a quirk in perl - if ( $token eq '(' - && $next_nonblank_type eq 'i' - && $last_nonblank_type eq 'k' - && $is_sort_map_grep{$last_nonblank_token} ) - - # /^(sort|map|grep)$/ ) - { - $bond_str = NO_BREAK; - } - - # extrude.t: do not break before paren at: - # -l pid_filename( - if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { - $bond_str = NO_BREAK; - } - - # in older version of perl, use strict can cause problems with - # breaks before bare words following opening parens. For example, - # this will fail under older versions if a break is made between - # '(' and 'MAIL': use strict; open( MAIL, "a long filename or - # command"); close MAIL; - if ( $type eq '{' ) { - - if ( $token eq '(' && $next_nonblank_type eq 'w' ) { - - # but it's fine to break if the word is followed by a '=>' - # or if it is obviously a sub call - my $i_next_next_nonblank = $i_next_nonblank + 1; - my $next_next_type = $types_to_go[$i_next_next_nonblank]; - if ( $next_next_type eq 'b' - && $i_next_nonblank < $max_index_to_go ) - { - $i_next_next_nonblank++; - $next_next_type = $types_to_go[$i_next_next_nonblank]; - } - - # We'll check for an old breakpoint and keep a leading - # bareword if it was that way in the input file. - # Presumably it was ok that way. For example, the - # following would remain unchanged: - # - # @months = ( - # January, February, March, April, - # May, June, July, August, - # September, October, November, December, - # ); - # - # This should be sufficient: - if ( - !$old_breakpoint_to_go[$i] - && ( $next_next_type eq ',' - || $next_next_type eq '}' ) - ) - { - $bond_str = NO_BREAK; - } - } - } - - # Do not break between a possible filehandle and a ? or / and do - # not introduce a break after it if there is no blank - # (extrude.t) - elsif ( $type eq 'Z' ) { - - # don't break.. - if ( - - # if there is no blank and we do not want one. Examples: - # print $x++ # do not break after $x - # print HTML"HELLO" # break ok after HTML - ( - $next_type ne 'b' - && defined( $want_left_space{$next_type} ) - && $want_left_space{$next_type} == WS_NO - ) - - # or we might be followed by the start of a quote - || $next_nonblank_type =~ /^[\/\?]$/ - ) - { - $bond_str = NO_BREAK; - } - } - - # Breaking before a ? before a quote can cause trouble if - # they are not separated by a blank. - # Example: a syntax error occurs if you break before the ? here - # my$logic=join$all?' && ':' || ',@regexps; - # From: Professional_Perl_Programming_Code/multifind.pl - if ( $next_nonblank_type eq '?' ) { - $bond_str = NO_BREAK - if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); - } - - # Breaking before a . followed by a number - # can cause trouble if there is no intervening space - # Example: a syntax error occurs if you break before the .2 here - # $str .= pack($endian.2, ensurrogate($ord)); - # From: perl58/Unicode.pm - elsif ( $next_nonblank_type eq '.' ) { - $bond_str = NO_BREAK - if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); - } - - # patch to put cuddled elses back together when on multiple - # lines, as in: } \n else \n { \n - if ($rOpts_cuddled_else) { - - if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) - || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) - { - $bond_str = NO_BREAK; - } - } - my $bond_str_2 = $bond_str; - - #--------------------------------------------------------------- - # End of hardwired rules - #--------------------------------------------------------------- - - #--------------------------------------------------------------- - # Bond Strength Section 3: - # Apply table rules. These have priority over the above - # hardwired rules. - #--------------------------------------------------------------- - - my $tabulated_bond_str; - my $ltype = $type; - my $rtype = $next_nonblank_type; - if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token } - if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) { - $rtype = $next_nonblank_type . $next_nonblank_token; - } - - if ( $binary_bond_strength{$ltype}{$rtype} ) { - $bond_str = $binary_bond_strength{$ltype}{$rtype}; - $tabulated_bond_str = $bond_str; - } - - if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { - $bond_str = NO_BREAK; - $tabulated_bond_str = $bond_str; - } - my $bond_str_3 = $bond_str; - - # If the hardwired rules conflict with the tabulated bond - # strength then there is an inconsistency that should be fixed - FORMATTER_DEBUG_FLAG_BOND_TABLES - && $tabulated_bond_str - && $bond_str_1 - && $bond_str_1 != $bond_str_2 - && $bond_str_2 != $tabulated_bond_str - && do { - print STDERR -"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; - }; - - #----------------------------------------------------------------- - # Bond Strength Section 4: - # Modify strengths of certain tokens which often occur in sequence - # by adding a small bias to each one in turn so that the breaks - # occur from left to right. - # - # Note that we only changing strengths by small amounts here, - # and usually increasing, so we should not be altering any NO_BREAKs. - # Other routines which check for NO_BREAKs will use a tolerance - # of one to avoid any problem. - #----------------------------------------------------------------- - - # The bias tables use special keys - my $left_key = bias_table_key( $type, $token ); - my $right_key = - bias_table_key( $next_nonblank_type, $next_nonblank_token ); - - # add any bias set by sub scan_list at old comma break points. - if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] } - - # bias left token - elsif ( defined( $bias{$left_key} ) ) { - if ( !$want_break_before{$left_key} ) { - $bias{$left_key} += $delta_bias; - $bond_str += $bias{$left_key}; - } - } - - # bias right token - if ( defined( $bias{$right_key} ) ) { - if ( $want_break_before{$right_key} ) { - - # for leading '.' align all but 'short' quotes; the idea - # is to not place something like "\n" on a single line. - if ( $right_key eq '.' ) { - unless ( - $last_nonblank_type eq '.' - && ( - length($token) <= - $rOpts_short_concatenation_item_length ) - && ( !$is_closing_token{$token} ) - ) - { - $bias{$right_key} += $delta_bias; - } - } - else { - $bias{$right_key} += $delta_bias; - } - $bond_str += $bias{$right_key}; - } - } - my $bond_str_4 = $bond_str; - - #--------------------------------------------------------------- - # Bond Strength Section 5: - # Fifth Approximation. - # Take nesting depth into account by adding the nesting depth - # to the bond strength. - #--------------------------------------------------------------- - my $strength; - - if ( defined($bond_str) && !$nobreak_to_go[$i] ) { - if ( $total_nesting_depth > 0 ) { - $strength = $bond_str + $total_nesting_depth; - } - else { - $strength = $bond_str; - } - } - else { - $strength = NO_BREAK; - } - - #--------------------------------------------------------------- - # Bond Strength Section 6: - # Sixth Approximation. Welds. - #--------------------------------------------------------------- - - # Do not allow a break within welds, - if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK } - - # But encourage breaking after opening welded tokens - elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) { - $strength -= 1; - } - -## # TESTING: weaken before first weld closing token -## # This did not help -## elsif ($i_next_nonblank <= $max_index_to_go -## && weld_len_right_to_go($i_next_nonblank) -## && $next_nonblank_token =~ /^[\}\]\)]$/ ) -## { -## $strength -= 0.9; -## } - - # always break after side comment - if ( $type eq '#' ) { $strength = 0 } - - $bond_strength_to_go[$i] = $strength; - - FORMATTER_DEBUG_FLAG_BOND && do { - my $str = substr( $token, 0, 15 ); - $str .= ' ' x ( 16 - length($str) ); - print STDOUT -"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; - }; - } ## end main loop - return; - } ## end sub set_bond_strengths -} - -sub pad_array_to_go { - - # to simplify coding in scan_list and set_bond_strengths, it helps - # to create some extra blank tokens at the end of the arrays - $tokens_to_go[ $max_index_to_go + 1 ] = ''; - $tokens_to_go[ $max_index_to_go + 2 ] = ''; - $types_to_go[ $max_index_to_go + 1 ] = 'b'; - $types_to_go[ $max_index_to_go + 2 ] = 'b'; - $nesting_depth_to_go[ $max_index_to_go + 1 ] = - $nesting_depth_to_go[$max_index_to_go]; - - # /^[R\}\)\]]$/ - if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { - if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { - - # shouldn't happen: - unless ( get_saw_brace_error() ) { - warning( -"Program bug in scan_list: hit nesting error which should have been caught\n" - ); - report_definite_bug(); - } - } - else { - $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; - } - } - - # /^[L\{\(\[]$/ - elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { - $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; - } - return; -} - -{ # begin scan_list - - my ( - $block_type, $current_depth, - $depth, $i, - $i_last_nonblank_token, $last_colon_sequence_number, - $last_nonblank_token, $last_nonblank_type, - $last_nonblank_block_type, $last_old_breakpoint_count, - $minimum_depth, $next_nonblank_block_type, - $next_nonblank_token, $next_nonblank_type, - $old_breakpoint_count, $starting_breakpoint_count, - $starting_depth, $token, - $type, $type_sequence, - ); - - my ( - @breakpoint_stack, @breakpoint_undo_stack, - @comma_index, @container_type, - @identifier_count_stack, @index_before_arrow, - @interrupted_list, @item_count_stack, - @last_comma_index, @last_dot_index, - @last_nonblank_type, @old_breakpoint_count_stack, - @opening_structure_index_stack, @rfor_semicolon_list, - @has_old_logical_breakpoints, @rand_or_list, - @i_equals, - ); - - # routine to define essential variables when we go 'up' to - # a new depth - sub check_for_new_minimum_depth { - my $depth = shift; - if ( $depth < $minimum_depth ) { - - $minimum_depth = $depth; - - # these arrays need not retain values between calls - $breakpoint_stack[$depth] = $starting_breakpoint_count; - $container_type[$depth] = ""; - $identifier_count_stack[$depth] = 0; - $index_before_arrow[$depth] = -1; - $interrupted_list[$depth] = 1; - $item_count_stack[$depth] = 0; - $last_nonblank_type[$depth] = ""; - $opening_structure_index_stack[$depth] = -1; - - $breakpoint_undo_stack[$depth] = undef; - $comma_index[$depth] = undef; - $last_comma_index[$depth] = undef; - $last_dot_index[$depth] = undef; - $old_breakpoint_count_stack[$depth] = undef; - $has_old_logical_breakpoints[$depth] = 0; - $rand_or_list[$depth] = []; - $rfor_semicolon_list[$depth] = []; - $i_equals[$depth] = -1; - - # these arrays must retain values between calls - if ( !defined( $has_broken_sublist[$depth] ) ) { - $dont_align[$depth] = 0; - $has_broken_sublist[$depth] = 0; - $want_comma_break[$depth] = 0; - } - } - return; - } - - # routine to decide which commas to break at within a container; - # returns: - # $bp_count = number of comma breakpoints set - # $do_not_break_apart = a flag indicating if container need not - # be broken open - sub set_comma_breakpoints { - - my $dd = shift; - my $bp_count = 0; - my $do_not_break_apart = 0; - - # anything to do? - if ( $item_count_stack[$dd] ) { - - # handle commas not in containers... - if ( $dont_align[$dd] ) { - do_uncontained_comma_breaks($dd); - } - - # handle commas within containers... - else { - my $fbc = $forced_breakpoint_count; - - # always open comma lists not preceded by keywords, - # barewords, identifiers (that is, anything that doesn't - # look like a function call) - my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; - - set_comma_breakpoints_do( - $dd, - $opening_structure_index_stack[$dd], - $i, - $item_count_stack[$dd], - $identifier_count_stack[$dd], - $comma_index[$dd], - $next_nonblank_type, - $container_type[$dd], - $interrupted_list[$dd], - \$do_not_break_apart, - $must_break_open, - ); - $bp_count = $forced_breakpoint_count - $fbc; - $do_not_break_apart = 0 if $must_break_open; - } - } - return ( $bp_count, $do_not_break_apart ); - } - - sub do_uncontained_comma_breaks { - - # Handle commas not in containers... - # This is a catch-all routine for commas that we - # don't know what to do with because the don't fall - # within containers. We will bias the bond strength - # to break at commas which ended lines in the input - # file. This usually works better than just trying - # to put as many items on a line as possible. A - # downside is that if the input file is garbage it - # won't work very well. However, the user can always - # prevent following the old breakpoints with the - # -iob flag. - my $dd = shift; - my $bias = -.01; - my $old_comma_break_count = 0; - foreach my $ii ( @{ $comma_index[$dd] } ) { - if ( $old_breakpoint_to_go[$ii] ) { - $old_comma_break_count++; - $bond_strength_to_go[$ii] = $bias; - - # reduce bias magnitude to force breaks in order - $bias *= 0.99; - } - } - - # Also put a break before the first comma if - # (1) there was a break there in the input, and - # (2) there was exactly one old break before the first comma break - # (3) OLD: there are multiple old comma breaks - # (3) NEW: there are one or more old comma breaks (see return example) - # - # For example, we will follow the user and break after - # 'print' in this snippet: - # print - # "conformability (Not the same dimension)\n", - # "\t", $have, " is ", text_unit($hu), "\n", - # "\t", $want, " is ", text_unit($wu), "\n", - # ; - # - # Another example, just one comma, where we will break after - # the return: - # return - # $x * cos($a) - $y * sin($a), - # $x * sin($a) + $y * cos($a); - - # Breaking a print statement: - # print SAVEOUT - # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", - # ( $? & 128 ) ? " -- core dumped" : "", "\n"; - # - # But we will not force a break after the opening paren here - # (causes a blinker): - # $heap->{stream}->set_output_filter( - # poe::filter::reference->new('myotherfreezer') ), - # ; - # - my $i_first_comma = $comma_index[$dd]->[0]; - if ( $old_breakpoint_to_go[$i_first_comma] ) { - my $level_comma = $levels_to_go[$i_first_comma]; - my $ibreak = -1; - my $obp_count = 0; - for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { - if ( $old_breakpoint_to_go[$ii] ) { - $obp_count++; - last if ( $obp_count > 1 ); - $ibreak = $ii - if ( $levels_to_go[$ii] == $level_comma ); - } - } - - # Changed rule from multiple old commas to just one here: - if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) - { - # Do not to break before an opening token because - # it can lead to "blinkers". - my $ibreakm = $ibreak; - $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' ); - if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ ) - { - set_forced_breakpoint($ibreak); - } - } - } - return; - } - - my %is_logical_container; - - BEGIN { - my @q = qw# if elsif unless while and or err not && | || ? : ! #; - @is_logical_container{@q} = (1) x scalar(@q); - } - - sub set_for_semicolon_breakpoints { - my $dd = shift; - foreach ( @{ $rfor_semicolon_list[$dd] } ) { - set_forced_breakpoint($_); - } - return; - } - - sub set_logical_breakpoints { - my $dd = shift; - if ( - $item_count_stack[$dd] == 0 - && $is_logical_container{ $container_type[$dd] } - - || $has_old_logical_breakpoints[$dd] - ) - { - - # Look for breaks in this order: - # 0 1 2 3 - # or and || && - foreach my $i ( 0 .. 3 ) { - if ( $rand_or_list[$dd][$i] ) { - foreach ( @{ $rand_or_list[$dd][$i] } ) { - set_forced_breakpoint($_); - } - - # break at any 'if' and 'unless' too - foreach ( @{ $rand_or_list[$dd][4] } ) { - set_forced_breakpoint($_); - } - $rand_or_list[$dd] = []; - last; - } - } - } - return; - } - - sub is_unbreakable_container { - - # never break a container of one of these types - # because bad things can happen (map1.t) - my $dd = shift; - return $is_sort_map_grep{ $container_type[$dd] }; - } - - sub scan_list { - - # This routine is responsible for setting line breaks for all lists, - # so that hierarchical structure can be displayed and so that list - # items can be vertically aligned. The output of this routine is - # stored in the array @forced_breakpoint_to_go, which is used to set - # final breakpoints. - - $starting_depth = $nesting_depth_to_go[0]; - - $block_type = ' '; - $current_depth = $starting_depth; - $i = -1; - $last_colon_sequence_number = -1; - $last_nonblank_token = ';'; - $last_nonblank_type = ';'; - $last_nonblank_block_type = ' '; - $last_old_breakpoint_count = 0; - $minimum_depth = $current_depth + 1; # forces update in check below - $old_breakpoint_count = 0; - $starting_breakpoint_count = $forced_breakpoint_count; - $token = ';'; - $type = ';'; - $type_sequence = ''; - - my $total_depth_variation = 0; - my $i_old_assignment_break; - my $depth_last = $starting_depth; - - check_for_new_minimum_depth($current_depth); - - my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; - my $want_previous_breakpoint = -1; - - my $saw_good_breakpoint; - my $i_line_end = -1; - my $i_line_start = -1; - - # loop over all tokens in this batch - while ( ++$i <= $max_index_to_go ) { - if ( $type ne 'b' ) { - $i_last_nonblank_token = $i - 1; - $last_nonblank_type = $type; - $last_nonblank_token = $token; - $last_nonblank_block_type = $block_type; - } ## end if ( $type ne 'b' ) - $type = $types_to_go[$i]; - $block_type = $block_type_to_go[$i]; - $token = $tokens_to_go[$i]; - $type_sequence = $type_sequence_to_go[$i]; - my $next_type = $types_to_go[ $i + 1 ]; - my $next_token = $tokens_to_go[ $i + 1 ]; - my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - - # set break if flag was set - if ( $want_previous_breakpoint >= 0 ) { - set_forced_breakpoint($want_previous_breakpoint); - $want_previous_breakpoint = -1; - } - - $last_old_breakpoint_count = $old_breakpoint_count; - if ( $old_breakpoint_to_go[$i] ) { - $i_line_end = $i; - $i_line_start = $i_next_nonblank; - - $old_breakpoint_count++; - - # Break before certain keywords if user broke there and - # this is a 'safe' break point. The idea is to retain - # any preferred breaks for sequential list operations, - # like a schwartzian transform. - if ($rOpts_break_at_old_keyword_breakpoints) { - if ( - $next_nonblank_type eq 'k' - && $is_keyword_returning_list{$next_nonblank_token} - && ( $type =~ /^[=\)\]\}Riw]$/ - || $type eq 'k' - && $is_keyword_returning_list{$token} ) - ) - { - - # we actually have to set this break next time through - # the loop because if we are at a closing token (such - # as '}') which forms a one-line block, this break might - # get undone. - $want_previous_breakpoint = $i; - } ## end if ( $next_nonblank_type...) - } ## end if ($rOpts_break_at_old_keyword_breakpoints) - - # Break before attributes if user broke there - if ($rOpts_break_at_old_attribute_breakpoints) { - if ( $next_nonblank_type eq 'A' ) { - $want_previous_breakpoint = $i; - } - } - - # remember an = break as possible good break point - if ( $is_assignment{$type} ) { - $i_old_assignment_break = $i; - } - elsif ( $is_assignment{$next_nonblank_type} ) { - $i_old_assignment_break = $i_next_nonblank; - } - } ## end if ( $old_breakpoint_to_go...) - - next if ( $type eq 'b' ); - $depth = $nesting_depth_to_go[ $i + 1 ]; - - $total_depth_variation += abs( $depth - $depth_last ); - $depth_last = $depth; - - # safety check - be sure we always break after a comment - # Shouldn't happen .. an error here probably means that the - # nobreak flag did not get turned off correctly during - # formatting. - if ( $type eq '#' ) { - if ( $i != $max_index_to_go ) { - warning( -"Non-fatal program bug: backup logic needed to break after a comment\n" - ); - report_definite_bug(); - $nobreak_to_go[$i] = 0; - set_forced_breakpoint($i); - } ## end if ( $i != $max_index_to_go) - } ## end if ( $type eq '#' ) - - # Force breakpoints at certain tokens in long lines. - # Note that such breakpoints will be undone later if these tokens - # are fully contained within parens on a line. - if ( - - # break before a keyword within a line - $type eq 'k' - && $i > 0 - - # if one of these keywords: - && $token =~ /^(if|unless|while|until|for)$/ - - # but do not break at something like '1 while' - && ( $last_nonblank_type ne 'n' || $i > 2 ) - - # and let keywords follow a closing 'do' brace - && $last_nonblank_block_type ne 'do' - - && ( - $is_long_line - - # or container is broken (by side-comment, etc) - || ( $next_nonblank_token eq '(' - && $mate_index_to_go[$i_next_nonblank] < $i ) - ) - ) - { - set_forced_breakpoint( $i - 1 ); - } ## end if ( $type eq 'k' && $i...) - - # remember locations of '||' and '&&' for possible breaks if we - # decide this is a long logical expression. - if ( $type eq '||' ) { - push @{ $rand_or_list[$depth][2] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end if ( $type eq '||' ) - elsif ( $type eq '&&' ) { - push @{ $rand_or_list[$depth][3] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end elsif ( $type eq '&&' ) - elsif ( $type eq 'f' ) { - push @{ $rfor_semicolon_list[$depth] }, $i; - } - elsif ( $type eq 'k' ) { - if ( $token eq 'and' ) { - push @{ $rand_or_list[$depth][1] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end if ( $token eq 'and' ) - - # break immediately at 'or's which are probably not in a logical - # block -- but we will break in logical breaks below so that - # they do not add to the forced_breakpoint_count - elsif ( $token eq 'or' ) { - push @{ $rand_or_list[$depth][0] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - if ( $is_logical_container{ $container_type[$depth] } ) { - } - else { - if ($is_long_line) { set_forced_breakpoint($i) } - elsif ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) - { - $saw_good_breakpoint = 1; - } - } ## end else [ if ( $is_logical_container...)] - } ## end elsif ( $token eq 'or' ) - elsif ( $token eq 'if' || $token eq 'unless' ) { - push @{ $rand_or_list[$depth][4] }, $i; - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) - { - set_forced_breakpoint($i); - } - } ## end elsif ( $token eq 'if' ||...) - } ## end elsif ( $type eq 'k' ) - elsif ( $is_assignment{$type} ) { - $i_equals[$depth] = $i; - } - - if ($type_sequence) { - - # handle any postponed closing breakpoints - if ( $token =~ /^[\)\]\}\:]$/ ) { - if ( $type eq ':' ) { - $last_colon_sequence_number = $type_sequence; - - # retain break at a ':' line break - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_ternary_breakpoints ) - { - - set_forced_breakpoint($i); - - # break at previous '=' - if ( $i_equals[$depth] > 0 ) { - set_forced_breakpoint( $i_equals[$depth] ); - $i_equals[$depth] = -1; - } - } ## end if ( ( $i == $i_line_start...)) - } ## end if ( $type eq ':' ) - if ( defined( $postponed_breakpoint{$type_sequence} ) ) { - my $inc = ( $type eq ':' ) ? 0 : 1; - set_forced_breakpoint( $i - $inc ); - delete $postponed_breakpoint{$type_sequence}; - } - } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(]) - - # set breaks at ?/: if they will get separated (and are - # not a ?/: chain), or if the '?' is at the end of the - # line - elsif ( $token eq '?' ) { - my $i_colon = $mate_index_to_go[$i]; - if ( - $i_colon <= 0 # the ':' is not in this batch - || $i == 0 # this '?' is the first token of the line - || $i == - $max_index_to_go # or this '?' is the last token - ) - { - - # don't break at a '?' if preceded by ':' on - # this line of previous ?/: pair on this line. - # This is an attempt to preserve a chain of ?/: - # expressions (elsif2.t). And don't break if - # this has a side comment. - set_forced_breakpoint($i) - unless ( - $type_sequence == ( - $last_colon_sequence_number + - TYPE_SEQUENCE_INCREMENT - ) - || $tokens_to_go[$max_index_to_go] eq '#' - ); - set_closing_breakpoint($i); - } ## end if ( $i_colon <= 0 ||...) - } ## end elsif ( $token eq '?' ) - } ## end if ($type_sequence) - -#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; - - #------------------------------------------------------------ - # Handle Increasing Depth.. - # - # prepare for a new list when depth increases - # token $i is a '(','{', or '[' - #------------------------------------------------------------ - if ( $depth > $current_depth ) { - - $breakpoint_stack[$depth] = $forced_breakpoint_count; - $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; - $has_broken_sublist[$depth] = 0; - $identifier_count_stack[$depth] = 0; - $index_before_arrow[$depth] = -1; - $interrupted_list[$depth] = 0; - $item_count_stack[$depth] = 0; - $last_comma_index[$depth] = undef; - $last_dot_index[$depth] = undef; - $last_nonblank_type[$depth] = $last_nonblank_type; - $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; - $opening_structure_index_stack[$depth] = $i; - $rand_or_list[$depth] = []; - $rfor_semicolon_list[$depth] = []; - $i_equals[$depth] = -1; - $want_comma_break[$depth] = 0; - $container_type[$depth] = - ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) - ? $last_nonblank_token - : ""; - $has_old_logical_breakpoints[$depth] = 0; - - # if line ends here then signal closing token to break - if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) - { - set_closing_breakpoint($i); - } - - # Not all lists of values should be vertically aligned.. - $dont_align[$depth] = - - # code BLOCKS are handled at a higher level - ( $block_type ne "" ) - - # certain paren lists - || ( $type eq '(' ) && ( - - # it does not usually look good to align a list of - # identifiers in a parameter list, as in: - # my($var1, $var2, ...) - # (This test should probably be refined, for now I'm just - # testing for any keyword) - ( $last_nonblank_type eq 'k' ) - - # a trailing '(' usually indicates a non-list - || ( $next_nonblank_type eq '(' ) - ); - - # patch to outdent opening brace of long if/for/.. - # statements (like this one). See similar coding in - # set_continuation breaks. We have also catch it here for - # short line fragments which otherwise will not go through - # set_continuation_breaks. - if ( - $block_type - - # if we have the ')' but not its '(' in this batch.. - && ( $last_nonblank_token eq ')' ) - && $mate_index_to_go[$i_last_nonblank_token] < 0 - - # and user wants brace to left - && !$rOpts->{'opening-brace-always-on-right'} - - && ( $type eq '{' ) # should be true - && ( $token eq '{' ) # should be true - ) - { - set_forced_breakpoint( $i - 1 ); - } ## end if ( $block_type && ( ...)) - } ## end if ( $depth > $current_depth) - - #------------------------------------------------------------ - # Handle Decreasing Depth.. - # - # finish off any old list when depth decreases - # token $i is a ')','}', or ']' - #------------------------------------------------------------ - elsif ( $depth < $current_depth ) { - - check_for_new_minimum_depth($depth); - - # force all outer logical containers to break after we see on - # old breakpoint - $has_old_logical_breakpoints[$depth] ||= - $has_old_logical_breakpoints[$current_depth]; - - # Patch to break between ') {' if the paren list is broken. - # There is similar logic in set_continuation_breaks for - # non-broken lists. - if ( $token eq ')' - && $next_nonblank_block_type - && $interrupted_list[$current_depth] - && $next_nonblank_type eq '{' - && !$rOpts->{'opening-brace-always-on-right'} ) - { - set_forced_breakpoint($i); - } ## end if ( $token eq ')' && ... - -#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; - - # set breaks at commas if necessary - my ( $bp_count, $do_not_break_apart ) = - set_comma_breakpoints($current_depth); - - my $i_opening = $opening_structure_index_stack[$current_depth]; - my $saw_opening_structure = ( $i_opening >= 0 ); - - # this term is long if we had to break at interior commas.. - my $is_long_term = $bp_count > 0; - - # If this is a short container with one or more comma arrows, - # then we will mark it as a long term to open it if requested. - # $rOpts_comma_arrow_breakpoints = - # 0 - open only if comma precedes closing brace - # 1 - stable: except for one line blocks - # 2 - try to form 1 line blocks - # 3 - ignore => - # 4 - always open up if vt=0 - # 5 - stable: even for one line blocks if vt=0 - if ( !$is_long_term - && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/ - && $index_before_arrow[ $depth + 1 ] > 0 - && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } - ) - { - $is_long_term = $rOpts_comma_arrow_breakpoints == 4 - || ( $rOpts_comma_arrow_breakpoints == 0 - && $last_nonblank_token eq ',' ) - || ( $rOpts_comma_arrow_breakpoints == 5 - && $old_breakpoint_to_go[$i_opening] ); - } ## end if ( !$is_long_term &&...) - - # mark term as long if the length between opening and closing - # parens exceeds allowed line length - if ( !$is_long_term && $saw_opening_structure ) { - my $i_opening_minus = find_token_starting_list($i_opening); - - # Note: we have to allow for one extra space after a - # closing token so that we do not strand a comma or - # semicolon, hence the '>=' here (oneline.t) - # Note: we ignore left weld lengths here for best results - $is_long_term = - excess_line_length( $i_opening_minus, $i, 1 ) >= 0; - } ## end if ( !$is_long_term &&...) - - # We've set breaks after all comma-arrows. Now we have to - # undo them if this can be a one-line block - # (the only breakpoints set will be due to comma-arrows) - if ( - - # user doesn't require breaking after all comma-arrows - ( $rOpts_comma_arrow_breakpoints != 0 ) - && ( $rOpts_comma_arrow_breakpoints != 4 ) - - # and if the opening structure is in this batch - && $saw_opening_structure - - # and either on the same old line - && ( - $old_breakpoint_count_stack[$current_depth] == - $last_old_breakpoint_count - - # or user wants to form long blocks with arrows - || $rOpts_comma_arrow_breakpoints == 2 - ) - - # and we made some breakpoints between the opening and closing - && ( $breakpoint_undo_stack[$current_depth] < - $forced_breakpoint_undo_count ) - - # and this block is short enough to fit on one line - # Note: use < because need 1 more space for possible comma - && !$is_long_term - - ) - { - undo_forced_breakpoint_stack( - $breakpoint_undo_stack[$current_depth] ); - } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) - - # now see if we have any comma breakpoints left - my $has_comma_breakpoints = - ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ); - - # update broken-sublist flag of the outer container - $has_broken_sublist[$depth] = - $has_broken_sublist[$depth] - || $has_broken_sublist[$current_depth] - || $is_long_term - || $has_comma_breakpoints; - -# Having come to the closing ')', '}', or ']', now we have to decide if we -# should 'open up' the structure by placing breaks at the opening and -# closing containers. This is a tricky decision. Here are some of the -# basic considerations: -# -# -If this is a BLOCK container, then any breakpoints will have already -# been set (and according to user preferences), so we need do nothing here. -# -# -If we have a comma-separated list for which we can align the list items, -# then we need to do so because otherwise the vertical aligner cannot -# currently do the alignment. -# -# -If this container does itself contain a container which has been broken -# open, then it should be broken open to properly show the structure. -# -# -If there is nothing to align, and no other reason to break apart, -# then do not do it. -# -# We will not break open the parens of a long but 'simple' logical expression. -# For example: -# -# This is an example of a simple logical expression and its formatting: -# -# if ( $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 ) -# -# Most people would prefer this than the 'spacey' version: -# -# if ( -# $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 -# ) -# -# To illustrate the rules for breaking logical expressions, consider: -# -# FULLY DENSE: -# if ( $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc )) -# -# This is on the verge of being difficult to read. The current default is to -# open it up like this: -# -# DEFAULT: -# if ( -# $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc ) -# ) -# -# This is a compromise which tries to avoid being too dense and to spacey. -# A more spaced version would be: -# -# SPACEY: -# if ( -# $opt_excl -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# ) -# -# Some people might prefer the spacey version -- an option could be added. The -# innermost expression contains a long block '( exists $ids_... ')'. -# -# Here is how the logic goes: We will force a break at the 'or' that the -# innermost expression contains, but we will not break apart its opening and -# closing containers because (1) it contains no multi-line sub-containers itself, -# and (2) there is no alignment to be gained by breaking it open like this -# -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# -# (although this looks perfectly ok and might be good for long expressions). The -# outer 'if' container, though, contains a broken sub-container, so it will be -# broken open to avoid too much density. Also, since it contains no 'or's, there -# will be a forced break at its 'and'. - - # set some flags telling something about this container.. - my $is_simple_logical_expression = 0; - if ( $item_count_stack[$current_depth] == 0 - && $saw_opening_structure - && $tokens_to_go[$i_opening] eq '(' - && $is_logical_container{ $container_type[$current_depth] } - ) - { - - # This seems to be a simple logical expression with - # no existing breakpoints. Set a flag to prevent - # opening it up. - if ( !$has_comma_breakpoints ) { - $is_simple_logical_expression = 1; - } - - # This seems to be a simple logical expression with - # breakpoints (broken sublists, for example). Break - # at all 'or's and '||'s. - else { - set_logical_breakpoints($current_depth); - } - } ## end if ( $item_count_stack...) - - if ( $is_long_term - && @{ $rfor_semicolon_list[$current_depth] } ) - { - set_for_semicolon_breakpoints($current_depth); - - # open up a long 'for' or 'foreach' container to allow - # leading term alignment unless -lp is used. - $has_comma_breakpoints = 1 - unless $rOpts_line_up_parentheses; - } ## end if ( $is_long_term && ...) - - if ( - - # breaks for code BLOCKS are handled at a higher level - !$block_type - - # we do not need to break at the top level of an 'if' - # type expression - && !$is_simple_logical_expression - - ## modification to keep ': (' containers vertically tight; - ## but probably better to let user set -vt=1 to avoid - ## inconsistency with other paren types - ## && ($container_type[$current_depth] ne ':') - - # otherwise, we require one of these reasons for breaking: - && ( - - # - this term has forced line breaks - $has_comma_breakpoints - - # - the opening container is separated from this batch - # for some reason (comment, blank line, code block) - # - this is a non-paren container spanning multiple lines - || !$saw_opening_structure - - # - this is a long block contained in another breakable - # container - || ( $is_long_term - && $container_environment_to_go[$i_opening] ne - 'BLOCK' ) - ) - ) - { - - # For -lp option, we must put a breakpoint before - # the token which has been identified as starting - # this indentation level. This is necessary for - # proper alignment. - if ( $rOpts_line_up_parentheses && $saw_opening_structure ) - { - my $item = $leading_spaces_to_go[ $i_opening + 1 ]; - if ( $i_opening + 1 < $max_index_to_go - && $types_to_go[ $i_opening + 1 ] eq 'b' ) - { - $item = $leading_spaces_to_go[ $i_opening + 2 ]; - } - if ( defined($item) ) { - my $i_start_2 = $item->get_starting_index(); - if ( - defined($i_start_2) - - # we are breaking after an opening brace, paren, - # so don't break before it too - && $i_start_2 ne $i_opening - ) - { - - # Only break for breakpoints at the same - # indentation level as the opening paren - my $test1 = $nesting_depth_to_go[$i_opening]; - my $test2 = $nesting_depth_to_go[$i_start_2]; - if ( $test2 == $test1 ) { - set_forced_breakpoint( $i_start_2 - 1 ); - } - } ## end if ( defined($i_start_2...)) - } ## end if ( defined($item) ) - } ## end if ( $rOpts_line_up_parentheses...) - - # break after opening structure. - # note: break before closing structure will be automatic - if ( $minimum_depth <= $current_depth ) { - - set_forced_breakpoint($i_opening) - unless ( $do_not_break_apart - || is_unbreakable_container($current_depth) ); - - # break at ',' of lower depth level before opening token - if ( $last_comma_index[$depth] ) { - set_forced_breakpoint( $last_comma_index[$depth] ); - } - - # break at '.' of lower depth level before opening token - if ( $last_dot_index[$depth] ) { - set_forced_breakpoint( $last_dot_index[$depth] ); - } - - # break before opening structure if preceded by another - # closing structure and a comma. This is normally - # done by the previous closing brace, but not - # if it was a one-line block. - if ( $i_opening > 2 ) { - my $i_prev = - ( $types_to_go[ $i_opening - 1 ] eq 'b' ) - ? $i_opening - 2 - : $i_opening - 1; - - if ( $types_to_go[$i_prev] eq ',' - && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) - { - set_forced_breakpoint($i_prev); - } - - # also break before something like ':(' or '?(' - # if appropriate. - elsif ( - $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) - { - my $token_prev = $tokens_to_go[$i_prev]; - if ( $want_break_before{$token_prev} ) { - set_forced_breakpoint($i_prev); - } - } ## end elsif ( $types_to_go[$i_prev...]) - } ## end if ( $i_opening > 2 ) - } ## end if ( $minimum_depth <=...) - - # break after comma following closing structure - if ( $next_type eq ',' ) { - set_forced_breakpoint( $i + 1 ); - } - - # break before an '=' following closing structure - if ( - $is_assignment{$next_nonblank_type} - && ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ) - ) - { - set_forced_breakpoint($i); - } ## end if ( $is_assignment{$next_nonblank_type...}) - - # break at any comma before the opening structure Added - # for -lp, but seems to be good in general. It isn't - # obvious how far back to look; the '5' below seems to - # work well and will catch the comma in something like - # push @list, myfunc( $param, $param, .. - - my $icomma = $last_comma_index[$depth]; - if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { - unless ( $forced_breakpoint_to_go[$icomma] ) { - set_forced_breakpoint($icomma); - } - } - } # end logic to open up a container - - # Break open a logical container open if it was already open - elsif ($is_simple_logical_expression - && $has_old_logical_breakpoints[$current_depth] ) - { - set_logical_breakpoints($current_depth); - } - - # Handle long container which does not get opened up - elsif ($is_long_term) { - - # must set fake breakpoint to alert outer containers that - # they are complex - set_fake_breakpoint(); - } ## end elsif ($is_long_term) - - } ## end elsif ( $depth < $current_depth) - - #------------------------------------------------------------ - # Handle this token - #------------------------------------------------------------ - - $current_depth = $depth; - - # handle comma-arrow - if ( $type eq '=>' ) { - next if ( $last_nonblank_type eq '=>' ); - next if $rOpts_break_at_old_comma_breakpoints; - next if $rOpts_comma_arrow_breakpoints == 3; - $want_comma_break[$depth] = 1; - $index_before_arrow[$depth] = $i_last_nonblank_token; - next; - } ## end if ( $type eq '=>' ) - - elsif ( $type eq '.' ) { - $last_dot_index[$depth] = $i; - } - - # Turn off alignment if we are sure that this is not a list - # environment. To be safe, we will do this if we see certain - # non-list tokens, such as ';', and also the environment is - # not a list. Note that '=' could be in any of the = operators - # (lextest.t). We can't just use the reported environment - # because it can be incorrect in some cases. - elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) - && $container_environment_to_go[$i] ne 'LIST' ) - { - $dont_align[$depth] = 1; - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) - - # now just handle any commas - next unless ( $type eq ',' ); - - $last_dot_index[$depth] = undef; - $last_comma_index[$depth] = $i; - - # break here if this comma follows a '=>' - # but not if there is a side comment after the comma - if ( $want_comma_break[$depth] ) { - - if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { - if ($rOpts_comma_arrow_breakpoints) { - $want_comma_break[$depth] = 0; - ##$index_before_arrow[$depth] = -1; - next; - } - } - - set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); - - # break before the previous token if it looks safe - # Example of something that we will not try to break before: - # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, - # Also we don't want to break at a binary operator (like +): - # $c->createOval( - # $x + $R, $y + - # $R => $x - $R, - # $y - $R, -fill => 'black', - # ); - my $ibreak = $index_before_arrow[$depth] - 1; - if ( $ibreak > 0 - && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) - { - if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } - if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } - if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { - - # don't break pointer calls, such as the following: - # File::Spec->curdir => 1, - # (This is tokenized as adjacent 'w' tokens) - ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { - - # And don't break before a comma, as in the following: - # ( LONGER_THAN,=> 1, - # EIGHTY_CHARACTERS,=> 2, - # CAUSES_FORMATTING,=> 3, - # LIKE_THIS,=> 4, - # ); - # This example is for -tso but should be general rule - if ( $tokens_to_go[ $ibreak + 1 ] ne '->' - && $tokens_to_go[ $ibreak + 1 ] ne ',' ) - { - set_forced_breakpoint($ibreak); - } - } ## end if ( $types_to_go[$ibreak...]) - } ## end if ( $ibreak > 0 && $tokens_to_go...) - - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - - # handle list which mixes '=>'s and ','s: - # treat any list items so far as an interrupted list - $interrupted_list[$depth] = 1; - next; - } ## end if ( $want_comma_break...) - - # break after all commas above starting depth - if ( $depth < $starting_depth && !$dont_align[$depth] ) { - set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); - next; - } - - # add this comma to the list.. - my $item_count = $item_count_stack[$depth]; - if ( $item_count == 0 ) { - - # but do not form a list with no opening structure - # for example: - - # open INFILE_COPY, ">$input_file_copy" - # or die ("very long message"); - - if ( ( $opening_structure_index_stack[$depth] < 0 ) - && $container_environment_to_go[$i] eq 'BLOCK' ) - { - $dont_align[$depth] = 1; - } - } ## end if ( $item_count == 0 ) - - $comma_index[$depth][$item_count] = $i; - ++$item_count_stack[$depth]; - if ( $last_nonblank_type =~ /^[iR\]]$/ ) { - $identifier_count_stack[$depth]++; - } - } ## end while ( ++$i <= $max_index_to_go) - - #------------------------------------------- - # end of loop over all tokens in this batch - #------------------------------------------- - - # set breaks for any unfinished lists .. - for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { - - $interrupted_list[$dd] = 1; - $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); - set_comma_breakpoints($dd); - set_logical_breakpoints($dd) - if ( $has_old_logical_breakpoints[$dd] ); - set_for_semicolon_breakpoints($dd); - - # break open container... - my $i_opening = $opening_structure_index_stack[$dd]; - set_forced_breakpoint($i_opening) - unless ( - is_unbreakable_container($dd) - - # Avoid a break which would place an isolated ' or " - # on a line - || ( $type eq 'Q' - && $i_opening >= $max_index_to_go - 2 - && $token =~ /^['"]$/ ) - ); - } ## end for ( my $dd = $current_depth...) - - # Return a flag indicating if the input file had some good breakpoints. - # This flag will be used to force a break in a line shorter than the - # allowed line length. - if ( $has_old_logical_breakpoints[$current_depth] ) { - $saw_good_breakpoint = 1; - } - - # A complex line with one break at an = has a good breakpoint. - # This is not complex ($total_depth_variation=0): - # $res1 - # = 10; - # - # This is complex ($total_depth_variation=6): - # $res2 = - # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); - elsif ($i_old_assignment_break - && $total_depth_variation > 4 - && $old_breakpoint_count == 1 ) - { - $saw_good_breakpoint = 1; - } ## end elsif ( $i_old_assignment_break...) - - return $saw_good_breakpoint; - } ## end sub scan_list -} # end scan_list - -sub find_token_starting_list { - - # When testing to see if a block will fit on one line, some - # previous token(s) may also need to be on the line; particularly - # if this is a sub call. So we will look back at least one - # token. NOTE: This isn't perfect, but not critical, because - # if we mis-identify a block, it will be wrapped and therefore - # fixed the next time it is formatted. - my $i_opening_paren = shift; - my $i_opening_minus = $i_opening_paren; - my $im1 = $i_opening_paren - 1; - my $im2 = $i_opening_paren - 2; - my $im3 = $i_opening_paren - 3; - my $typem1 = $types_to_go[$im1]; - my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; - if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { - $i_opening_minus = $i_opening_paren; - } - elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { - $i_opening_minus = $im1 if $im1 >= 0; - - # walk back to improve length estimate - for ( my $j = $im1 ; $j >= 0 ; $j-- ) { - last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); - $i_opening_minus = $j; - } - if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } - } - elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } - elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { - $i_opening_minus = $im2; - } - return $i_opening_minus; -} - -{ # begin set_comma_breakpoints_do - - my %is_keyword_with_special_leading_term; - - BEGIN { - - # These keywords have prototypes which allow a special leading item - # followed by a list - my @q = - qw(formline grep kill map printf sprintf push chmod join pack unshift); - @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q); - } - - sub set_comma_breakpoints_do { - - # Given a list with some commas, set breakpoints at some of the - # commas, if necessary, to make it easy to read. This list is - # an example: - my ( - $depth, $i_opening_paren, $i_closing_paren, - $item_count, $identifier_count, $rcomma_index, - $next_nonblank_type, $list_type, $interrupted, - $rdo_not_break_apart, $must_break_open, - ) = @_; - - # nothing to do if no commas seen - return if ( $item_count < 1 ); - my $i_first_comma = $rcomma_index->[0]; - my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ]; - my $i_last_comma = $i_true_last_comma; - if ( $i_last_comma >= $max_index_to_go ) { - $i_last_comma = $rcomma_index->[ --$item_count - 1 ]; - return if ( $item_count < 1 ); - } - - #--------------------------------------------------------------- - # find lengths of all items in the list to calculate page layout - #--------------------------------------------------------------- - my $comma_count = $item_count; - my @item_lengths; - my @i_term_begin; - my @i_term_end; - my @i_term_comma; - my $i_prev_plus; - my @max_length = ( 0, 0 ); - my $first_term_length; - my $i = $i_opening_paren; - my $is_odd = 1; - - foreach my $j ( 0 .. $comma_count - 1 ) { - $is_odd = 1 - $is_odd; - $i_prev_plus = $i + 1; - $i = $rcomma_index->[$j]; - - my $i_term_end = - ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; - my $i_term_begin = - ( $types_to_go[$i_prev_plus] eq 'b' ) - ? $i_prev_plus + 1 - : $i_prev_plus; - push @i_term_begin, $i_term_begin; - push @i_term_end, $i_term_end; - push @i_term_comma, $i; - - # note: currently adding 2 to all lengths (for comma and space) - my $length = - 2 + token_sequence_length( $i_term_begin, $i_term_end ); - push @item_lengths, $length; - - if ( $j == 0 ) { - $first_term_length = $length; - } - else { - - if ( $length > $max_length[$is_odd] ) { - $max_length[$is_odd] = $length; - } - } - } - - # now we have to make a distinction between the comma count and item - # count, because the item count will be one greater than the comma - # count if the last item is not terminated with a comma - my $i_b = - ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) - ? $i_last_comma + 1 - : $i_last_comma; - my $i_e = - ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) - ? $i_closing_paren - 2 - : $i_closing_paren - 1; - my $i_effective_last_comma = $i_last_comma; - - my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); - - if ( $last_item_length > 0 ) { - - # add 2 to length because other lengths include a comma and a blank - $last_item_length += 2; - push @item_lengths, $last_item_length; - push @i_term_begin, $i_b + 1; - push @i_term_end, $i_e; - push @i_term_comma, undef; - - my $i_odd = $item_count % 2; - - if ( $last_item_length > $max_length[$i_odd] ) { - $max_length[$i_odd] = $last_item_length; - } - - $item_count++; - $i_effective_last_comma = $i_e + 1; - - if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { - $identifier_count++; - } - } - - #--------------------------------------------------------------- - # End of length calculations - #--------------------------------------------------------------- - - #--------------------------------------------------------------- - # Compound List Rule 1: - # Break at (almost) every comma for a list containing a broken - # sublist. This has higher priority than the Interrupted List - # Rule. - #--------------------------------------------------------------- - if ( $has_broken_sublist[$depth] ) { - - # Break at every comma except for a comma between two - # simple, small terms. This prevents long vertical - # columns of, say, just 0's. - my $small_length = 10; # 2 + actual maximum length wanted - - # We'll insert a break in long runs of small terms to - # allow alignment in uniform tables. - my $skipped_count = 0; - my $columns = table_columns_available($i_first_comma); - my $fields = int( $columns / $small_length ); - if ( $rOpts_maximum_fields_per_table - && $fields > $rOpts_maximum_fields_per_table ) - { - $fields = $rOpts_maximum_fields_per_table; - } - my $max_skipped_count = $fields - 1; - - my $is_simple_last_term = 0; - my $is_simple_next_term = 0; - foreach my $j ( 0 .. $item_count ) { - $is_simple_last_term = $is_simple_next_term; - $is_simple_next_term = 0; - if ( $j < $item_count - && $i_term_end[$j] == $i_term_begin[$j] - && $item_lengths[$j] <= $small_length ) - { - $is_simple_next_term = 1; - } - next if $j == 0; - if ( $is_simple_last_term - && $is_simple_next_term - && $skipped_count < $max_skipped_count ) - { - $skipped_count++; - } - else { - $skipped_count = 0; - my $i = $i_term_comma[ $j - 1 ]; - last unless defined $i; - set_forced_breakpoint($i); - } - } - - # always break at the last comma if this list is - # interrupted; we wouldn't want to leave a terminal '{', for - # example. - if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } - return; - } - -#my ( $a, $b, $c ) = caller(); -#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count -#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; -#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; - - #--------------------------------------------------------------- - # Interrupted List Rule: - # A list is forced to use old breakpoints if it was interrupted - # by side comments or blank lines, or requested by user. - #--------------------------------------------------------------- - if ( $rOpts_break_at_old_comma_breakpoints - || $interrupted - || $i_opening_paren < 0 ) - { - copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); - return; - } - - #--------------------------------------------------------------- - # Looks like a list of items. We have to look at it and size it up. - #--------------------------------------------------------------- - - my $opening_token = $tokens_to_go[$i_opening_paren]; - my $opening_environment = - $container_environment_to_go[$i_opening_paren]; - - #------------------------------------------------------------------- - # Return if this will fit on one line - #------------------------------------------------------------------- - - my $i_opening_minus = find_token_starting_list($i_opening_paren); - return - unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; - - #------------------------------------------------------------------- - # Now we know that this block spans multiple lines; we have to set - # at least one breakpoint -- real or fake -- as a signal to break - # open any outer containers. - #------------------------------------------------------------------- - set_fake_breakpoint(); - - # be sure we do not extend beyond the current list length - if ( $i_effective_last_comma >= $max_index_to_go ) { - $i_effective_last_comma = $max_index_to_go - 1; - } - - # Set a flag indicating if we need to break open to keep -lp - # items aligned. This is necessary if any of the list terms - # exceeds the available space after the '('. - my $need_lp_break_open = $must_break_open; - if ( $rOpts_line_up_parentheses && !$must_break_open ) { - my $columns_if_unbroken = - maximum_line_length($i_opening_minus) - - total_line_length( $i_opening_minus, $i_opening_paren ); - $need_lp_break_open = - ( $max_length[0] > $columns_if_unbroken ) - || ( $max_length[1] > $columns_if_unbroken ) - || ( $first_term_length > $columns_if_unbroken ); - } - - # Specify if the list must have an even number of fields or not. - # It is generally safest to assume an even number, because the - # list items might be a hash list. But if we can be sure that - # it is not a hash, then we can allow an odd number for more - # flexibility. - my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count - - if ( $identifier_count >= $item_count - 1 - || $is_assignment{$next_nonblank_type} - || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) - ) - { - $odd_or_even = 1; - } - - # do we have a long first term which should be - # left on a line by itself? - my $use_separate_first_term = ( - $odd_or_even == 1 # only if we can use 1 field/line - && $item_count > 3 # need several items - && $first_term_length > - 2 * $max_length[0] - 2 # need long first term - && $first_term_length > - 2 * $max_length[1] - 2 # need long first term - ); - - # or do we know from the type of list that the first term should - # be placed alone? - if ( !$use_separate_first_term ) { - if ( $is_keyword_with_special_leading_term{$list_type} ) { - $use_separate_first_term = 1; - - # should the container be broken open? - if ( $item_count < 3 ) { - if ( $i_first_comma - $i_opening_paren < 4 ) { - ${$rdo_not_break_apart} = 1; - } - } - elsif ($first_term_length < 20 - && $i_first_comma - $i_opening_paren < 4 ) - { - my $columns = table_columns_available($i_first_comma); - if ( $first_term_length < $columns ) { - ${$rdo_not_break_apart} = 1; - } - } - } - } - - # if so, - if ($use_separate_first_term) { - - # ..set a break and update starting values - $use_separate_first_term = 1; - set_forced_breakpoint($i_first_comma); - $i_opening_paren = $i_first_comma; - $i_first_comma = $rcomma_index->[1]; - $item_count--; - return if $comma_count == 1; - shift @item_lengths; - shift @i_term_begin; - shift @i_term_end; - shift @i_term_comma; - } - - # if not, update the metrics to include the first term - else { - if ( $first_term_length > $max_length[0] ) { - $max_length[0] = $first_term_length; - } - } - - # Field width parameters - my $pair_width = ( $max_length[0] + $max_length[1] ); - my $max_width = - ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; - - # Number of free columns across the page width for laying out tables - my $columns = table_columns_available($i_first_comma); - - # Estimated maximum number of fields which fit this space - # This will be our first guess - my $number_of_fields_max = - maximum_number_of_fields( $columns, $odd_or_even, $max_width, - $pair_width ); - my $number_of_fields = $number_of_fields_max; - - # Find the best-looking number of fields - # and make this our second guess if possible - my ( $number_of_fields_best, $ri_ragged_break_list, - $new_identifier_count ) - = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, - $max_width ); - - if ( $number_of_fields_best != 0 - && $number_of_fields_best < $number_of_fields_max ) - { - $number_of_fields = $number_of_fields_best; - } - - # ---------------------------------------------------------------------- - # If we are crowded and the -lp option is being used, try to - # undo some indentation - # ---------------------------------------------------------------------- - if ( - $rOpts_line_up_parentheses - && ( - $number_of_fields == 0 - || ( $number_of_fields == 1 - && $number_of_fields != $number_of_fields_best ) - ) - ) - { - my $available_spaces = get_available_spaces_to_go($i_first_comma); - if ( $available_spaces > 0 ) { - - my $spaces_wanted = $max_width - $columns; # for 1 field - - if ( $number_of_fields_best == 0 ) { - $number_of_fields_best = - get_maximum_fields_wanted( \@item_lengths ); - } - - if ( $number_of_fields_best != 1 ) { - my $spaces_wanted_2 = - 1 + $pair_width - $columns; # for 2 fields - if ( $available_spaces > $spaces_wanted_2 ) { - $spaces_wanted = $spaces_wanted_2; - } - } - - if ( $spaces_wanted > 0 ) { - my $deleted_spaces = - reduce_lp_indentation( $i_first_comma, $spaces_wanted ); - - # redo the math - if ( $deleted_spaces > 0 ) { - $columns = table_columns_available($i_first_comma); - $number_of_fields_max = - maximum_number_of_fields( $columns, $odd_or_even, - $max_width, $pair_width ); - $number_of_fields = $number_of_fields_max; - - if ( $number_of_fields_best == 1 - && $number_of_fields >= 1 ) - { - $number_of_fields = $number_of_fields_best; - } - } - } - } - } - - # try for one column if two won't work - if ( $number_of_fields <= 0 ) { - $number_of_fields = int( $columns / $max_width ); - } - - # The user can place an upper bound on the number of fields, - # which can be useful for doing maintenance on tables - if ( $rOpts_maximum_fields_per_table - && $number_of_fields > $rOpts_maximum_fields_per_table ) - { - $number_of_fields = $rOpts_maximum_fields_per_table; - } - - # How many columns (characters) and lines would this container take - # if no additional whitespace were added? - my $packed_columns = token_sequence_length( $i_opening_paren + 1, - $i_effective_last_comma + 1 ); - if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero - my $packed_lines = 1 + int( $packed_columns / $columns ); - - # are we an item contained in an outer list? - my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; - - if ( $number_of_fields <= 0 ) { - -# #--------------------------------------------------------------- -# # We're in trouble. We can't find a single field width that works. -# # There is no simple answer here; we may have a single long list -# # item, or many. -# #--------------------------------------------------------------- -# -# In many cases, it may be best to not force a break if there is just one -# comma, because the standard continuation break logic will do a better -# job without it. -# -# In the common case that all but one of the terms can fit -# on a single line, it may look better not to break open the -# containing parens. Consider, for example -# -# $color = -# join ( '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } -# keys %colors ); -# -# which will look like this with the container broken: -# -# $color = join ( -# '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors -# ); -# -# Here is an example of this rule for a long last term: -# -# log_message( 0, 256, 128, -# "Number of routes in adj-RIB-in to be considered: $peercount" ); -# -# And here is an example with a long first term: -# -# $s = sprintf( -# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", -# $r, $pu, $ps, $cu, $cs, $tt -# ) -# if $style eq 'all'; - - my $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; - my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; - my $long_first_term = - excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; - - # break at every comma ... - if ( - - # if requested by user or is best looking - $number_of_fields_best == 1 - - # or if this is a sublist of a larger list - || $in_hierarchical_list - - # or if multiple commas and we don't have a long first or last - # term - || ( $comma_count > 1 - && !( $long_last_term || $long_first_term ) ) - ) - { - foreach ( 0 .. $comma_count - 1 ) { - set_forced_breakpoint( $rcomma_index->[$_] ); - } - } - elsif ($long_last_term) { - - set_forced_breakpoint($i_last_comma); - ${$rdo_not_break_apart} = 1 unless $must_break_open; - } - elsif ($long_first_term) { - - set_forced_breakpoint($i_first_comma); - } - else { - - # let breaks be defined by default bond strength logic - } - return; - } - - # -------------------------------------------------------- - # We have a tentative field count that seems to work. - # How many lines will this require? - # -------------------------------------------------------- - my $formatted_lines = $item_count / ($number_of_fields); - if ( $formatted_lines != int $formatted_lines ) { - $formatted_lines = 1 + int $formatted_lines; - } - - # So far we've been trying to fill out to the right margin. But - # compact tables are easier to read, so let's see if we can use fewer - # fields without increasing the number of lines. - $number_of_fields = - compactify_table( $item_count, $number_of_fields, $formatted_lines, - $odd_or_even ); - - # How many spaces across the page will we fill? - my $columns_per_line = - ( int $number_of_fields / 2 ) * $pair_width + - ( $number_of_fields % 2 ) * $max_width; - - my $formatted_columns; - - if ( $number_of_fields > 1 ) { - $formatted_columns = - ( $pair_width * ( int( $item_count / 2 ) ) + - ( $item_count % 2 ) * $max_width ); - } - else { - $formatted_columns = $max_width * $item_count; - } - if ( $formatted_columns < $packed_columns ) { - $formatted_columns = $packed_columns; - } - - my $unused_columns = $formatted_columns - $packed_columns; - - # set some empirical parameters to help decide if we should try to - # align; high sparsity does not look good, especially with few lines - my $sparsity = ($unused_columns) / ($formatted_columns); - my $max_allowed_sparsity = - ( $item_count < 3 ) ? 0.1 - : ( $packed_lines == 1 ) ? 0.15 - : ( $packed_lines == 2 ) ? 0.4 - : 0.7; - - # Begin check for shortcut methods, which avoid treating a list - # as a table for relatively small parenthesized lists. These - # are usually easier to read if not formatted as tables. - if ( - $packed_lines <= 2 # probably can fit in 2 lines - && $item_count < 9 # doesn't have too many items - && $opening_environment eq 'BLOCK' # not a sub-container - && $opening_token eq '(' # is paren list - ) - { - - # Shortcut method 1: for -lp and just one comma: - # This is a no-brainer, just break at the comma. - if ( - $rOpts_line_up_parentheses # -lp - && $item_count == 2 # two items, one comma - && !$must_break_open - ) - { - my $i_break = $rcomma_index->[0]; - set_forced_breakpoint($i_break); - ${$rdo_not_break_apart} = 1; - set_non_alignment_flags( $comma_count, $rcomma_index ); - return; - - } - - # method 2 is for most small ragged lists which might look - # best if not displayed as a table. - if ( - ( $number_of_fields == 2 && $item_count == 3 ) - || ( - $new_identifier_count > 0 # isn't all quotes - && $sparsity > 0.15 - ) # would be fairly spaced gaps if aligned - ) - { - - my $break_count = set_ragged_breakpoints( \@i_term_comma, - $ri_ragged_break_list ); - ++$break_count if ($use_separate_first_term); - - # NOTE: we should really use the true break count here, - # which can be greater if there are large terms and - # little space, but usually this will work well enough. - unless ($must_break_open) { - - if ( $break_count <= 1 ) { - ${$rdo_not_break_apart} = 1; - } - elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) - { - ${$rdo_not_break_apart} = 1; - } - } - set_non_alignment_flags( $comma_count, $rcomma_index ); - return; - } - - } # end shortcut methods - - # debug stuff - - FORMATTER_DEBUG_FLAG_SPARSE && do { - print STDOUT -"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; - - }; - - #--------------------------------------------------------------- - # Compound List Rule 2: - # If this list is too long for one line, and it is an item of a - # larger list, then we must format it, regardless of sparsity - # (ian.t). One reason that we have to do this is to trigger - # Compound List Rule 1, above, which causes breaks at all commas of - # all outer lists. In this way, the structure will be properly - # displayed. - #--------------------------------------------------------------- - - # Decide if this list is too long for one line unless broken - my $total_columns = table_columns_available($i_opening_paren); - my $too_long = $packed_columns > $total_columns; - - # For a paren list, include the length of the token just before the - # '(' because this is likely a sub call, and we would have to - # include the sub name on the same line as the list. This is still - # imprecise, but not too bad. (steve.t) - if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { - - $too_long = excess_line_length( $i_opening_minus, - $i_effective_last_comma + 1 ) > 0; - } - - # FIXME: For an item after a '=>', try to include the length of the - # thing before the '=>'. This is crude and should be improved by - # actually looking back token by token. - if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { - my $i_opening_minus = $i_opening_paren - 4; - if ( $i_opening_minus >= 0 ) { - $too_long = excess_line_length( $i_opening_minus, - $i_effective_last_comma + 1 ) > 0; - } - } - - # Always break lists contained in '[' and '{' if too long for 1 line, - # and always break lists which are too long and part of a more complex - # structure. - my $must_break_open_container = $must_break_open - || ( $too_long - && ( $in_hierarchical_list || $opening_token ne '(' ) ); - -#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; - - #--------------------------------------------------------------- - # The main decision: - # Now decide if we will align the data into aligned columns. Do not - # attempt to align columns if this is a tiny table or it would be - # too spaced. It seems that the more packed lines we have, the - # sparser the list that can be allowed and still look ok. - #--------------------------------------------------------------- - - if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) - || ( $formatted_lines < 2 ) - || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) - ) - { - - #--------------------------------------------------------------- - # too sparse: would look ugly if aligned in a table; - #--------------------------------------------------------------- - - # use old breakpoints if this is a 'big' list - # FIXME: goal is to improve set_ragged_breakpoints so that - # this is not necessary. - if ( $packed_lines > 2 && $item_count > 10 ) { - write_logfile_entry("List sparse: using old breakpoints\n"); - copy_old_breakpoints( $i_first_comma, $i_last_comma ); - } - - # let the continuation logic handle it if 2 lines - else { - - my $break_count = set_ragged_breakpoints( \@i_term_comma, - $ri_ragged_break_list ); - ++$break_count if ($use_separate_first_term); - - unless ($must_break_open_container) { - if ( $break_count <= 1 ) { - ${$rdo_not_break_apart} = 1; - } - elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) - { - ${$rdo_not_break_apart} = 1; - } - } - set_non_alignment_flags( $comma_count, $rcomma_index ); - } - return; - } - - #--------------------------------------------------------------- - # go ahead and format as a table - #--------------------------------------------------------------- - write_logfile_entry( - "List: auto formatting with $number_of_fields fields/row\n"); - - my $j_first_break = - $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; - - for ( - my $j = $j_first_break ; - $j < $comma_count ; - $j += $number_of_fields - ) - { - my $i = $rcomma_index->[$j]; - set_forced_breakpoint($i); - } - return; - } -} - -sub set_non_alignment_flags { - - # set flag which indicates that these commas should not be - # aligned - my ( $comma_count, $rcomma_index ) = @_; - foreach ( 0 .. $comma_count - 1 ) { - $matching_token_to_go[ $rcomma_index->[$_] ] = 1; - } - return; -} - -sub study_list_complexity { - - # Look for complex tables which should be formatted with one term per line. - # Returns the following: - # - # \@i_ragged_break_list = list of good breakpoints to avoid lines - # which are hard to read - # $number_of_fields_best = suggested number of fields based on - # complexity; = 0 if any number may be used. - # - my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; - my $item_count = @{$ri_term_begin}; - my $complex_item_count = 0; - my $number_of_fields_best = $rOpts_maximum_fields_per_table; - my $i_max = @{$ritem_lengths} - 1; - ##my @item_complexity; - - my $i_last_last_break = -3; - my $i_last_break = -2; - my @i_ragged_break_list; - - my $definitely_complex = 30; - my $definitely_simple = 12; - my $quote_count = 0; - - for my $i ( 0 .. $i_max ) { - my $ib = $ri_term_begin->[$i]; - my $ie = $ri_term_end->[$i]; - - # define complexity: start with the actual term length - my $weighted_length = ( $ritem_lengths->[$i] - 2 ); - - ##TBD: join types here and check for variations - ##my $str=join "", @tokens_to_go[$ib..$ie]; - - my $is_quote = 0; - if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { - $is_quote = 1; - $quote_count++; - } - elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { - $quote_count++; - } - - if ( $ib eq $ie ) { - if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { - $complex_item_count++; - $weighted_length *= 2; - } - else { - } - } - else { - if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { - $complex_item_count++; - $weighted_length *= 2; - } - if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { - $weighted_length += 4; - } - } - - # add weight for extra tokens. - $weighted_length += 2 * ( $ie - $ib ); - -## my $BUB = join '', @tokens_to_go[$ib..$ie]; -## print "# COMPLEXITY:$weighted_length $BUB\n"; - -##push @item_complexity, $weighted_length; - - # now mark a ragged break after this item it if it is 'long and - # complex': - if ( $weighted_length >= $definitely_complex ) { - - # if we broke after the previous term - # then break before it too - if ( $i_last_break == $i - 1 - && $i > 1 - && $i_last_last_break != $i - 2 ) - { - - ## FIXME: don't strand a small term - pop @i_ragged_break_list; - push @i_ragged_break_list, $i - 2; - push @i_ragged_break_list, $i - 1; - } - - push @i_ragged_break_list, $i; - $i_last_last_break = $i_last_break; - $i_last_break = $i; - } - - # don't break before a small last term -- it will - # not look good on a line by itself. - elsif ($i == $i_max - && $i_last_break == $i - 1 - && $weighted_length <= $definitely_simple ) - { - pop @i_ragged_break_list; - } - } - - my $identifier_count = $i_max + 1 - $quote_count; - - # Need more tuning here.. - if ( $max_width > 12 - && $complex_item_count > $item_count / 2 - && $number_of_fields_best != 2 ) - { - $number_of_fields_best = 1; - } - - return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); -} - -sub get_maximum_fields_wanted { - - # Not all tables look good with more than one field of items. - # This routine looks at a table and decides if it should be - # formatted with just one field or not. - # This coding is still under development. - my ($ritem_lengths) = @_; - - my $number_of_fields_best = 0; - - # For just a few items, we tentatively assume just 1 field. - my $item_count = @{$ritem_lengths}; - if ( $item_count <= 5 ) { - $number_of_fields_best = 1; - } - - # For larger tables, look at it both ways and see what looks best - else { - - my $is_odd = 1; - my @max_length = ( 0, 0 ); - my @last_length_2 = ( undef, undef ); - my @first_length_2 = ( undef, undef ); - my $last_length = undef; - my $total_variation_1 = 0; - my $total_variation_2 = 0; - my @total_variation_2 = ( 0, 0 ); - foreach my $j ( 0 .. $item_count - 1 ) { - - $is_odd = 1 - $is_odd; - my $length = $ritem_lengths->[$j]; - if ( $length > $max_length[$is_odd] ) { - $max_length[$is_odd] = $length; - } - - if ( defined($last_length) ) { - my $dl = abs( $length - $last_length ); - $total_variation_1 += $dl; - } - $last_length = $length; - - my $ll = $last_length_2[$is_odd]; - if ( defined($ll) ) { - my $dl = abs( $length - $ll ); - $total_variation_2[$is_odd] += $dl; - } - else { - $first_length_2[$is_odd] = $length; - } - $last_length_2[$is_odd] = $length; - } - $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; - - my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; - unless ( $total_variation_2 < $factor * $total_variation_1 ) { - $number_of_fields_best = 1; - } - } - return ($number_of_fields_best); -} - -sub table_columns_available { - my $i_first_comma = shift; - my $columns = - maximum_line_length($i_first_comma) - - leading_spaces_to_go($i_first_comma); - - # Patch: the vertical formatter does not line up lines whose lengths - # exactly equal the available line length because of allowances - # that must be made for side comments. Therefore, the number of - # available columns is reduced by 1 character. - $columns -= 1; - return $columns; -} - -sub maximum_number_of_fields { - - # how many fields will fit in the available space? - my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; - my $max_pairs = int( $columns / $pair_width ); - my $number_of_fields = $max_pairs * 2; - if ( $odd_or_even == 1 - && $max_pairs * $pair_width + $max_width <= $columns ) - { - $number_of_fields++; - } - return $number_of_fields; -} - -sub compactify_table { - - # given a table with a certain number of fields and a certain number - # of lines, see if reducing the number of fields will make it look - # better. - my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; - if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { - my $min_fields; - - for ( - $min_fields = $number_of_fields ; - $min_fields >= $odd_or_even - && $min_fields * $formatted_lines >= $item_count ; - $min_fields -= $odd_or_even - ) - { - $number_of_fields = $min_fields; - } - } - return $number_of_fields; -} - -sub set_ragged_breakpoints { - - # Set breakpoints in a list that cannot be formatted nicely as a - # table. - my ( $ri_term_comma, $ri_ragged_break_list ) = @_; - - my $break_count = 0; - foreach ( @{$ri_ragged_break_list} ) { - my $j = $ri_term_comma->[$_]; - if ($j) { - set_forced_breakpoint($j); - $break_count++; - } - } - return $break_count; -} - -sub copy_old_breakpoints { - my ( $i_first_comma, $i_last_comma ) = @_; - for my $i ( $i_first_comma .. $i_last_comma ) { - if ( $old_breakpoint_to_go[$i] ) { - set_forced_breakpoint($i); - } - } - return; -} - -sub set_nobreaks { - my ( $i, $j ) = @_; - if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { - - FORMATTER_DEBUG_FLAG_NOBREAK && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; - }; - - @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); - } - - # shouldn't happen; non-critical error - else { - FORMATTER_DEBUG_FLAG_NOBREAK && do { - my ( $a, $b, $c ) = caller(); - print STDOUT - "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; - }; - } - return; -} - -sub set_fake_breakpoint { - - # Just bump up the breakpoint count as a signal that there are breaks. - # This is useful if we have breaks but may want to postpone deciding where - # to make them. - $forced_breakpoint_count++; - return; -} - -sub set_forced_breakpoint { - my $i = shift; - - return unless defined $i && $i >= 0; - - # no breaks between welded tokens - return if ( weld_len_right_to_go($i) ); - - # when called with certain tokens, use bond strengths to decide - # if we break before or after it - my $token = $tokens_to_go[$i]; - - if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { - if ( $want_break_before{$token} && $i >= 0 ) { $i-- } - } - - # breaks are forced before 'if' and 'unless' - elsif ( $is_if_unless{$token} ) { $i-- } - - if ( $i >= 0 && $i <= $max_index_to_go ) { - my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; - - FORMATTER_DEBUG_FLAG_FORCE && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; - }; - - if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { - $forced_breakpoint_to_go[$i_nonblank] = 1; - - if ( $i_nonblank > $index_max_forced_break ) { - $index_max_forced_break = $i_nonblank; - } - $forced_breakpoint_count++; - $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = - $i_nonblank; - - # if we break at an opening container..break at the closing - if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { - set_closing_breakpoint($i_nonblank); - } - } - } - return; -} - -sub clear_breakpoint_undo_stack { - $forced_breakpoint_undo_count = 0; - return; -} - -sub undo_forced_breakpoint_stack { - - my $i_start = shift; - if ( $i_start < 0 ) { - $i_start = 0; - my ( $a, $b, $c ) = caller(); - warning( -"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " - ); - } - - while ( $forced_breakpoint_undo_count > $i_start ) { - my $i = - $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; - if ( $i >= 0 && $i <= $max_index_to_go ) { - $forced_breakpoint_to_go[$i] = 0; - $forced_breakpoint_count--; - - FORMATTER_DEBUG_FLAG_UNDOBP && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; - }; - } - - # shouldn't happen, but not a critical error - else { - FORMATTER_DEBUG_FLAG_UNDOBP && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; - }; - } - } - return; -} - -{ # begin recombine_breakpoints - - my %is_amp_amp; - my %is_ternary; - my %is_math_op; - my %is_plus_minus; - my %is_mult_div; - - BEGIN { - - my @q; - @q = qw( && || ); - @is_amp_amp{@q} = (1) x scalar(@q); - - @q = qw( ? : ); - @is_ternary{@q} = (1) x scalar(@q); - - @q = qw( + - * / ); - @is_math_op{@q} = (1) x scalar(@q); - - @q = qw( + - ); - @is_plus_minus{@q} = (1) x scalar(@q); - - @q = qw( * / ); - @is_mult_div{@q} = (1) x scalar(@q); - } - - sub DUMP_BREAKPOINTS { - - # Debug routine to dump current breakpoints...not normally called - # We are given indexes to the current lines: - # $ri_beg = ref to array of BEGinning indexes of each line - # $ri_end = ref to array of ENDing indexes of each line - my ( $ri_beg, $ri_end, $msg ) = @_; - print STDERR "----Dumping breakpoints from: $msg----\n"; - for my $n ( 0 .. @{$ri_end} - 1 ) { - my $ibeg = $ri_beg->[$n]; - my $iend = $ri_end->[$n]; - my $text = ""; - foreach my $i ( $ibeg .. $iend ) { - $text .= $tokens_to_go[$i]; - } - print STDERR "$n ($ibeg:$iend) $text\n"; - } - print STDERR "----\n"; - return; - } - - sub unmask_phantom_semicolons { - - my ( $self, $ri_beg, $ri_end ) = @_; - - # Walk down the lines of this batch and unmask any invisible line-ending - # semicolons. They were placed by sub respace_tokens but we only now - # know if we actually need them. - - my $nmax = @{$ri_end} - 1; - foreach my $n ( 0 .. $nmax ) { - - my $i = $ri_end->[$n]; - if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) { - - $tokens_to_go[$i] = $rtoken_vars_to_go[$i]->[_TOKEN_] = - $want_left_space{';'} == WS_NO ? ';' : ' ;'; - my $line_number = $rtoken_vars_to_go[$i]->[_LINE_INDEX_] + 1; - note_added_semicolon($line_number); - } - } - return; - } - - sub recombine_breakpoints { - - # sub set_continuation_breaks is very liberal in setting line breaks - # for long lines, always setting breaks at good breakpoints, even - # when that creates small lines. Sometimes small line fragments - # are produced which would look better if they were combined. - # That's the task of this routine. - # - # We are given indexes to the current lines: - # $ri_beg = ref to array of BEGinning indexes of each line - # $ri_end = ref to array of ENDing indexes of each line - my ( $ri_beg, $ri_end ) = @_; - - # Make a list of all good joining tokens between the lines - # n-1 and n. - my @joint; - my $nmax = @{$ri_end} - 1; - for my $n ( 1 .. $nmax ) { - my $ibeg_1 = $ri_beg->[ $n - 1 ]; - my $iend_1 = $ri_end->[ $n - 1 ]; - my $iend_2 = $ri_end->[$n]; - my $ibeg_2 = $ri_beg->[$n]; - - my ( $itok, $itokp, $itokm ); - - foreach my $itest ( $iend_1, $ibeg_2 ) { - my $type = $types_to_go[$itest]; - if ( $is_math_op{$type} - || $is_amp_amp{$type} - || $is_assignment{$type} - || $type eq ':' ) - { - $itok = $itest; - } - } - $joint[$n] = [$itok]; - } - - my $more_to_do = 1; - - # We keep looping over all of the lines of this batch - # until there are no more possible recombinations - my $nmax_last = @{$ri_end}; - my $reverse = 0; - while ($more_to_do) { - my $n_best = 0; - my $bs_best; - my $nmax = @{$ri_end} - 1; - - # Safety check for infinite loop - unless ( $nmax < $nmax_last ) { - - # Shouldn't happen because splice below decreases nmax on each - # pass. - Fault("Program bug-infinite loop in recombine breakpoints\n"); - } - $nmax_last = $nmax; - $more_to_do = 0; - my $skip_Section_3; - my $leading_amp_count = 0; - my $this_line_is_semicolon_terminated; - - # loop over all remaining lines in this batch - for my $iter ( 1 .. $nmax ) { - - # alternating sweep direction gives symmetric results - # for recombining lines which exceed the line length - # such as eval {{{{.... }}}} - my $n; - if ($reverse) { $n = 1 + $nmax - $iter; } - else { $n = $iter } - - #---------------------------------------------------------- - # If we join the current pair of lines, - # line $n-1 will become the left part of the joined line - # line $n will become the right part of the joined line - # - # Here are Indexes of the endpoint tokens of the two lines: - # - # -----line $n-1--- | -----line $n----- - # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 - # ^ - # | - # We want to decide if we should remove the line break - # between the tokens at $iend_1 and $ibeg_2 - # - # We will apply a number of ad-hoc tests to see if joining - # here will look ok. The code will just issue a 'next' - # command if the join doesn't look good. If we get through - # the gauntlet of tests, the lines will be recombined. - #---------------------------------------------------------- - # - # beginning and ending tokens of the lines we are working on - my $ibeg_1 = $ri_beg->[ $n - 1 ]; - my $iend_1 = $ri_end->[ $n - 1 ]; - my $iend_2 = $ri_end->[$n]; - my $ibeg_2 = $ri_beg->[$n]; - my $ibeg_nmax = $ri_beg->[$nmax]; - - # combined line cannot be too long - my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 ); - next if ( $excess > 0 ); - - my $type_iend_1 = $types_to_go[$iend_1]; - my $type_iend_2 = $types_to_go[$iend_2]; - my $type_ibeg_1 = $types_to_go[$ibeg_1]; - my $type_ibeg_2 = $types_to_go[$ibeg_2]; - - # terminal token of line 2 if any side comment is ignored: - my $iend_2t = $iend_2; - my $type_iend_2t = $type_iend_2; - - # some beginning indexes of other lines, which may not exist - my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; - my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; - my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; - - my $bs_tweak = 0; - - #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - - # $nesting_depth_to_go[$ibeg_1] ); - - FORMATTER_DEBUG_FLAG_RECOMBINE && do { - print STDERR -"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; - }; - - # If line $n is the last line, we set some flags and - # do any special checks for it - if ( $n == $nmax ) { - - # a terminal '{' should stay where it is - next if $type_ibeg_2 eq '{'; - - if ( $type_iend_2 eq '#' - && $iend_2 - $ibeg_2 >= 2 - && $types_to_go[ $iend_2 - 1 ] eq 'b' ) - { - $iend_2t = $iend_2 - 2; - $type_iend_2t = $types_to_go[$iend_2t]; - } - - $this_line_is_semicolon_terminated = $type_iend_2t eq ';'; - } - - #---------------------------------------------------------- - # Recombine Section 0: - # Examine the special token joining this line pair, if any. - # Put as many tests in this section to avoid duplicate code and - # to make formatting independent of whether breaks are to the - # left or right of an operator. - #---------------------------------------------------------- - - my ($itok) = @{ $joint[$n] }; - if ($itok) { - - # FIXME: Patch - may not be necessary - my $iend_1 = - $type_iend_1 eq 'b' - ? $iend_1 - 1 - : $iend_1; - - my $iend_2 = - $type_iend_2 eq 'b' - ? $iend_2 - 1 - : $iend_2; - ## END PATCH - - my $type = $types_to_go[$itok]; - - if ( $type eq ':' ) { - - # do not join at a colon unless it disobeys the break request - if ( $itok eq $iend_1 ) { - next unless $want_break_before{$type}; - } - else { - $leading_amp_count++; - next if $want_break_before{$type}; - } - } ## end if ':' - - # handle math operators + - * / - elsif ( $is_math_op{$type} ) { - - # Combine these lines if this line is a single - # number, or if it is a short term with same - # operator as the previous line. For example, in - # the following code we will combine all of the - # short terms $A, $B, $C, $D, $E, $F, together - # instead of leaving them one per line: - # my $time = - # $A * $B * $C * $D * $E * $F * - # ( 2. * $eps * $sigma * $area ) * - # ( 1. / $tcold**3 - 1. / $thot**3 ); - - # This can be important in math-intensive code. - - my $good_combo; - - my $itokp = min( $inext_to_go[$itok], $iend_2 ); - my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); - my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); - my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); - - # check for a number on the right - if ( $types_to_go[$itokp] eq 'n' ) { - - # ok if nothing else on right - if ( $itokp == $iend_2 ) { - $good_combo = 1; - } - else { - - # look one more token to right.. - # okay if math operator or some termination - $good_combo = - ( ( $itokpp == $iend_2 ) - && $is_math_op{ $types_to_go[$itokpp] } ) - || $types_to_go[$itokpp] =~ /^[#,;]$/; - } - } - - # check for a number on the left - if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { - - # okay if nothing else to left - if ( $itokm == $ibeg_1 ) { - $good_combo = 1; - } - - # otherwise look one more token to left - else { - - # okay if math operator, comma, or assignment - $good_combo = ( $itokmm == $ibeg_1 ) - && ( $is_math_op{ $types_to_go[$itokmm] } - || $types_to_go[$itokmm] =~ /^[,]$/ - || $is_assignment{ $types_to_go[$itokmm] } - ); - } - } - - # look for a single short token either side of the - # operator - if ( !$good_combo ) { - - # Slight adjustment factor to make results - # independent of break before or after operator in - # long summed lists. (An operator and a space make - # two spaces). - my $two = ( $itok eq $iend_1 ) ? 2 : 0; - - $good_combo = - - # numbers or id's on both sides of this joint - $types_to_go[$itokp] =~ /^[in]$/ - && $types_to_go[$itokm] =~ /^[in]$/ - - # one of the two lines must be short: - && ( - ( - # no more than 2 nonblank tokens right of - # joint - $itokpp == $iend_2 - - # short - && token_sequence_length( $itokp, $iend_2 ) - < $two + - $rOpts_short_concatenation_item_length - ) - || ( - # no more than 2 nonblank tokens left of - # joint - $itokmm == $ibeg_1 - - # short - && token_sequence_length( $ibeg_1, $itokm ) - < 2 - $two + - $rOpts_short_concatenation_item_length - ) - - ) - - # keep pure terms; don't mix +- with */ - && !( - $is_plus_minus{$type} - && ( $is_mult_div{ $types_to_go[$itokmm] } - || $is_mult_div{ $types_to_go[$itokpp] } ) - ) - && !( - $is_mult_div{$type} - && ( $is_plus_minus{ $types_to_go[$itokmm] } - || $is_plus_minus{ $types_to_go[$itokpp] } ) - ) - - ; - } - - # it is also good to combine if we can reduce to 2 lines - if ( !$good_combo ) { - - # index on other line where same token would be in a - # long chain. - my $iother = - ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; - - $good_combo = - $n == 2 - && $n == $nmax - && $types_to_go[$iother] ne $type; - } - - next unless ($good_combo); - - } ## end math - - elsif ( $is_amp_amp{$type} ) { - ##TBD - } ## end &&, || - - elsif ( $is_assignment{$type} ) { - ##TBD - } ## end assignment - } - - #---------------------------------------------------------- - # Recombine Section 1: - # Join welded nested containers immediately - # use alternating sweep direction until all are welds - # are done. This produces more symmetric opening and - # closing joins when lines exceed line length. - #---------------------------------------------------------- - if ( weld_len_right_to_go($iend_1) - || weld_len_left_to_go($ibeg_2) ) - { - $n_best = $n; - $reverse = 1 - $reverse; - last; - } - $reverse = 0; - - #---------------------------------------------------------- - # Recombine Section 2: - # Examine token at $iend_1 (right end of first line of pair) - #---------------------------------------------------------- - - # an isolated '}' may join with a ';' terminated segment - if ( $type_iend_1 eq '}' ) { - - # Check for cases where combining a semicolon terminated - # statement with a previous isolated closing paren will - # allow the combined line to be outdented. This is - # generally a good move. For example, we can join up - # the last two lines here: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) - # = stat($file); - # - # to get: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) = stat($file); - # - # which makes the parens line up. - # - # Another example, from Joe Matarazzo, probably looks best - # with the 'or' clause appended to the trailing paren: - # $self->some_method( - # PARAM1 => 'foo', - # PARAM2 => 'bar' - # ) or die "Some_method didn't work"; - # - # But we do not want to do this for something like the -lp - # option where the paren is not outdentable because the - # trailing clause will be far to the right. - # - # The logic here is synchronized with the logic in sub - # sub set_adjusted_indentation, which actually does - # the outdenting. - # - $skip_Section_3 ||= $this_line_is_semicolon_terminated - - # only one token on last line - && $ibeg_1 == $iend_1 - - # must be structural paren - && $tokens_to_go[$iend_1] eq ')' - - # style must allow outdenting, - && !$closing_token_indentation{')'} - - # only leading '&&', '||', and ':' if no others seen - # (but note: our count made below could be wrong - # due to intervening comments) - && ( $leading_amp_count == 0 - || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) - - # but leading colons probably line up with a - # previous colon or question (count could be wrong). - && $type_ibeg_2 ne ':' - - # only one step in depth allowed. this line must not - # begin with a ')' itself. - && ( $nesting_depth_to_go[$iend_1] == - $nesting_depth_to_go[$iend_2] + 1 ); - - # YVES patch 2 of 2: - # Allow cuddled eval chains, like this: - # eval { - # #STUFF; - # 1; # return true - # } or do { - # #handle error - # }; - # This patch works together with a patch in - # setting adjusted indentation (where the closing eval - # brace is outdented if possible). - # The problem is that an 'eval' block has continuation - # indentation and it looks better to undo it in some - # cases. If we do not use this patch we would get: - # eval { - # #STUFF; - # 1; # return true - # } - # or do { - # #handle error - # }; - # The alternative, for uncuddled style, is to create - # a patch in set_adjusted_indentation which undoes - # the indentation of a leading line like 'or do {'. - # This doesn't work well with -icb through - if ( - $block_type_to_go[$iend_1] eq 'eval' - && !$rOpts->{'line-up-parentheses'} - && !$rOpts->{'indent-closing-brace'} - && $tokens_to_go[$iend_2] eq '{' - && ( - ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ ) - || ( $type_ibeg_2 eq 'k' - && $is_and_or{ $tokens_to_go[$ibeg_2] } ) - || $is_if_unless{ $tokens_to_go[$ibeg_2] } - ) - ) - { - $skip_Section_3 ||= 1; - } - - next - unless ( - $skip_Section_3 - - # handle '.' and '?' specially below - || ( $type_ibeg_2 =~ /^[\.\?]$/ ) - ); - } - - elsif ( $type_iend_1 eq '{' ) { - - # YVES - # honor breaks at opening brace - # Added to prevent recombining something like this: - # } || eval { package main; - next if $forced_breakpoint_to_go[$iend_1]; - } - - # do not recombine lines with ending &&, ||, - elsif ( $is_amp_amp{$type_iend_1} ) { - next unless $want_break_before{$type_iend_1}; - } - - # Identify and recombine a broken ?/: chain - elsif ( $type_iend_1 eq '?' ) { - - # Do not recombine different levels - next - if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); - - # do not recombine unless next line ends in : - next unless $type_iend_2 eq ':'; - } - - # for lines ending in a comma... - elsif ( $type_iend_1 eq ',' ) { - - # Do not recombine at comma which is following the - # input bias. - # TODO: might be best to make a special flag - next if ( $old_breakpoint_to_go[$iend_1] ); - - # an isolated '},' may join with an identifier + ';' - # this is useful for the class of a 'bless' statement (bless.t) - if ( $type_ibeg_1 eq '}' - && $type_ibeg_2 eq 'i' ) - { - next - unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) - && ( $iend_2 == ( $ibeg_2 + 1 ) ) - && $this_line_is_semicolon_terminated ); - - # override breakpoint - $forced_breakpoint_to_go[$iend_1] = 0; - } - - # but otherwise .. - else { - - # do not recombine after a comma unless this will leave - # just 1 more line - next unless ( $n + 1 >= $nmax ); - - # do not recombine if there is a change in indentation depth - next - if ( - $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); - - # do not recombine a "complex expression" after a - # comma. "complex" means no parens. - my $saw_paren; - foreach my $ii ( $ibeg_2 .. $iend_2 ) { - if ( $tokens_to_go[$ii] eq '(' ) { - $saw_paren = 1; - last; - } - } - next if $saw_paren; - } - } - - # opening paren.. - elsif ( $type_iend_1 eq '(' ) { - - # No longer doing this - } - - elsif ( $type_iend_1 eq ')' ) { - - # No longer doing this - } - - # keep a terminal for-semicolon - elsif ( $type_iend_1 eq 'f' ) { - next; - } - - # if '=' at end of line ... - elsif ( $is_assignment{$type_iend_1} ) { - - # keep break after = if it was in input stream - # this helps prevent 'blinkers' - next if $old_breakpoint_to_go[$iend_1] - - # don't strand an isolated '=' - && $iend_1 != $ibeg_1; - - my $is_short_quote = - ( $type_ibeg_2 eq 'Q' - && $ibeg_2 == $iend_2 - && token_sequence_length( $ibeg_2, $ibeg_2 ) < - $rOpts_short_concatenation_item_length ); - my $is_ternary = - ( $type_ibeg_1 eq '?' - && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); - - # always join an isolated '=', a short quote, or if this - # will put ?/: at start of adjacent lines - if ( $ibeg_1 != $iend_1 - && !$is_short_quote - && !$is_ternary ) - { - next - unless ( - ( - - # unless we can reduce this to two lines - $nmax < $n + 2 - - # or three lines, the last with a leading semicolon - || ( $nmax == $n + 2 - && $types_to_go[$ibeg_nmax] eq ';' ) - - # or the next line ends with a here doc - || $type_iend_2 eq 'h' - - # or the next line ends in an open paren or brace - # and the break hasn't been forced [dima.t] - || ( !$forced_breakpoint_to_go[$iend_1] - && $type_iend_2 eq '{' ) - ) - - # do not recombine if the two lines might align well - # this is a very approximate test for this - && ( $ibeg_3 >= 0 - && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) - ); - - if ( - - # Recombine if we can make two lines - $nmax >= $n + 2 - - # -lp users often prefer this: - # my $title = function($env, $env, $sysarea, - # "bubba Borrower Entry"); - # so we will recombine if -lp is used we have - # ending comma - && ( !$rOpts_line_up_parentheses - || $type_iend_2 ne ',' ) - ) - { - - # otherwise, scan the rhs line up to last token for - # complexity. Note that we are not counting the last - # token in case it is an opening paren. - my $tv = 0; - my $depth = $nesting_depth_to_go[$ibeg_2]; - foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { - if ( $nesting_depth_to_go[$i] != $depth ) { - $tv++; - last if ( $tv > 1 ); - } - $depth = $nesting_depth_to_go[$i]; - } - - # ok to recombine if no level changes before last token - if ( $tv > 0 ) { - - # otherwise, do not recombine if more than two - # level changes. - next if ( $tv > 1 ); - - # check total complexity of the two adjacent lines - # that will occur if we do this join - my $istop = - ( $n < $nmax ) - ? $ri_end->[ $n + 1 ] - : $iend_2; - foreach my $i ( $iend_2 .. $istop ) { - if ( $nesting_depth_to_go[$i] != $depth ) { - $tv++; - last if ( $tv > 2 ); - } - $depth = $nesting_depth_to_go[$i]; - } - - # do not recombine if total is more than 2 level changes - next if ( $tv > 2 ); - } - } - } - - unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { - $forced_breakpoint_to_go[$iend_1] = 0; - } - } - - # for keywords.. - elsif ( $type_iend_1 eq 'k' ) { - - # make major control keywords stand out - # (recombine.t) - next - if ( - - #/^(last|next|redo|return)$/ - $is_last_next_redo_return{ $tokens_to_go[$iend_1] } - - # but only if followed by multiple lines - && $n < $nmax - ); - - if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { - next - unless $want_break_before{ $tokens_to_go[$iend_1] }; - } - } - - #---------------------------------------------------------- - # Recombine Section 3: - # Examine token at $ibeg_2 (left end of second line of pair) - #---------------------------------------------------------- - - # join lines identified above as capable of - # causing an outdented line with leading closing paren - # Note that we are skipping the rest of this section - # and the rest of the loop to do the join - if ($skip_Section_3) { - $forced_breakpoint_to_go[$iend_1] = 0; - $n_best = $n; - last; - } - - # handle lines with leading &&, || - elsif ( $is_amp_amp{$type_ibeg_2} ) { - - $leading_amp_count++; - - # ok to recombine if it follows a ? or : - # and is followed by an open paren.. - my $ok = - ( $is_ternary{$type_ibeg_1} - && $tokens_to_go[$iend_2] eq '(' ) - - # or is followed by a ? or : at same depth - # - # We are looking for something like this. We can - # recombine the && line with the line above to make the - # structure more clear: - # return - # exists $G->{Attr}->{V} - # && exists $G->{Attr}->{V}->{$u} - # ? %{ $G->{Attr}->{V}->{$u} } - # : (); - # - # We should probably leave something like this alone: - # return - # exists $G->{Attr}->{E} - # && exists $G->{Attr}->{E}->{$u} - # && exists $G->{Attr}->{E}->{$u}->{$v} - # ? %{ $G->{Attr}->{E}->{$u}->{$v} } - # : (); - # so that we either have all of the &&'s (or ||'s) - # on one line, as in the first example, or break at - # each one as in the second example. However, it - # sometimes makes things worse to check for this because - # it prevents multiple recombinations. So this is not done. - || ( $ibeg_3 >= 0 - && $is_ternary{ $types_to_go[$ibeg_3] } - && $nesting_depth_to_go[$ibeg_3] == - $nesting_depth_to_go[$ibeg_2] ); - - next if !$ok && $want_break_before{$type_ibeg_2}; - $forced_breakpoint_to_go[$iend_1] = 0; - - # tweak the bond strength to give this joint priority - # over ? and : - $bs_tweak = 0.25; - } - - # Identify and recombine a broken ?/: chain - elsif ( $type_ibeg_2 eq '?' ) { - - # Do not recombine different levels - my $lev = $levels_to_go[$ibeg_2]; - next if ( $lev ne $levels_to_go[$ibeg_1] ); - - # Do not recombine a '?' if either next line or - # previous line does not start with a ':'. The reasons - # are that (1) no alignment of the ? will be possible - # and (2) the expression is somewhat complex, so the - # '?' is harder to see in the interior of the line. - my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; - my $precedes_colon = - $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; - next unless ( $follows_colon || $precedes_colon ); - - # we will always combining a ? line following a : line - if ( !$follows_colon ) { - - # ...otherwise recombine only if it looks like a chain. - # we will just look at a few nearby lines to see if - # this looks like a chain. - my $local_count = 0; - foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { - $local_count++ - if $ii >= 0 - && $types_to_go[$ii] eq ':' - && $levels_to_go[$ii] == $lev; - } - next unless ( $local_count > 1 ); - } - $forced_breakpoint_to_go[$iend_1] = 0; - } - - # do not recombine lines with leading '.' - elsif ( $type_ibeg_2 eq '.' ) { - my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); - next - unless ( - - # ... unless there is just one and we can reduce - # this to two lines if we do. For example, this - # - # - # $bodyA .= - # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' - # - # looks better than this: - # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' - # . '$args .= $pat;' - - ( - $n == 2 - && $n == $nmax - && $type_ibeg_1 ne $type_ibeg_2 - ) - - # ... or this would strand a short quote , like this - # . "some long quote" - # . "\n"; - - || ( $types_to_go[$i_next_nonblank] eq 'Q' - && $i_next_nonblank >= $iend_2 - 1 - && $token_lengths_to_go[$i_next_nonblank] < - $rOpts_short_concatenation_item_length ) - ); - } - - # handle leading keyword.. - elsif ( $type_ibeg_2 eq 'k' ) { - - # handle leading "or" - if ( $tokens_to_go[$ibeg_2] eq 'or' ) { - next - unless ( - $this_line_is_semicolon_terminated - && ( - - # following 'if' or 'unless' or 'or' - $type_ibeg_1 eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] } - - # important: only combine a very simple or - # statement because the step below may have - # combined a trailing 'and' with this or, - # and we do not want to then combine - # everything together - && ( $iend_2 - $ibeg_2 <= 7 ) - ) - ); - - #X: RT #81854 - $forced_breakpoint_to_go[$iend_1] = 0 - unless $old_breakpoint_to_go[$iend_1]; - } - - # handle leading 'and' - elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { - - # Decide if we will combine a single terminal 'and' - # after an 'if' or 'unless'. - - # This looks best with the 'and' on the same - # line as the 'if': - # - # $a = 1 - # if $seconds and $nu < 2; - # - # But this looks better as shown: - # - # $a = 1 - # if !$this->{Parents}{$_} - # or $this->{Parents}{$_} eq $_; - # - next - unless ( - $this_line_is_semicolon_terminated - && ( - - # following 'if' or 'unless' or 'or' - $type_ibeg_1 eq 'k' - && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } - || $tokens_to_go[$ibeg_1] eq 'or' ) - ) - ); - } - - # handle leading "if" and "unless" - elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { - - # FIXME: This is still experimental..may not be too useful - next - unless ( - $this_line_is_semicolon_terminated - - # previous line begins with 'and' or 'or' - && $type_ibeg_1 eq 'k' - && $is_and_or{ $tokens_to_go[$ibeg_1] } - - ); - } - - # handle all other leading keywords - else { - - # keywords look best at start of lines, - # but combine things like "1 while" - unless ( $is_assignment{$type_iend_1} ) { - next - if ( ( $type_iend_1 ne 'k' ) - && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); - } - } - } - - # similar treatment of && and || as above for 'and' and 'or': - # NOTE: This block of code is currently bypassed because - # of a previous block but is retained for possible future use. - elsif ( $is_amp_amp{$type_ibeg_2} ) { - - # maybe looking at something like: - # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; - - next - unless ( - $this_line_is_semicolon_terminated - - # previous line begins with an 'if' or 'unless' keyword - && $type_ibeg_1 eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] } - - ); - } - - # handle line with leading = or similar - elsif ( $is_assignment{$type_ibeg_2} ) { - next unless ( $n == 1 || $n == $nmax ); - next if $old_breakpoint_to_go[$iend_1]; - next - unless ( - - # unless we can reduce this to two lines - $nmax == 2 - - # or three lines, the last with a leading semicolon - || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) - - # or the next line ends with a here doc - || $type_iend_2 eq 'h' - - # or this is a short line ending in ; - || ( $n == $nmax && $this_line_is_semicolon_terminated ) - ); - $forced_breakpoint_to_go[$iend_1] = 0; - } - - #---------------------------------------------------------- - # Recombine Section 4: - # Combine the lines if we arrive here and it is possible - #---------------------------------------------------------- - - # honor hard breakpoints - next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); - - my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; - - # Require a few extra spaces before recombining lines if we are - # at an old breakpoint unless this is a simple list or terminal - # line. The goal is to avoid oscillating between two - # quasi-stable end states. For example this snippet caused - # problems: -## my $this = -## bless { -## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" -## }, -## $type; - next - if ( $old_breakpoint_to_go[$iend_1] - && !$this_line_is_semicolon_terminated - && $n < $nmax - && $excess + 4 > 0 - && $type_iend_2 ne ',' ); - - # do not recombine if we would skip in indentation levels - if ( $n < $nmax ) { - my $if_next = $ri_beg->[ $n + 1 ]; - next - if ( - $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] - && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] - - # but an isolated 'if (' is undesirable - && !( - $n == 1 - && $iend_1 - $ibeg_1 <= 2 - && $type_ibeg_1 eq 'k' - && $tokens_to_go[$ibeg_1] eq 'if' - && $tokens_to_go[$iend_1] ne '(' - ) - ); - } - - # honor no-break's - next if ( $bs >= NO_BREAK - 1 ); - - # remember the pair with the greatest bond strength - if ( !$n_best ) { - $n_best = $n; - $bs_best = $bs; - } - else { - - if ( $bs > $bs_best ) { - $n_best = $n; - $bs_best = $bs; - } - } - } - - # recombine the pair with the greatest bond strength - if ($n_best) { - splice @{$ri_beg}, $n_best, 1; - splice @{$ri_end}, $n_best - 1, 1; - splice @joint, $n_best, 1; - - # keep going if we are still making progress - $more_to_do++; - } - } - return ( $ri_beg, $ri_end ); - } -} # end recombine_breakpoints - -sub break_all_chain_tokens { - - # scan the current breakpoints looking for breaks at certain "chain - # operators" (. : && || + etc) which often occur repeatedly in a long - # statement. If we see a break at any one, break at all similar tokens - # within the same container. - # - my ( $ri_left, $ri_right ) = @_; - - my %saw_chain_type; - my %left_chain_type; - my %right_chain_type; - my %interior_chain_type; - my $nmax = @{$ri_right} - 1; - - # scan the left and right end tokens of all lines - my $count = 0; - for my $n ( 0 .. $nmax ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - my $typel = $types_to_go[$il]; - my $typer = $types_to_go[$ir]; - $typel = '+' if ( $typel eq '-' ); # treat + and - the same - $typer = '+' if ( $typer eq '-' ); - $typel = '*' if ( $typel eq '/' ); # treat * and / the same - $typer = '*' if ( $typer eq '/' ); - my $tokenl = $tokens_to_go[$il]; - my $tokenr = $tokens_to_go[$ir]; - - if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { - next if ( $typel eq '?' ); - push @{ $left_chain_type{$typel} }, $il; - $saw_chain_type{$typel} = 1; - $count++; - } - if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { - next if ( $typer eq '?' ); - push @{ $right_chain_type{$typer} }, $ir; - $saw_chain_type{$typer} = 1; - $count++; - } - } - return unless $count; - - # now look for any interior tokens of the same types - $count = 0; - for my $n ( 0 .. $nmax ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - foreach my $i ( $il + 1 .. $ir - 1 ) { - my $type = $types_to_go[$i]; - $type = '+' if ( $type eq '-' ); - $type = '*' if ( $type eq '/' ); - if ( $saw_chain_type{$type} ) { - push @{ $interior_chain_type{$type} }, $i; - $count++; - } - } - } - return unless $count; - - # now make a list of all new break points - my @insert_list; - - # loop over all chain types - foreach my $type ( keys %saw_chain_type ) { - - # quit if just ONE continuation line with leading . For example-- - # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' - # . $contents; - last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); - - # loop over all interior chain tokens - foreach my $itest ( @{ $interior_chain_type{$type} } ) { - - # loop over all left end tokens of same type - if ( $left_chain_type{$type} ) { - next if $nobreak_to_go[ $itest - 1 ]; - foreach my $i ( @{ $left_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); - push @insert_list, $itest - 1; - - # Break at matching ? if this : is at a different level. - # For example, the ? before $THRf_DEAD in the following - # should get a break if its : gets a break. - # - # my $flags = - # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE - # : ( $_ & 4 ) ? $THRf_R_DETACHED - # : $THRf_R_JOINABLE; - if ( $type eq ':' - && $levels_to_go[$i] != $levels_to_go[$itest] ) - { - my $i_question = $mate_index_to_go[$itest]; - if ( $i_question > 0 ) { - push @insert_list, $i_question - 1; - } - } - last; - } - } - - # loop over all right end tokens of same type - if ( $right_chain_type{$type} ) { - next if $nobreak_to_go[$itest]; - foreach my $i ( @{ $right_chain_type{$type} } ) { - next unless in_same_container( $i, $itest ); - push @insert_list, $itest; - - # break at matching ? if this : is at a different level - if ( $type eq ':' - && $levels_to_go[$i] != $levels_to_go[$itest] ) - { - my $i_question = $mate_index_to_go[$itest]; - if ( $i_question >= 0 ) { - push @insert_list, $i_question; - } - } - last; - } - } - } - } - - # insert any new break points - if (@insert_list) { - insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } - return; -} - -sub break_equals { - - # Look for assignment operators that could use a breakpoint. - # For example, in the following snippet - # - # $HOME = $ENV{HOME} - # || $ENV{LOGDIR} - # || $pw[7] - # || die "no home directory for user $<"; - # - # we could break at the = to get this, which is a little nicer: - # $HOME = - # $ENV{HOME} - # || $ENV{LOGDIR} - # || $pw[7] - # || die "no home directory for user $<"; - # - # The logic here follows the logic in set_logical_padding, which - # will add the padding in the second line to improve alignment. - # - my ( $ri_left, $ri_right ) = @_; - my $nmax = @{$ri_right} - 1; - return unless ( $nmax >= 2 ); - - # scan the left ends of first two lines - my $tokbeg = ""; - my $depth_beg; - for my $n ( 1 .. 2 ) { - my $il = $ri_left->[$n]; - my $typel = $types_to_go[$il]; - my $tokenl = $tokens_to_go[$il]; - - my $has_leading_op = ( $tokenl =~ /^\w/ ) - ? $is_chain_operator{$tokenl} # + - * / : ? && || - : $is_chain_operator{$typel}; # and, or - return unless ($has_leading_op); - if ( $n > 1 ) { - return - unless ( $tokenl eq $tokbeg - && $nesting_depth_to_go[$il] eq $depth_beg ); - } - $tokbeg = $tokenl; - $depth_beg = $nesting_depth_to_go[$il]; - } - - # now look for any interior tokens of the same types - my $il = $ri_left->[0]; - my $ir = $ri_right->[0]; - - # now make a list of all new break points - my @insert_list; - for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { - my $type = $types_to_go[$i]; - if ( $is_assignment{$type} - && $nesting_depth_to_go[$i] eq $depth_beg ) - { - if ( $want_break_before{$type} ) { - push @insert_list, $i - 1; - } - else { - push @insert_list, $i; - } - } - } - - # Break after a 'return' followed by a chain of operators - # return ( $^O !~ /win32|dos/i ) - # && ( $^O ne 'VMS' ) - # && ( $^O ne 'OS2' ) - # && ( $^O ne 'MacOS' ); - # To give: - # return - # ( $^O !~ /win32|dos/i ) - # && ( $^O ne 'VMS' ) - # && ( $^O ne 'OS2' ) - # && ( $^O ne 'MacOS' ); - my $i = 0; - if ( $types_to_go[$i] eq 'k' - && $tokens_to_go[$i] eq 'return' - && $ir > $il - && $nesting_depth_to_go[$i] eq $depth_beg ) - { - push @insert_list, $i; - } - - return unless (@insert_list); - - # One final check... - # scan second and third lines and be sure there are no assignments - # we want to avoid breaking at an = to make something like this: - # unless ( $icon = - # $html_icons{"$type-$state"} - # or $icon = $html_icons{$type} - # or $icon = $html_icons{$state} ) - for my $n ( 1 .. 2 ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - foreach my $i ( $il + 1 .. $ir ) { - my $type = $types_to_go[$i]; - return - if ( $is_assignment{$type} - && $nesting_depth_to_go[$i] eq $depth_beg ); - } - } - - # ok, insert any new break point - if (@insert_list) { - insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } - return; -} - -sub insert_final_breaks { - - my ( $ri_left, $ri_right ) = @_; - - my $nmax = @{$ri_right} - 1; - - # scan the left and right end tokens of all lines - my $count = 0; - my $i_first_colon = -1; - for my $n ( 0 .. $nmax ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - my $typel = $types_to_go[$il]; - my $typer = $types_to_go[$ir]; - return if ( $typel eq '?' ); - return if ( $typer eq '?' ); - if ( $typel eq ':' ) { $i_first_colon = $il; last; } - elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } - } - - # For long ternary chains, - # if the first : we see has its # ? is in the interior - # of a preceding line, then see if there are any good - # breakpoints before the ?. - if ( $i_first_colon > 0 ) { - my $i_question = $mate_index_to_go[$i_first_colon]; - if ( $i_question > 0 ) { - my @insert_list; - for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { - my $token = $tokens_to_go[$ii]; - my $type = $types_to_go[$ii]; - - # For now, a good break is either a comma or a 'return'. - if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) - && in_same_container( $ii, $i_question ) ) - { - push @insert_list, $ii; - last; - } - } - - # insert any new break points - if (@insert_list) { - insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } - } - } - return; -} - -sub in_same_container { - - # check to see if tokens at i1 and i2 are in the - # same container, and not separated by a comma, ? or : - my ( $i1, $i2 ) = @_; - my $type = $types_to_go[$i1]; - my $depth = $nesting_depth_to_go[$i1]; - return unless ( $nesting_depth_to_go[$i2] == $depth ); - if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } - - ########################################################### - # This is potentially a very slow routine and not critical. - # For safety just give up for large differences. - # See test file 'infinite_loop.txt' - # TODO: replace this loop with a data structure - ########################################################### - return if ( $i2 - $i1 > 200 ); - - foreach my $i ( $i1 + 1 .. $i2 - 1 ) { - next if ( $nesting_depth_to_go[$i] > $depth ); - return if ( $nesting_depth_to_go[$i] < $depth ); - - my $tok = $tokens_to_go[$i]; - $tok = ',' if $tok eq '=>'; # treat => same as , - - # Example: we would not want to break at any of these .'s - # : "$str" - if ( $type ne ':' ) { - return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; - } - else { - return if ( $tok =~ /^[\,]$/ ); - } - } - return 1; -} - -sub set_continuation_breaks { - - # Define an array of indexes for inserting newline characters to - # keep the line lengths below the maximum desired length. There is - # an implied break after the last token, so it need not be included. - - # Method: - # This routine is part of series of routines which adjust line - # lengths. It is only called if a statement is longer than the - # maximum line length, or if a preliminary scanning located - # desirable break points. Sub scan_list has already looked at - # these tokens and set breakpoints (in array - # $forced_breakpoint_to_go[$i]) where it wants breaks (for example - # after commas, after opening parens, and before closing parens). - # This routine will honor these breakpoints and also add additional - # breakpoints as necessary to keep the line length below the maximum - # requested. It bases its decision on where the 'bond strength' is - # lowest. - - # Output: returns references to the arrays: - # @i_first - # @i_last - # which contain the indexes $i of the first and last tokens on each - # line. - - # In addition, the array: - # $forced_breakpoint_to_go[$i] - # may be updated to be =1 for any index $i after which there must be - # a break. This signals later routines not to undo the breakpoint. - - my $saw_good_break = shift; - my @i_first = (); # the first index to output - my @i_last = (); # the last index to output - my @i_colon_breaks = (); # needed to decide if we have to break at ?'s - if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } - - set_bond_strengths(); - - my $imin = 0; - my $imax = $max_index_to_go; - if ( $types_to_go[$imin] eq 'b' ) { $imin++ } - if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - my $i_begin = $imin; # index for starting next iteration - - my $leading_spaces = leading_spaces_to_go($imin); - my $line_count = 0; - my $last_break_strength = NO_BREAK; - my $i_last_break = -1; - my $max_bias = 0.001; - my $tiny_bias = 0.0001; - my $leading_alignment_token = ""; - my $leading_alignment_type = ""; - - # see if any ?/:'s are in order - my $colons_in_order = 1; - my $last_tok = ""; - my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ]; - my $colon_count = @colon_list; - foreach (@colon_list) { - if ( $_ eq $last_tok ) { $colons_in_order = 0; last } - $last_tok = $_; - } - - # This is a sufficient but not necessary condition for colon chain - my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); - - #------------------------------------------------------- - # BEGINNING of main loop to set continuation breakpoints - # Keep iterating until we reach the end - #------------------------------------------------------- - while ( $i_begin <= $imax ) { - my $lowest_strength = NO_BREAK; - my $starting_sum = $summed_lengths_to_go[$i_begin]; - my $i_lowest = -1; - my $i_test = -1; - my $lowest_next_token = ''; - my $lowest_next_type = 'b'; - my $i_lowest_next_nonblank = -1; - - #------------------------------------------------------- - # BEGINNING of inner loop to find the best next breakpoint - #------------------------------------------------------- - for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { - my $type = $types_to_go[$i_test]; - my $token = $tokens_to_go[$i_test]; - my $next_type = $types_to_go[ $i_test + 1 ]; - my $next_token = $tokens_to_go[ $i_test + 1 ]; - my $i_next_nonblank = $inext_to_go[$i_test]; - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - my $strength = $bond_strength_to_go[$i_test]; - my $maximum_line_length = maximum_line_length($i_begin); - - # use old breaks as a tie-breaker. For example to - # prevent blinkers with -pbp in this code: - -##@keywords{ -## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/} -## = (); - - # At the same time try to prevent a leading * in this code - # with the default formatting: - # -## return -## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 ) -## * ( $x**( $a - 1 ) ) -## * ( ( 1 - $x )**( $b - 1 ) ); - - # reduce strength a bit to break ties at an old breakpoint ... - if ( - $old_breakpoint_to_go[$i_test] - - # which is a 'good' breakpoint, meaning ... - # we don't want to break before it - && !$want_break_before{$type} - - # and either we want to break before the next token - # or the next token is not short (i.e. not a '*', '/' etc.) - && $i_next_nonblank <= $imax - && ( $want_break_before{$next_nonblank_type} - || $token_lengths_to_go[$i_next_nonblank] > 2 - || $next_nonblank_type =~ /^[\,\(\[\{L]$/ ) - ) - { - $strength -= $tiny_bias; - } - - # otherwise increase strength a bit if this token would be at the - # maximum line length. This is necessary to avoid blinking - # in the above example when the -iob flag is added. - else { - my $len = - $leading_spaces + - $summed_lengths_to_go[ $i_test + 1 ] - - $starting_sum; - if ( $len >= $maximum_line_length ) { - $strength += $tiny_bias; - } - } - - my $must_break = 0; - - # Force an immediate break at certain operators - # with lower level than the start of the line, - # unless we've already seen a better break. - # - ############################################## - # Note on an issue with a preceding ? - ############################################## - # We don't include a ? in the above list, but there may - # be a break at a previous ? if the line is long. - # Because of this we do not want to force a break if - # there is a previous ? on this line. For now the best way - # to do this is to not break if we have seen a lower strength - # point, which is probably a ?. - # - # Example of unwanted breaks we are avoiding at a '.' following a ? - # from pod2html using perltidy -gnu: - # ) - # ? "\n<A NAME=\"" - # . $value - # . "\">\n$text</A>\n" - # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; - if ( - ( - $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ - || ( $next_nonblank_type eq 'k' - && $next_nonblank_token =~ /^(and|or)$/ ) - ) - && ( $nesting_depth_to_go[$i_begin] > - $nesting_depth_to_go[$i_next_nonblank] ) - && ( $strength <= $lowest_strength ) - ) - { - set_forced_breakpoint($i_next_nonblank); - } - - if ( - - # Try to put a break where requested by scan_list - $forced_breakpoint_to_go[$i_test] - - # break between ) { in a continued line so that the '{' can - # be outdented - # See similar logic in scan_list which catches instances - # where a line is just something like ') {'. We have to - # be careful because the corresponding block keyword might - # not be on the first line, such as 'for' here: - # - # eval { - # for ("a") { - # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } - # } - # }; - # - || ( - $line_count - && ( $token eq ')' ) - && ( $next_nonblank_type eq '{' ) - && ($next_nonblank_block_type) - && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) - - # RT #104427: Dont break before opening sub brace because - # sub block breaks handled at higher level, unless - # it looks like the preceeding list is long and broken - && !( - $next_nonblank_block_type =~ /^sub\b/ - && ( $nesting_depth_to_go[$i_begin] == - $nesting_depth_to_go[$i_next_nonblank] ) - ) - - && !$rOpts->{'opening-brace-always-on-right'} - ) - - # There is an implied forced break at a terminal opening brace - || ( ( $type eq '{' ) && ( $i_test == $imax ) ) - ) - { - - # Forced breakpoints must sometimes be overridden, for example - # because of a side comment causing a NO_BREAK. It is easier - # to catch this here than when they are set. - if ( $strength < NO_BREAK - 1 ) { - $strength = $lowest_strength - $tiny_bias; - $must_break = 1; - } - } - - # quit if a break here would put a good terminal token on - # the next line and we already have a possible break - if ( - !$must_break - && ( $next_nonblank_type =~ /^[\;\,]$/ ) - && ( - ( - $leading_spaces + - $summed_lengths_to_go[ $i_next_nonblank + 1 ] - - $starting_sum - ) > $maximum_line_length - ) - ) - { - last if ( $i_lowest >= 0 ); - } - - # Avoid a break which would strand a single punctuation - # token. For example, we do not want to strand a leading - # '.' which is followed by a long quoted string. - # But note that we do want to do this with -extrude (l=1) - # so please test any changes to this code on -extrude. - if ( - !$must_break - && ( $i_test == $i_begin ) - && ( $i_test < $imax ) - && ( $token eq $type ) - && ( - ( - $leading_spaces + - $summed_lengths_to_go[ $i_test + 1 ] - - $starting_sum - ) < $maximum_line_length - ) - ) - { - $i_test = min( $imax, $inext_to_go[$i_test] ); - redo; - } - - if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) - { - - # break at previous best break if it would have produced - # a leading alignment of certain common tokens, and it - # is different from the latest candidate break - last - if ($leading_alignment_type); - - # Force at least one breakpoint if old code had good - # break It is only called if a breakpoint is required or - # desired. This will probably need some adjustments - # over time. A goal is to try to be sure that, if a new - # side comment is introduced into formatted text, then - # the same breakpoints will occur. scbreak.t - last - if ( - $i_test == $imax # we are at the end - && !$forced_breakpoint_count # - && $saw_good_break # old line had good break - && $type =~ /^[#;\{]$/ # and this line ends in - # ';' or side comment - && $i_last_break < 0 # and we haven't made a break - && $i_lowest >= 0 # and we saw a possible break - && $i_lowest < $imax - 1 # (but not just before this ;) - && $strength - $lowest_strength < 0.5 * WEAK # and it's good - ); - - # Do not skip past an important break point in a short final - # segment. For example, without this check we would miss the - # break at the final / in the following code: - # - # $depth_stop = - # ( $tau * $mass_pellet * $q_0 * - # ( 1. - exp( -$t_stop / $tau ) ) - - # 4. * $pi * $factor * $k_ice * - # ( $t_melt - $t_ice ) * - # $r_pellet * - # $t_stop ) / - # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); - # - if ( $line_count > 2 - && $i_lowest < $i_test - && $i_test > $imax - 2 - && $nesting_depth_to_go[$i_begin] > - $nesting_depth_to_go[$i_lowest] - && $lowest_strength < $last_break_strength - .5 * WEAK ) - { - # Make this break for math operators for now - my $ir = $inext_to_go[$i_lowest]; - my $il = $iprev_to_go[$ir]; - last - if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ - || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ); - } - - # Update the minimum bond strength location - $lowest_strength = $strength; - $i_lowest = $i_test; - $lowest_next_token = $next_nonblank_token; - $lowest_next_type = $next_nonblank_type; - $i_lowest_next_nonblank = $i_next_nonblank; - last if $must_break; - - # set flags to remember if a break here will produce a - # leading alignment of certain common tokens - if ( $line_count > 0 - && $i_test < $imax - && ( $lowest_strength - $last_break_strength <= $max_bias ) - ) - { - my $i_last_end = $iprev_to_go[$i_begin]; - my $tok_beg = $tokens_to_go[$i_begin]; - my $type_beg = $types_to_go[$i_begin]; - if ( - - # check for leading alignment of certain tokens - ( - $tok_beg eq $next_nonblank_token - && $is_chain_operator{$tok_beg} - && ( $type_beg eq 'k' - || $type_beg eq $tok_beg ) - && $nesting_depth_to_go[$i_begin] >= - $nesting_depth_to_go[$i_next_nonblank] - ) - - || ( $tokens_to_go[$i_last_end] eq $token - && $is_chain_operator{$token} - && ( $type eq 'k' || $type eq $token ) - && $nesting_depth_to_go[$i_last_end] >= - $nesting_depth_to_go[$i_test] ) - ) - { - $leading_alignment_token = $next_nonblank_token; - $leading_alignment_type = $next_nonblank_type; - } - } - } - - my $too_long = ( $i_test >= $imax ); - if ( !$too_long ) { - my $next_length = - $leading_spaces + - $summed_lengths_to_go[ $i_test + 2 ] - - $starting_sum; - $too_long = $next_length > $maximum_line_length; - - # To prevent blinkers we will avoid leaving a token exactly at - # the line length limit unless it is the last token or one of - # several "good" types. - # - # The following code was a blinker with -pbp before this - # modification: -## $last_nonblank_token eq '(' -## && $is_indirect_object_taker{ $paren_type -## [$paren_depth] } - # The issue causing the problem is that if the - # term [$paren_depth] gets broken across a line then - # the whitespace routine doesn't see both opening and closing - # brackets and will format like '[ $paren_depth ]'. This - # leads to an oscillation in length depending if we break - # before the closing bracket or not. - if ( !$too_long - && $i_test + 1 < $imax - && $next_nonblank_type !~ /^[,\}\]\)R]$/ ) - { - $too_long = $next_length >= $maximum_line_length; - } - } - - FORMATTER_DEBUG_FLAG_BREAK - && do { - my $ltok = $token; - my $rtok = $next_nonblank_token ? $next_nonblank_token : ""; - my $i_testp2 = $i_test + 2; - if ( $i_testp2 > $max_index_to_go + 1 ) { - $i_testp2 = $max_index_to_go + 1; - } - if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } - if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } - print STDOUT -"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; - }; - - # allow one extra terminal token after exceeding line length - # if it would strand this token. - if ( $rOpts_fuzzy_line_length - && $too_long - && $i_lowest == $i_test - && $token_lengths_to_go[$i_test] > 1 - && $next_nonblank_type =~ /^[\;\,]$/ ) - { - $too_long = 0; - } - - last - if ( - ( $i_test == $imax ) # we're done if no more tokens, - || ( - ( $i_lowest >= 0 ) # or no more space and we have a break - && $too_long - ) - ); - } - - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint - # Now decide exactly where to put the breakpoint - #------------------------------------------------------- - - # it's always ok to break at imax if no other break was found - if ( $i_lowest < 0 ) { $i_lowest = $imax } - - # semi-final index calculation - my $i_next_nonblank = $inext_to_go[$i_lowest]; - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - #------------------------------------------------------- - # ?/: rule 1 : if a break here will separate a '?' on this - # line from its closing ':', then break at the '?' instead. - #------------------------------------------------------- - foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { - next unless ( $tokens_to_go[$i] eq '?' ); - - # do not break if probable sequence of ?/: statements - next if ($is_colon_chain); - - # do not break if statement is broken by side comment - next - if ( - $tokens_to_go[$max_index_to_go] eq '#' - && terminal_type( \@types_to_go, \@block_type_to_go, 0, - $max_index_to_go ) !~ /^[\;\}]$/ - ); - - # no break needed if matching : is also on the line - next - if ( $mate_index_to_go[$i] >= 0 - && $mate_index_to_go[$i] <= $i_next_nonblank ); - - $i_lowest = $i; - if ( $want_break_before{'?'} ) { $i_lowest-- } - last; - } - - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint: - # Break the line after the token with index i=$i_lowest - #------------------------------------------------------- - - # final index calculation - $i_next_nonblank = $inext_to_go[$i_lowest]; - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - FORMATTER_DEBUG_FLAG_BREAK - && print STDOUT - "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; - - #------------------------------------------------------- - # ?/: rule 2 : if we break at a '?', then break at its ':' - # - # Note: this rule is also in sub scan_list to handle a break - # at the start and end of a line (in case breaks are dictated - # by side comments). - #------------------------------------------------------- - if ( $next_nonblank_type eq '?' ) { - set_closing_breakpoint($i_next_nonblank); - } - elsif ( $types_to_go[$i_lowest] eq '?' ) { - set_closing_breakpoint($i_lowest); - } - - #------------------------------------------------------- - # ?/: rule 3 : if we break at a ':' then we save - # its location for further work below. We may need to go - # back and break at its '?'. - #------------------------------------------------------- - if ( $next_nonblank_type eq ':' ) { - push @i_colon_breaks, $i_next_nonblank; - } - elsif ( $types_to_go[$i_lowest] eq ':' ) { - push @i_colon_breaks, $i_lowest; - } - - # here we should set breaks for all '?'/':' pairs which are - # separated by this line - - $line_count++; - - # save this line segment, after trimming blanks at the ends - push( @i_first, - ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); - push( @i_last, - ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); - - # set a forced breakpoint at a container opening, if necessary, to - # signal a break at a closing container. Excepting '(' for now. - if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ - && !$forced_breakpoint_to_go[$i_lowest] ) - { - set_closing_breakpoint($i_lowest); - } - - # get ready to go again - $i_begin = $i_lowest + 1; - $last_break_strength = $lowest_strength; - $i_last_break = $i_lowest; - $leading_alignment_token = ""; - $leading_alignment_type = ""; - $lowest_next_token = ''; - $lowest_next_type = 'b'; - - if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { - $i_begin++; - } - - # update indentation size - if ( $i_begin <= $imax ) { - $leading_spaces = leading_spaces_to_go($i_begin); - } - } - - #------------------------------------------------------- - # END of main loop to set continuation breakpoints - # Now go back and make any necessary corrections - #------------------------------------------------------- - - #------------------------------------------------------- - # ?/: rule 4 -- if we broke at a ':', then break at - # corresponding '?' unless this is a chain of ?: expressions - #------------------------------------------------------- - if (@i_colon_breaks) { - - # using a simple method for deciding if we are in a ?/: chain -- - # this is a chain if it has multiple ?/: pairs all in order; - # otherwise not. - # Note that if line starts in a ':' we count that above as a break - my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); - - unless ($is_chain) { - my @insert_list = (); - foreach (@i_colon_breaks) { - my $i_question = $mate_index_to_go[$_]; - if ( $i_question >= 0 ) { - if ( $want_break_before{'?'} ) { - $i_question = $iprev_to_go[$i_question]; - } - - if ( $i_question >= 0 ) { - push @insert_list, $i_question; - } - } - insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); - } - } - } - return ( \@i_first, \@i_last, $colon_count ); -} - -sub insert_additional_breaks { - - # this routine will add line breaks at requested locations after - # sub set_continuation_breaks has made preliminary breaks. - - my ( $ri_break_list, $ri_first, $ri_last ) = @_; - my $i_f; - my $i_l; - my $line_number = 0; - foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) { - - $i_f = $ri_first->[$line_number]; - $i_l = $ri_last->[$line_number]; - while ( $i_break_left >= $i_l ) { - $line_number++; - - # shouldn't happen unless caller passes bad indexes - if ( $line_number >= @{$ri_last} ) { - warning( -"Non-fatal program bug: couldn't set break at $i_break_left\n" - ); - report_definite_bug(); - return; - } - $i_f = $ri_first->[$line_number]; - $i_l = $ri_last->[$line_number]; - } - - # Do not leave a blank at the end of a line; back up if necessary - if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } - - my $i_break_right = $inext_to_go[$i_break_left]; - if ( $i_break_left >= $i_f - && $i_break_left < $i_l - && $i_break_right > $i_f - && $i_break_right <= $i_l ) - { - splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) ); - splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) ); - } - } - return; -} - -sub set_closing_breakpoint { - - # set a breakpoint at a matching closing token - # at present, this is only used to break at a ':' which matches a '?' - my $i_break = shift; - - if ( $mate_index_to_go[$i_break] >= 0 ) { - - # CAUTION: infinite recursion possible here: - # set_closing_breakpoint calls set_forced_breakpoint, and - # set_forced_breakpoint call set_closing_breakpoint - # ( test files attrib.t, BasicLyx.pm.html). - # Don't reduce the '2' in the statement below - if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { - - # break before } ] and ), but sub set_forced_breakpoint will decide - # to break before or after a ? and : - my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; - set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); - } - } - else { - my $type_sequence = $type_sequence_to_go[$i_break]; - if ($type_sequence) { - my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; - $postponed_breakpoint{$type_sequence} = 1; - } - } - return; -} - -sub compare_indentation_levels { - - # check to see if output line tabbing agrees with input line - # this can be very useful for debugging a script which has an extra - # or missing brace - my ( $guessed_indentation_level, $structural_indentation_level ) = @_; - if ( $guessed_indentation_level ne $structural_indentation_level ) { - $last_tabbing_disagreement = $input_line_number; - - if ($in_tabbing_disagreement) { - } - else { - $tabbing_disagreement_count++; - - if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { - write_logfile_entry( -"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" - ); - } - $in_tabbing_disagreement = $input_line_number; - $first_tabbing_disagreement = $in_tabbing_disagreement - unless ($first_tabbing_disagreement); - } - } - else { - - if ($in_tabbing_disagreement) { - - if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { - write_logfile_entry( -"End indentation disagreement from input line $in_tabbing_disagreement\n" - ); - - if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { - write_logfile_entry( - "No further tabbing disagreements will be noted\n"); - } - } - $in_tabbing_disagreement = 0; - } - } - return; -} - -##################################################################### -# -# the Perl::Tidy::IndentationItem class supplies items which contain -# how much whitespace should be used at the start of a line -# -##################################################################### - -package Perl::Tidy::IndentationItem; - -sub new { - - # Create an 'indentation_item' which describes one level of leading - # whitespace when the '-lp' indentation is used. - my ( - $class, $spaces, $level, - $ci_level, $available_spaces, $index, - $gnu_sequence_number, $align_paren, $stack_depth, - $starting_index, - ) = @_; - - my $closed = -1; - my $arrow_count = 0; - my $comma_count = 0; - my $have_child = 0; - my $want_right_spaces = 0; - my $marked = 0; - - # DEFINITIONS: - # spaces => # total leading white spaces - # level => # the indentation 'level' - # ci_level => # the 'continuation level' - # available_spaces => # how many left spaces available - # # for this level - # closed => # index where we saw closing '}' - # comma_count => # how many commas at this level? - # sequence_number => # output batch number - # index => # index in output batch list - # have_child => # any dependents? - # recoverable_spaces => # how many spaces to the right - # # we would like to move to get - # # alignment (negative if left) - # align_paren => # do we want to try to align - # # with an opening structure? - # marked => # if visited by corrector logic - # stack_depth => # indentation nesting depth - # starting_index => # first token index of this level - # arrow_count => # how many =>'s - - return bless { - _spaces => $spaces, - _level => $level, - _ci_level => $ci_level, - _available_spaces => $available_spaces, - _closed => $closed, - _comma_count => $comma_count, - _sequence_number => $gnu_sequence_number, - _index => $index, - _have_child => $have_child, - _recoverable_spaces => $want_right_spaces, - _align_paren => $align_paren, - _marked => $marked, - _stack_depth => $stack_depth, - _starting_index => $starting_index, - _arrow_count => $arrow_count, - }, $class; -} - -sub permanently_decrease_available_spaces { - - # make a permanent reduction in the available indentation spaces - # at one indentation item. NOTE: if there are child nodes, their - # total SPACES must be reduced by the caller. - - my ( $item, $spaces_needed ) = @_; - my $available_spaces = $item->get_available_spaces(); - my $deleted_spaces = - ( $available_spaces > $spaces_needed ) - ? $spaces_needed - : $available_spaces; - $item->decrease_available_spaces($deleted_spaces); - $item->decrease_SPACES($deleted_spaces); - $item->set_recoverable_spaces(0); - - return $deleted_spaces; -} - -sub tentatively_decrease_available_spaces { - - # We are asked to tentatively delete $spaces_needed of indentation - # for a indentation item. We may want to undo this later. NOTE: if - # there are child nodes, their total SPACES must be reduced by the - # caller. - my ( $item, $spaces_needed ) = @_; - my $available_spaces = $item->get_available_spaces(); - my $deleted_spaces = - ( $available_spaces > $spaces_needed ) - ? $spaces_needed - : $available_spaces; - $item->decrease_available_spaces($deleted_spaces); - $item->decrease_SPACES($deleted_spaces); - $item->increase_recoverable_spaces($deleted_spaces); - return $deleted_spaces; -} - -sub get_stack_depth { - my $self = shift; - return $self->{_stack_depth}; -} - -sub get_spaces { - my $self = shift; - return $self->{_spaces}; -} - -sub get_marked { - my $self = shift; - return $self->{_marked}; -} - -sub set_marked { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_marked} = $value; - } - return $self->{_marked}; -} - -sub get_available_spaces { - my $self = shift; - return $self->{_available_spaces}; -} - -sub decrease_SPACES { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_spaces} -= $value; - } - return $self->{_spaces}; -} - -sub decrease_available_spaces { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_available_spaces} -= $value; - } - return $self->{_available_spaces}; -} - -sub get_align_paren { - my $self = shift; - return $self->{_align_paren}; -} - -sub get_recoverable_spaces { - my $self = shift; - return $self->{_recoverable_spaces}; -} - -sub set_recoverable_spaces { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_recoverable_spaces} = $value; - } - return $self->{_recoverable_spaces}; -} - -sub increase_recoverable_spaces { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_recoverable_spaces} += $value; - } - return $self->{_recoverable_spaces}; -} - -sub get_ci_level { - my $self = shift; - return $self->{_ci_level}; -} - -sub get_level { - my $self = shift; - return $self->{_level}; -} - -sub get_sequence_number { - my $self = shift; - return $self->{_sequence_number}; -} - -sub get_index { - my $self = shift; - return $self->{_index}; -} - -sub get_starting_index { - my $self = shift; - return $self->{_starting_index}; -} - -sub set_have_child { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_have_child} = $value; - } - return $self->{_have_child}; -} - -sub get_have_child { - my $self = shift; - return $self->{_have_child}; -} - -sub set_arrow_count { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_arrow_count} = $value; - } - return $self->{_arrow_count}; -} - -sub get_arrow_count { - my $self = shift; - return $self->{_arrow_count}; -} - -sub set_comma_count { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_comma_count} = $value; - } - return $self->{_comma_count}; -} - -sub get_comma_count { - my $self = shift; - return $self->{_comma_count}; -} - -sub set_closed { - my ( $self, $value ) = @_; - if ( defined($value) ) { - $self->{_closed} = $value; - } - return $self->{_closed}; -} - -sub get_closed { - my $self = shift; - return $self->{_closed}; -} - -##################################################################### -# -# the Perl::Tidy::VerticalAligner::Line class supplies an object to -# contain a single output line -# -##################################################################### - -package Perl::Tidy::VerticalAligner::Line; - -{ - - use strict; - ##use Carp; - - my %default_data = ( - jmax => undef, - jmax_original_line => undef, - rtokens => undef, - rfields => undef, - rpatterns => undef, - indentation => undef, - leading_space_count => undef, - outdent_long_lines => undef, - list_type => undef, - is_hanging_side_comment => undef, - ralignments => [], - maximum_line_length => undef, - rvertical_tightness_flags => undef - ); - { - - # methods to count object population - my $_count = 0; - sub get_count { return $_count; } - sub _increment_count { return ++$_count } - sub _decrement_count { return --$_count } - } - - # Constructor may be called as a class method - sub new { - my ( $caller, %arg ) = @_; - my $caller_is_obj = ref($caller); - my $class = $caller_is_obj || $caller; - ##no strict "refs"; - my $self = bless {}, $class; - - $self->{_ralignments} = []; - - foreach my $key ( keys %default_data ) { - my $_key = '_' . $key; - - # Caller keys do not have an underscore - if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} } - elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} } - else { $self->{$_key} = $default_data{$_key} } - } - - $self->_increment_count(); - return $self; - } - - sub DESTROY { - my $self = shift; - $self->_decrement_count(); - return; - } - - sub get_jmax { my $self = shift; return $self->{_jmax} } - - sub get_jmax_original_line { - my $self = shift; - return $self->{_jmax_original_line}; - } - sub get_rtokens { my $self = shift; return $self->{_rtokens} } - sub get_rfields { my $self = shift; return $self->{_rfields} } - sub get_rpatterns { my $self = shift; return $self->{_rpatterns} } - sub get_indentation { my $self = shift; return $self->{_indentation} } - - sub get_leading_space_count { - my $self = shift; - return $self->{_leading_space_count}; - } - - sub get_outdent_long_lines { - my $self = shift; - return $self->{_outdent_long_lines}; - } - sub get_list_type { my $self = shift; return $self->{_list_type} } - - sub get_is_hanging_side_comment { - my $self = shift; - return $self->{_is_hanging_side_comment}; - } - - sub get_rvertical_tightness_flags { - my $self = shift; - return $self->{_rvertical_tightness_flags}; - } - - sub set_column { - ## FIXME: does caller ever supply $val?? - my ( $self, $j, $val ) = @_; - return $self->{_ralignments}->[$j]->set_column($val); - } - - sub get_alignment { - my ( $self, $j ) = @_; - return $self->{_ralignments}->[$j]; - } - sub get_alignments { my $self = shift; return @{ $self->{_ralignments} } } - - sub get_column { - my ( $self, $j ) = @_; - return $self->{_ralignments}->[$j]->get_column(); - } - - sub get_starting_column { - my ( $self, $j ) = @_; - return $self->{_ralignments}->[$j]->get_starting_column(); - } - - sub increment_column { - my ( $self, $k, $pad ) = @_; - $self->{_ralignments}->[$k]->increment_column($pad); - return; - } - - sub set_alignments { - my $self = shift; - @{ $self->{_ralignments} } = @_; - return; - } - - sub current_field_width { - my ( $self, $j ) = @_; - if ( $j == 0 ) { - return $self->get_column($j); - } - else { - return $self->get_column($j) - $self->get_column( $j - 1 ); - } - } - - sub field_width_growth { - my ( $self, $j ) = @_; - return $self->get_column($j) - $self->get_starting_column($j); - } - - sub starting_field_width { - my ( $self, $j ) = @_; - if ( $j == 0 ) { - return $self->get_starting_column($j); - } - else { - return $self->get_starting_column($j) - - $self->get_starting_column( $j - 1 ); - } - } - - sub increase_field_width { - - my ( $self, $j, $pad ) = @_; - my $jmax = $self->get_jmax(); - for my $k ( $j .. $jmax ) { - $self->increment_column( $k, $pad ); - } - return; - } - - sub get_available_space_on_right { - my $self = shift; - my $jmax = $self->get_jmax(); - return $self->{_maximum_line_length} - $self->get_column($jmax); - } - - sub set_jmax { my ( $self, $val ) = @_; $self->{_jmax} = $val; return } - - sub set_jmax_original_line { - my ( $self, $val ) = @_; - $self->{_jmax_original_line} = $val; - return; - } - - sub set_rtokens { - my ( $self, $val ) = @_; - $self->{_rtokens} = $val; - return; - } - - sub set_rfields { - my ( $self, $val ) = @_; - $self->{_rfields} = $val; - return; - } - - sub set_rpatterns { - my ( $self, $val ) = @_; - $self->{_rpatterns} = $val; - return; - } - - sub set_indentation { - my ( $self, $val ) = @_; - $self->{_indentation} = $val; - return; - } - - sub set_leading_space_count { - my ( $self, $val ) = @_; - $self->{_leading_space_count} = $val; - return; - } - - sub set_outdent_long_lines { - my ( $self, $val ) = @_; - $self->{_outdent_long_lines} = $val; - return; - } - - sub set_list_type { - my ( $self, $val ) = @_; - $self->{_list_type} = $val; - return; - } - - sub set_is_hanging_side_comment { - my ( $self, $val ) = @_; - $self->{_is_hanging_side_comment} = $val; - return; - } - - sub set_alignment { - my ( $self, $j, $val ) = @_; - $self->{_ralignments}->[$j] = $val; - return; - } - -} - -##################################################################### -# -# the Perl::Tidy::VerticalAligner::Alignment class holds information -# on a single column being aligned -# -##################################################################### -package Perl::Tidy::VerticalAligner::Alignment; - -{ - - use strict; - - #use Carp; - - # _column # the current column number - # _starting_column # column number when created - # _matching_token # what token we are matching - # _starting_line # the line index of creation - # _ending_line - # the most recent line to use it - # _saved_column - # _serial_number # unique number for this alignment - - my %default_data = ( - column => undef, - starting_column => undef, - matching_token => undef, - starting_line => undef, - ending_line => undef, - saved_column => undef, - serial_number => undef, - ); - - # class population count - { - my $_count = 0; - sub get_count { return $_count } - sub _increment_count { return ++$_count } - sub _decrement_count { return --$_count } - } - - # constructor - sub new { - my ( $caller, %arg ) = @_; - my $caller_is_obj = ref($caller); - my $class = $caller_is_obj || $caller; - ##no strict "refs"; - my $self = bless {}, $class; - - foreach my $key ( keys %default_data ) { - my $_key = '_' . $key; - if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} } - elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} } - else { $self->{$_key} = $default_data{$_key} } - } - $self->_increment_count(); - return $self; - } - - sub DESTROY { - my $self = shift; - $self->_decrement_count(); - return; - } - - sub get_column { my $self = shift; return $self->{_column} } - - sub get_starting_column { - my $self = shift; - return $self->{_starting_column}; - } - sub get_matching_token { my $self = shift; return $self->{_matching_token} } - sub get_starting_line { my $self = shift; return $self->{_starting_line} } - sub get_ending_line { my $self = shift; return $self->{_ending_line} } - sub get_serial_number { my $self = shift; return $self->{_serial_number} } - - sub set_column { my ( $self, $val ) = @_; $self->{_column} = $val; return } - - sub set_starting_column { - my ( $self, $val ) = @_; - $self->{_starting_column} = $val; - return; - } - - sub set_matching_token { - my ( $self, $val ) = @_; - $self->{_matching_token} = $val; - return; - } - - sub set_starting_line { - my ( $self, $val ) = @_; - $self->{_starting_line} = $val; - return; - } - - sub set_ending_line { - my ( $self, $val ) = @_; - $self->{_ending_line} = $val; - return; - } - - sub increment_column { - my ( $self, $val ) = @_; - $self->{_column} += $val; - return; - } - - sub save_column { - my $self = shift; - $self->{_saved_column} = $self->{_column}; - return; - } - - sub restore_column { - my $self = shift; - $self->{_column} = $self->{_saved_column}; - return; - } -} - -package Perl::Tidy::VerticalAligner; - -# The Perl::Tidy::VerticalAligner package collects output lines and -# attempts to line up certain common tokens, such as => and #, which are -# identified by the calling routine. -# -# There are two main routines: valign_input and flush. Append acts as a -# storage buffer, collecting lines into a group which can be vertically -# aligned. When alignment is no longer possible or desirable, it dumps -# the group to flush. -# -# valign_input -----> flush -# -# collects writes -# vertical one -# groups group - -BEGIN { - - # Caution: these debug flags produce a lot of output - # They should all be 0 except when debugging small scripts - - use constant VALIGN_DEBUG_FLAG_APPEND => 0; - use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; - use constant VALIGN_DEBUG_FLAG_TERNARY => 0; - use constant VALIGN_DEBUG_FLAG_TABS => 0; - - my $debug_warning = sub { - print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; - return; - }; - - VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); - VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); - VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); - VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); - -} - -use vars qw( - $vertical_aligner_self - $current_line - $maximum_alignment_index - $ralignment_list - $maximum_jmax_seen - $minimum_jmax_seen - $previous_minimum_jmax_seen - $previous_maximum_jmax_seen - $maximum_line_index - $group_level - $group_type - $group_maximum_gap - $marginal_match - $last_level_written - $last_leading_space_count - $extra_indent_ok - $zero_count - @group_lines - $last_comment_column - $last_side_comment_line_number - $last_side_comment_length - $last_side_comment_level - $outdented_line_count - $first_outdented_line_at - $last_outdented_line_at - $diagnostics_object - $logger_object - $file_writer_object - @side_comment_history - $comment_leading_space_count - $is_matching_terminal_line - $consecutive_block_comments - - $cached_line_text - $cached_line_type - $cached_line_flag - $cached_seqno - $cached_line_valid - $cached_line_leading_space_count - $cached_seqno_string - - $valign_buffer_filling - @valign_buffer - - $seqno_string - $last_nonblank_seqno_string - - $rOpts - - $rOpts_maximum_line_length - $rOpts_variable_maximum_line_length - $rOpts_continuation_indentation - $rOpts_indent_columns - $rOpts_tabs - $rOpts_entab_leading_whitespace - $rOpts_valign - - $rOpts_fixed_position_side_comment - $rOpts_minimum_space_to_comment - -); - -sub initialize { - - ( - my $class, $rOpts, $file_writer_object, $logger_object, - $diagnostics_object - ) = @_; - - # variables describing the entire space group: - $ralignment_list = []; - $group_level = 0; - $last_level_written = -1; - $extra_indent_ok = 0; # can we move all lines to the right? - $last_side_comment_length = 0; - $maximum_jmax_seen = 0; - $minimum_jmax_seen = 0; - $previous_minimum_jmax_seen = 0; - $previous_maximum_jmax_seen = 0; - - # variables describing each line of the group - @group_lines = (); # list of all lines in group - - $outdented_line_count = 0; - $first_outdented_line_at = 0; - $last_outdented_line_at = 0; - $last_side_comment_line_number = 0; - $last_side_comment_level = -1; - $is_matching_terminal_line = 0; - - # most recent 3 side comments; [ line number, column ] - $side_comment_history[0] = [ -300, 0 ]; - $side_comment_history[1] = [ -200, 0 ]; - $side_comment_history[2] = [ -100, 0 ]; - - # valign_output_step_B cache: - $cached_line_text = ""; - $cached_line_type = 0; - $cached_line_flag = 0; - $cached_seqno = 0; - $cached_line_valid = 0; - $cached_line_leading_space_count = 0; - $cached_seqno_string = ""; - - # string of sequence numbers joined together - $seqno_string = ""; - $last_nonblank_seqno_string = ""; - - # frequently used parameters - $rOpts_indent_columns = $rOpts->{'indent-columns'}; - $rOpts_tabs = $rOpts->{'tabs'}; - $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; - $rOpts_fixed_position_side_comment = - $rOpts->{'fixed-position-side-comment'}; - $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; - $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; - $rOpts_variable_maximum_line_length = - $rOpts->{'variable-maximum-line-length'}; - $rOpts_valign = $rOpts->{'valign'}; - - $consecutive_block_comments = 0; - forget_side_comment(); - - initialize_for_new_group(); - - $vertical_aligner_self = {}; - bless $vertical_aligner_self, $class; - return $vertical_aligner_self; -} - -sub initialize_for_new_group { - $maximum_line_index = -1; # lines in the current group - $maximum_alignment_index = -1; # alignments in current group - $zero_count = 0; # count consecutive lines without tokens - $current_line = undef; # line being matched for alignment - $group_maximum_gap = 0; # largest gap introduced - $group_type = ""; - $marginal_match = 0; - $comment_leading_space_count = 0; - $last_leading_space_count = 0; - return; -} - -# interface to Perl::Tidy::Diagnostics routines -sub write_diagnostics { - my $msg = shift; - if ($diagnostics_object) { - $diagnostics_object->write_diagnostics($msg); - } - return; -} - -# interface to Perl::Tidy::Logger routines -sub warning { - my ($msg) = @_; - if ($logger_object) { - $logger_object->warning($msg); - } - return; -} - -sub write_logfile_entry { - my ($msg) = @_; - if ($logger_object) { - $logger_object->write_logfile_entry($msg); - } - return; -} - -sub report_definite_bug { - if ($logger_object) { - $logger_object->report_definite_bug(); - } - return; -} - -sub get_spaces { - - # return the number of leading spaces associated with an indentation - # variable $indentation is either a constant number of spaces or an - # object with a get_spaces method. - my $indentation = shift; - return ref($indentation) ? $indentation->get_spaces() : $indentation; -} - -sub get_recoverable_spaces { - - # return the number of spaces (+ means shift right, - means shift left) - # that we would like to shift a group of lines with the same indentation - # to get them to line up with their opening parens - my $indentation = shift; - return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; -} - -sub get_stack_depth { - - my $indentation = shift; - return ref($indentation) ? $indentation->get_stack_depth() : 0; -} - -sub make_alignment { - my ( $col, $token ) = @_; - - # make one new alignment at column $col which aligns token $token - ++$maximum_alignment_index; - my $alignment = new Perl::Tidy::VerticalAligner::Alignment( - column => $col, - starting_column => $col, - matching_token => $token, - starting_line => $maximum_line_index, - ending_line => $maximum_line_index, - serial_number => $maximum_alignment_index, - ); - $ralignment_list->[$maximum_alignment_index] = $alignment; - return $alignment; -} - -sub dump_alignments { - print STDOUT -"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; - for my $i ( 0 .. $maximum_alignment_index ) { - my $column = $ralignment_list->[$i]->get_column(); - my $starting_column = $ralignment_list->[$i]->get_starting_column(); - my $matching_token = $ralignment_list->[$i]->get_matching_token(); - my $starting_line = $ralignment_list->[$i]->get_starting_line(); - my $ending_line = $ralignment_list->[$i]->get_ending_line(); - print STDOUT -"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; - } - return; -} - -sub save_alignment_columns { - for my $i ( 0 .. $maximum_alignment_index ) { - $ralignment_list->[$i]->save_column(); - } - return; -} - -sub restore_alignment_columns { - for my $i ( 0 .. $maximum_alignment_index ) { - $ralignment_list->[$i]->restore_column(); - } - return; -} - -sub forget_side_comment { - $last_comment_column = 0; - return; -} - -sub maximum_line_length_for_level { - - # return maximum line length for line starting with a given level - my $maximum_line_length = $rOpts_maximum_line_length; - if ($rOpts_variable_maximum_line_length) { - my $level = shift; - if ( $level < 0 ) { $level = 0 } - $maximum_line_length += $level * $rOpts_indent_columns; - } - return $maximum_line_length; -} - -sub valign_input { - - # Place one line in the current vertical group. - # - # The input parameters are: - # $level = indentation level of this line - # $rfields = reference to array of fields - # $rpatterns = reference to array of patterns, one per field - # $rtokens = reference to array of tokens starting fields 1,2,.. - # - # Here is an example of what this package does. In this example, - # we are trying to line up both the '=>' and the '#'. - # - # '18' => 'grave', # \` - # '19' => 'acute', # `' - # '20' => 'caron', # \v - # <-tabs-><--field 2 ---><-f3-> - # | | | | - # | | | | - # col1 col2 col3 col4 - # - # The calling routine has already broken the entire line into 3 fields as - # indicated. (So the work of identifying promising common tokens has - # already been done). - # - # In this example, there will be 2 tokens being matched: '=>' and '#'. - # They are the leading parts of fields 2 and 3, but we do need to know - # what they are so that we can dump a group of lines when these tokens - # change. - # - # The fields contain the actual characters of each field. The patterns - # are like the fields, but they contain mainly token types instead - # of tokens, so they have fewer characters. They are used to be - # sure we are matching fields of similar type. - # - # In this example, there will be 4 column indexes being adjusted. The - # first one is always at zero. The interior columns are at the start of - # the matching tokens, and the last one tracks the maximum line length. - # - # Each time a new line comes in, it joins the current vertical - # group if possible. Otherwise it causes the current group to be dumped - # and a new group is started. - # - # For each new group member, the column locations are increased, as - # necessary, to make room for the new fields. When the group is finally - # output, these column numbers are used to compute the amount of spaces of - # padding needed for each field. - # - # Programming note: the fields are assumed not to have any tab characters. - # Tabs have been previously removed except for tabs in quoted strings and - # side comments. Tabs in these fields can mess up the column counting. - # The log file warns the user if there are any such tabs. - - my ( - $level, $level_end, - $indentation, $rfields, - $rtokens, $rpatterns, - $is_forced_break, $outdent_long_lines, - $is_terminal_ternary, $is_terminal_statement, - $do_not_pad, $rvertical_tightness_flags, - $level_jump, - ) = @_; - - # number of fields is $jmax - # number of tokens between fields is $jmax-1 - my $jmax = $#{$rfields}; - - my $leading_space_count = get_spaces($indentation); - - # set outdented flag to be sure we either align within statements or - # across statement boundaries, but not both. - my $is_outdented = $last_leading_space_count > $leading_space_count; - $last_leading_space_count = $leading_space_count; - - # Patch: undo for hanging side comment - my $is_hanging_side_comment = - ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); - $is_outdented = 0 if $is_hanging_side_comment; - - # Forget side comment alignment after seeing 2 or more block comments - my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); - if ($is_block_comment) { - $consecutive_block_comments++; - } - else { - if ( $consecutive_block_comments > 1 ) { forget_side_comment() } - $consecutive_block_comments = 0; - } - - VALIGN_DEBUG_FLAG_APPEND0 && do { - print STDOUT -"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; - }; - - # Validate cached line if necessary: If we can produce a container - # with just 2 lines total by combining an existing cached opening - # token with the closing token to follow, then we will mark both - # cached flags as valid. - if ($rvertical_tightness_flags) { - if ( $maximum_line_index <= 0 - && $cached_line_type - && $cached_seqno - && $rvertical_tightness_flags->[2] - && $rvertical_tightness_flags->[2] == $cached_seqno ) - { - $rvertical_tightness_flags->[3] ||= 1; - $cached_line_valid ||= 1; - } - } - - # do not join an opening block brace with an unbalanced line - # unless requested with a flag value of 2 - if ( $cached_line_type == 3 - && $maximum_line_index < 0 - && $cached_line_flag < 2 - && $level_jump != 0 ) - { - $cached_line_valid = 0; - } - - # patch until new aligner is finished - if ($do_not_pad) { my_flush() } - - # shouldn't happen: - if ( $level < 0 ) { $level = 0 } - - # do not align code across indentation level changes - # or if vertical alignment is turned off for debugging - if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { - - # we are allowed to shift a group of lines to the right if its - # level is greater than the previous and next group - $extra_indent_ok = - ( $level < $group_level && $last_level_written < $group_level ); - - my_flush(); - - # If we know that this line will get flushed out by itself because - # of level changes, we can leave the extra_indent_ok flag set. - # That way, if we get an external flush call, we will still be - # able to do some -lp alignment if necessary. - $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); - - $group_level = $level; - - # wait until after the above flush to get the leading space - # count because it may have been changed if the -icp flag is in - # effect - $leading_space_count = get_spaces($indentation); - - } - - # -------------------------------------------------------------------- - # Patch to collect outdentable block COMMENTS - # -------------------------------------------------------------------- - my $is_blank_line = ""; - if ( $group_type eq 'COMMENT' ) { - if ( - ( - $is_block_comment - && $outdent_long_lines - && $leading_space_count == $comment_leading_space_count - ) - || $is_blank_line - ) - { - $group_lines[ ++$maximum_line_index ] = $rfields->[0]; - return; - } - else { - my_flush(); - } - } - - # -------------------------------------------------------------------- - # add dummy fields for terminal ternary - # -------------------------------------------------------------------- - my $j_terminal_match; - if ( $is_terminal_ternary && $current_line ) { - $j_terminal_match = - fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); - $jmax = @{$rfields} - 1; - } - - # -------------------------------------------------------------------- - # add dummy fields for else statement - # -------------------------------------------------------------------- - if ( $rfields->[0] =~ /^else\s*$/ - && $current_line - && $level_jump == 0 ) - { - $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); - $jmax = @{$rfields} - 1; - } - - # -------------------------------------------------------------------- - # Step 1. Handle simple line of code with no fields to match. - # -------------------------------------------------------------------- - if ( $jmax <= 0 ) { - $zero_count++; - - if ( $maximum_line_index >= 0 - && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) ) - { - - # flush the current group if it has some aligned columns.. - if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } - - # flush current group if we are just collecting side comments.. - elsif ( - - # ...and we haven't seen a comment lately - ( $zero_count > 3 ) - - # ..or if this new line doesn't fit to the left of the comments - || ( ( $leading_space_count + length( $rfields->[0] ) ) > - $group_lines[0]->get_column(0) ) - ) - { - my_flush(); - } - } - - # patch to start new COMMENT group if this comment may be outdented - if ( $is_block_comment - && $outdent_long_lines - && $maximum_line_index < 0 ) - { - $group_type = 'COMMENT'; - $comment_leading_space_count = $leading_space_count; - $group_lines[ ++$maximum_line_index ] = $rfields->[0]; - return; - } - - # just write this line directly if no current group, no side comment, - # and no space recovery is needed. - if ( $maximum_line_index < 0 && !get_recoverable_spaces($indentation) ) - { - valign_output_step_B( $leading_space_count, $rfields->[0], 0, - $outdent_long_lines, $rvertical_tightness_flags, $level ); - return; - } - } - else { - $zero_count = 0; - } - - # programming check: (shouldn't happen) - # an error here implies an incorrect call was made - if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { - warning( -"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" - ); - report_definite_bug(); - } - - # -------------------------------------------------------------------- - # create an object to hold this line - # -------------------------------------------------------------------- - my $new_line = new Perl::Tidy::VerticalAligner::Line( - jmax => $jmax, - jmax_original_line => $jmax, - rtokens => $rtokens, - rfields => $rfields, - rpatterns => $rpatterns, - indentation => $indentation, - leading_space_count => $leading_space_count, - outdent_long_lines => $outdent_long_lines, - list_type => "", - is_hanging_side_comment => $is_hanging_side_comment, - maximum_line_length => maximum_line_length_for_level($level), - rvertical_tightness_flags => $rvertical_tightness_flags, - ); - - # Initialize a global flag saying if the last line of the group should - # match end of group and also terminate the group. There should be no - # returns between here and where the flag is handled at the bottom. - my $col_matching_terminal = 0; - if ( defined($j_terminal_match) ) { - - # remember the column of the terminal ? or { to match with - $col_matching_terminal = $current_line->get_column($j_terminal_match); - - # set global flag for sub decide_if_aligned - $is_matching_terminal_line = 1; - } - - # -------------------------------------------------------------------- - # It simplifies things to create a zero length side comment - # if none exists. - # -------------------------------------------------------------------- - make_side_comment( $new_line, $level_end ); - - # -------------------------------------------------------------------- - # Decide if this is a simple list of items. - # There are 3 list types: none, comma, comma-arrow. - # We use this below to be less restrictive in deciding what to align. - # -------------------------------------------------------------------- - if ($is_forced_break) { - decide_if_list($new_line); - } - - if ($current_line) { - - # -------------------------------------------------------------------- - # Allow hanging side comment to join current group, if any - # This will help keep side comments aligned, because otherwise we - # will have to start a new group, making alignment less likely. - # -------------------------------------------------------------------- - join_hanging_comment( $new_line, $current_line ) - if $is_hanging_side_comment; - - # -------------------------------------------------------------------- - # If there is just one previous line, and it has more fields - # than the new line, try to join fields together to get a match with - # the new line. At the present time, only a single leading '=' is - # allowed to be compressed out. This is useful in rare cases where - # a table is forced to use old breakpoints because of side comments, - # and the table starts out something like this: - # my %MonthChars = ('0', 'Jan', # side comment - # '1', 'Feb', - # '2', 'Mar', - # Eliminating the '=' field will allow the remaining fields to line up. - # This situation does not occur if there are no side comments - # because scan_list would put a break after the opening '('. - # -------------------------------------------------------------------- - eliminate_old_fields( $new_line, $current_line ); - - # -------------------------------------------------------------------- - # If the new line has more fields than the current group, - # see if we can match the first fields and combine the remaining - # fields of the new line. - # -------------------------------------------------------------------- - eliminate_new_fields( $new_line, $current_line ); - - # -------------------------------------------------------------------- - # Flush previous group unless all common tokens and patterns match.. - # -------------------------------------------------------------------- - check_match( $new_line, $current_line ); - - # -------------------------------------------------------------------- - # See if there is space for this line in the current group (if any) - # -------------------------------------------------------------------- - if ($current_line) { - check_fit( $new_line, $current_line ); - } - } - - # -------------------------------------------------------------------- - # Append this line to the current group (or start new group) - # -------------------------------------------------------------------- - add_to_group($new_line); - - # Future update to allow this to vary: - $current_line = $new_line if ( $maximum_line_index == 0 ); - - # output this group if it ends in a terminal else or ternary line - if ( defined($j_terminal_match) ) { - - # if there is only one line in the group (maybe due to failure to match - # perfectly with previous lines), then align the ? or { of this - # terminal line with the previous one unless that would make the line - # too long - if ( $maximum_line_index == 0 ) { - my $col_now = $current_line->get_column($j_terminal_match); - my $pad = $col_matching_terminal - $col_now; - my $padding_available = - $current_line->get_available_space_on_right(); - if ( $pad > 0 && $pad <= $padding_available ) { - $current_line->increase_field_width( $j_terminal_match, $pad ); - } - } - my_flush(); - $is_matching_terminal_line = 0; - } - - # -------------------------------------------------------------------- - # Step 8. Some old debugging stuff - # -------------------------------------------------------------------- - VALIGN_DEBUG_FLAG_APPEND && do { - print STDOUT "APPEND fields:"; - dump_array( @{$rfields} ); - print STDOUT "APPEND tokens:"; - dump_array( @{$rtokens} ); - print STDOUT "APPEND patterns:"; - dump_array( @{$rpatterns} ); - dump_alignments(); - }; - - return; -} - -sub join_hanging_comment { - - my $line = shift; - my $jmax = $line->get_jmax(); - return 0 unless $jmax == 1; # must be 2 fields - my $rtokens = $line->get_rtokens(); - return 0 unless $rtokens->[0] eq '#'; # the second field is a comment.. - my $rfields = $line->get_rfields(); - return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty... - my $old_line = shift; - my $maximum_field_index = $old_line->get_jmax(); - return 0 - unless $maximum_field_index > $jmax; # the current line has more fields - my $rpatterns = $line->get_rpatterns(); - - $line->set_is_hanging_side_comment(1); - $jmax = $maximum_field_index; - $line->set_jmax($jmax); - $rfields->[$jmax] = $rfields->[1]; - $rtokens->[ $jmax - 1 ] = $rtokens->[0]; - $rpatterns->[ $jmax - 1 ] = $rpatterns->[0]; - foreach my $j ( 1 .. $jmax - 1 ) { - $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why? - $rtokens->[ $j - 1 ] = ""; - $rpatterns->[ $j - 1 ] = ""; - } - return 1; -} - -sub eliminate_old_fields { - - my $new_line = shift; - my $jmax = $new_line->get_jmax(); - if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } - if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } - - # there must be one previous line - return unless ( $maximum_line_index == 0 ); - - my $old_line = shift; - my $maximum_field_index = $old_line->get_jmax(); - - ############################################### - # Moved below to allow new coding for => matches - # return unless $maximum_field_index > $jmax; - ############################################### - - # Identify specific cases where field elimination is allowed: - # case=1: both lines have comma-separated lists, and the first - # line has an equals - # case=2: both lines have leading equals - - # case 1 is the default - my $case = 1; - - # See if case 2: both lines have leading '=' - # We'll require similar leading patterns in this case - my $old_rtokens = $old_line->get_rtokens(); - my $rtokens = $new_line->get_rtokens(); - my $rpatterns = $new_line->get_rpatterns(); - my $old_rpatterns = $old_line->get_rpatterns(); - if ( $rtokens->[0] =~ /^=>?\d*$/ - && $old_rtokens->[0] eq $rtokens->[0] - && $old_rpatterns->[0] eq $rpatterns->[0] ) - { - $case = 2; - } - - # not too many fewer fields in new line for case 1 - return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); - - # case 1 must have side comment - my $old_rfields = $old_line->get_rfields(); - return - if ( $case == 1 - && length( $old_rfields->[$maximum_field_index] ) == 0 ); - - my $rfields = $new_line->get_rfields(); - - my $hid_equals = 0; - - my @new_alignments = (); - my @new_fields = (); - my @new_matching_patterns = (); - my @new_matching_tokens = (); - - my $j = 0; - my $current_field = ''; - my $current_pattern = ''; - - # loop over all old tokens - my $in_match = 0; - foreach my $k ( 0 .. $maximum_field_index - 1 ) { - $current_field .= $old_rfields->[$k]; - $current_pattern .= $old_rpatterns->[$k]; - last if ( $j > $jmax - 1 ); - - if ( $old_rtokens->[$k] eq $rtokens->[$j] ) { - $in_match = 1; - $new_fields[$j] = $current_field; - $new_matching_patterns[$j] = $current_pattern; - $current_field = ''; - $current_pattern = ''; - $new_matching_tokens[$j] = $old_rtokens->[$k]; - $new_alignments[$j] = $old_line->get_alignment($k); - $j++; - } - else { - - if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) { - last if ( $case == 2 ); # avoid problems with stuff - # like: $a=$b=$c=$d; - $hid_equals = 1; - } - last - if ( $in_match && $case == 1 ) - ; # disallow gaps in matching field types in case 1 - } - } - - # Modify the current state if we are successful. - # We must exactly reach the ends of the new list for success, and the old - # pattern must have more fields. Here is an example where the first and - # second lines have the same number, and we should not align: - # my @a = map chr, 0 .. 255; - # my @b = grep /\W/, @a; - # my @c = grep /[^\w]/, @a; - - # Otherwise, we would get all of the commas aligned, which doesn't work as - # well: - # my @a = map chr, 0 .. 255; - # my @b = grep /\W/, @a; - # my @c = grep /[^\w]/, @a; - - if ( ( $j == $jmax ) - && ( $current_field eq '' ) - && ( $case != 1 || $hid_equals ) - && ( $maximum_field_index > $jmax ) ) - { - my $k = $maximum_field_index; - $current_field .= $old_rfields->[$k]; - $current_pattern .= $old_rpatterns->[$k]; - $new_fields[$j] = $current_field; - $new_matching_patterns[$j] = $current_pattern; - - $new_alignments[$j] = $old_line->get_alignment($k); - $maximum_field_index = $j; - - $old_line->set_alignments(@new_alignments); - $old_line->set_jmax($jmax); - $old_line->set_rtokens( \@new_matching_tokens ); - $old_line->set_rfields( \@new_fields ); - $old_line->set_rpatterns( \@{$rpatterns} ); - } - - # Dumb Down starting match if necessary: - # - # Consider the following two lines: - # - # { - # $a => 20 > 3 ? 1 : 0, - # $xyz => 5, - # } - -# We would like to get alignment regardless of the order of the two lines. -# If the lines come in in this order, then we will simplify the patterns of the first line -# in sub eliminate_new_fields. -# If the lines come in reverse order, then we achieve this with eliminate_new_fields. - - # This update is currently restricted to leading '=>' matches. Although we - # could do this for both '=' and '=>', overall the results for '=' come out - # better without this step because this step can eliminate some other good - # matches. For example, with the '=' we get: - -# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); -# my @dsf = map "$_\x{FFFE}Fred", @disilva; -# my @dsj = map "$_\x{FFFE}John", @disilva; -# my @dsJ = map "$_ John", @disilva; - - # without including '=' we get: - -# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); -# my @dsf = map "$_\x{FFFE}Fred", @disilva; -# my @dsj = map "$_\x{FFFE}John", @disilva; -# my @dsJ = map "$_ John", @disilva; - elsif ( - $case == 2 - - && @new_matching_tokens == 1 - ##&& $new_matching_tokens[0] =~ /^=/ # see note above - && $new_matching_tokens[0] =~ /^=>/ - && $maximum_field_index > 2 - ) - { - my $jmaxm = $jmax - 1; - my $kmaxm = $maximum_field_index - 1; - my $have_side_comment = $old_rtokens->[$kmaxm] eq '#'; - - # We need to reduce the group pattern to be just two tokens, - # the leading equality or => and the final side comment - - my $mid_field = join "", - @{$old_rfields}[ 1 .. $maximum_field_index - 1 ]; - my $mid_patterns = join "", - @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ]; - my @new_alignments = ( - $old_line->get_alignment(0), - $old_line->get_alignment( $maximum_field_index - 1 ) - ); - my @new_tokens = - ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] ); - my @new_fields = ( - $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index] - ); - my @new_patterns = ( - $old_rpatterns->[0], $mid_patterns, - $old_rpatterns->[$maximum_field_index] - ); - - $maximum_field_index = 2; - $old_line->set_jmax($maximum_field_index); - $old_line->set_rtokens( \@new_tokens ); - $old_line->set_rfields( \@new_fields ); - $old_line->set_rpatterns( \@new_patterns ); - - initialize_for_new_group(); - add_to_group($old_line); - $current_line = $old_line; - } - return; -} - -# create an empty side comment if none exists -sub make_side_comment { - my ( $new_line, $level_end ) = @_; - my $jmax = $new_line->get_jmax(); - my $rtokens = $new_line->get_rtokens(); - - # if line does not have a side comment... - if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) { - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - $rtokens->[$jmax] = '#'; - $rfields->[ ++$jmax ] = ''; - $rpatterns->[$jmax] = '#'; - $new_line->set_jmax($jmax); - $new_line->set_jmax_original_line($jmax); - } - - # line has a side comment.. - else { - - # don't remember old side comment location for very long - my $line_number = $vertical_aligner_self->get_output_line_number(); - my $rfields = $new_line->get_rfields(); - if ( - $line_number - $last_side_comment_line_number > 12 - - # and don't remember comment location across block level changes - || ( $level_end < $last_side_comment_level - && $rfields->[0] =~ /^}/ ) - ) - { - forget_side_comment(); - } - $last_side_comment_line_number = $line_number; - $last_side_comment_level = $level_end; - } - return; -} - -sub decide_if_list { - - my $line = shift; - - # A list will be taken to be a line with a forced break in which all - # of the field separators are commas or comma-arrows (except for the - # trailing #) - - # List separator tokens are things like ',3' or '=>2', - # where the trailing digit is the nesting depth. Allow braces - # to allow nested list items. - my $rtokens = $line->get_rtokens(); - my $test_token = $rtokens->[0]; - if ( $test_token =~ /^(\,|=>)/ ) { - my $list_type = $test_token; - my $jmax = $line->get_jmax(); - - foreach ( 1 .. $jmax - 2 ) { - if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) { - $list_type = ""; - last; - } - } - $line->set_list_type($list_type); - } - return; -} - -sub eliminate_new_fields { - - my ( $new_line, $old_line ) = @_; - return unless ( $maximum_line_index >= 0 ); - my $jmax = $new_line->get_jmax(); - - my $old_rtokens = $old_line->get_rtokens(); - my $rtokens = $new_line->get_rtokens(); - my $is_assignment = - ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); - - # must be monotonic variation - return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); - - # must be more fields in the new line - my $maximum_field_index = $old_line->get_jmax(); - return unless ( $maximum_field_index < $jmax ); - - unless ($is_assignment) { - return - unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) - ; # only if monotonic - - # never combine fields of a comma list - return - unless ( $maximum_field_index > 1 ) - && ( $new_line->get_list_type() !~ /^,/ ); - } - - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - my $old_rpatterns = $old_line->get_rpatterns(); - - # loop over all OLD tokens except comment and check match - my $match = 1; - foreach my $k ( 0 .. $maximum_field_index - 2 ) { - if ( ( $old_rtokens->[$k] ne $rtokens->[$k] ) - || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) ) - { - $match = 0; - last; - } - } - - # first tokens agree, so combine extra new tokens - if ($match) { - ##for my $k ( $maximum_field_index .. $jmax - 1 ) { - foreach my $k ( $maximum_field_index .. $jmax - 1 ) { - - $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k]; - $rfields->[$k] = ""; - $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k]; - $rpatterns->[$k] = ""; - } - - $rtokens->[ $maximum_field_index - 1 ] = '#'; - $rfields->[$maximum_field_index] = $rfields->[$jmax]; - $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax]; - $jmax = $maximum_field_index; - } - $new_line->set_jmax($jmax); - return; -} - -sub fix_terminal_ternary { - - # Add empty fields as necessary to align a ternary term - # like this: - # - # my $leapyear = - # $year % 4 ? 0 - # : $year % 100 ? 1 - # : $year % 400 ? 0 - # : 1; - # - # returns 1 if the terminal item should be indented - - my ( $rfields, $rtokens, $rpatterns ) = @_; - - my $jmax = @{$rfields} - 1; - my $old_line = $group_lines[$maximum_line_index]; - my $rfields_old = $old_line->get_rfields(); - - my $rpatterns_old = $old_line->get_rpatterns(); - my $rtokens_old = $old_line->get_rtokens(); - my $maximum_field_index = $old_line->get_jmax(); - - # look for the question mark after the : - my ($jquestion); - my $depth_question; - my $pad = ""; - foreach my $j ( 0 .. $maximum_field_index - 1 ) { - my $tok = $rtokens_old->[$j]; - if ( $tok =~ /^\?(\d+)$/ ) { - $depth_question = $1; - - # depth must be correct - next unless ( $depth_question eq $group_level ); - - $jquestion = $j; - if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { - $pad = " " x length($1); - } - else { - return; # shouldn't happen - } - last; - } - } - return unless ( defined($jquestion) ); # shouldn't happen - - # Now splice the tokens and patterns of the previous line - # into the else line to insure a match. Add empty fields - # as necessary. - my $jadd = $jquestion; - - # Work on copies of the actual arrays in case we have - # to return due to an error - my @fields = @{$rfields}; - my @patterns = @{$rpatterns}; - my @tokens = @{$rtokens}; - - VALIGN_DEBUG_FLAG_TERNARY && do { - local $" = '><'; - print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; - print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; - print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; - print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n"; - print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n"; - print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; - }; - - # handle cases of leading colon on this line - if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { - - my ( $colon, $therest ) = ( $1, $2 ); - - # Handle sub-case of first field with leading colon plus additional code - # This is the usual situation as at the '1' below: - # ... - # : $year % 400 ? 0 - # : 1; - if ($therest) { - - # Split the first field after the leading colon and insert padding. - # Note that this padding will remain even if the terminal value goes - # out on a separate line. This does not seem to look to bad, so no - # mechanism has been included to undo it. - my $field1 = shift @fields; - unshift @fields, ( $colon, $pad . $therest ); - - # change the leading pattern from : to ? - return unless ( $patterns[0] =~ s/^\:/?/ ); - - # install leading tokens and patterns of existing line - unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); - unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); - - # insert appropriate number of empty fields - splice( @fields, 1, 0, ('') x $jadd ) if $jadd; - } - - # handle sub-case of first field just equal to leading colon. - # This can happen for example in the example below where - # the leading '(' would create a new alignment token - # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) - # : ( $mname = $name . '->' ); - else { - - return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen - - # prepend a leading ? onto the second pattern - $patterns[1] = "?b" . $patterns[1]; - - # pad the second field - $fields[1] = $pad . $fields[1]; - - # install leading tokens and patterns of existing line, replacing - # leading token and inserting appropriate number of empty fields - splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); - splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); - splice( @fields, 1, 0, ('') x $jadd ) if $jadd; - } - } - - # Handle case of no leading colon on this line. This will - # be the case when -wba=':' is used. For example, - # $year % 400 ? 0 : - # 1; - else { - - # install leading tokens and patterns of existing line - $patterns[0] = '?' . 'b' . $patterns[0]; - unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); - unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); - - # insert appropriate number of empty fields - $jadd = $jquestion + 1; - $fields[0] = $pad . $fields[0]; - splice( @fields, 0, 0, ('') x $jadd ) if $jadd; - } - - VALIGN_DEBUG_FLAG_TERNARY && do { - local $" = '><'; - print STDOUT "MODIFIED TOKENS=<@tokens>\n"; - print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; - print STDOUT "MODIFIED FIELDS=<@fields>\n"; - }; - - # all ok .. update the arrays - @{$rfields} = @fields; - @{$rtokens} = @tokens; - @{$rpatterns} = @patterns; - - # force a flush after this line - return $jquestion; -} - -sub fix_terminal_else { - - # Add empty fields as necessary to align a balanced terminal - # else block to a previous if/elsif/unless block, - # like this: - # - # if ( 1 || $x ) { print "ok 13\n"; } - # else { print "not ok 13\n"; } - # - # returns 1 if the else block should be indented - # - my ( $rfields, $rtokens, $rpatterns ) = @_; - my $jmax = @{$rfields} - 1; - return unless ( $jmax > 0 ); - - # check for balanced else block following if/elsif/unless - my $rfields_old = $current_line->get_rfields(); - - # TBD: add handling for 'case' - return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); - - # look for the opening brace after the else, and extract the depth - my $tok_brace = $rtokens->[0]; - my $depth_brace; - if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } - - # probably: "else # side_comment" - else { return } - - my $rpatterns_old = $current_line->get_rpatterns(); - my $rtokens_old = $current_line->get_rtokens(); - my $maximum_field_index = $current_line->get_jmax(); - - # be sure the previous if/elsif is followed by an opening paren - my $jparen = 0; - my $tok_paren = '(' . $depth_brace; - my $tok_test = $rtokens_old->[$jparen]; - return unless ( $tok_test eq $tok_paren ); # shouldn't happen - - # Now find the opening block brace - my ($jbrace); - foreach my $j ( 1 .. $maximum_field_index - 1 ) { - my $tok = $rtokens_old->[$j]; - if ( $tok eq $tok_brace ) { - $jbrace = $j; - last; - } - } - return unless ( defined($jbrace) ); # shouldn't happen - - # Now splice the tokens and patterns of the previous line - # into the else line to insure a match. Add empty fields - # as necessary. - my $jadd = $jbrace - $jparen; - splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); - splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); - splice( @{$rfields}, 1, 0, ('') x $jadd ); - - # force a flush after this line if it does not follow a case - if ( $rfields_old->[0] =~ /^case\s*$/ ) { return } - else { return $jbrace } -} - -{ # sub check_match - my %is_good_alignment; - - BEGIN { - - # Vertically aligning on certain "good" tokens is usually okay - # so we can be less restrictive in marginal cases. - my @q = qw( { ? => = ); - push @q, (','); - @is_good_alignment{@q} = (1) x scalar(@q); - } - - sub check_match { - - # See if the current line matches the current vertical alignment group. - # If not, flush the current group. - my ( $new_line, $old_line ) = @_; - - # uses global variables: - # $previous_minimum_jmax_seen - # $maximum_jmax_seen - # $maximum_line_index - # $marginal_match - my $jmax = $new_line->get_jmax(); - my $maximum_field_index = $old_line->get_jmax(); - - # flush if this line has too many fields - if ( $jmax > $maximum_field_index ) { goto NO_MATCH } - - # flush if adding this line would make a non-monotonic field count - if ( - ( $maximum_field_index > $jmax ) # this has too few fields - && ( - ( $previous_minimum_jmax_seen < - $jmax ) # and wouldn't be monotonic - || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) - ) - ) - { - goto NO_MATCH; - } - - # otherwise see if this line matches the current group - my $jmax_original_line = $new_line->get_jmax_original_line(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - my $list_type = $new_line->get_list_type(); - - my $group_list_type = $old_line->get_list_type(); - my $old_rpatterns = $old_line->get_rpatterns(); - my $old_rtokens = $old_line->get_rtokens(); - - my $jlimit = $jmax - 1; - if ( $maximum_field_index > $jmax ) { - $jlimit = $jmax_original_line; - --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); - } - - # handle comma-separated lists .. - if ( $group_list_type && ( $list_type eq $group_list_type ) ) { - for my $j ( 0 .. $jlimit ) { - my $old_tok = $old_rtokens->[$j]; - next unless $old_tok; - my $new_tok = $rtokens->[$j]; - next unless $new_tok; - - # lists always match ... - # unless they would align any '=>'s with ','s - goto NO_MATCH - if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ - || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); - } - } - - # do detailed check for everything else except hanging side comments - elsif ( !$is_hanging_side_comment ) { - - my $leading_space_count = $new_line->get_leading_space_count(); - - my $max_pad = 0; - my $min_pad = 0; - my $saw_good_alignment; - - for my $j ( 0 .. $jlimit ) { - - my $old_tok = $old_rtokens->[$j]; - my $new_tok = $rtokens->[$j]; - - # Note on encoding used for alignment tokens: - # ------------------------------------------- - # Tokens are "decorated" with information which can help - # prevent unwanted alignments. Consider for example the - # following two lines: - # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); - # local ( $i, $f ) = &'bdiv( $xn, $xd ); - # There are three alignment tokens in each line, a comma, - # an =, and a comma. In the first line these three tokens - # are encoded as: - # ,4+local-18 =3 ,4+split-7 - # and in the second line they are encoded as - # ,4+local-18 =3 ,4+&'bdiv-8 - # Tokens always at least have token name and nesting - # depth. So in this example the ='s are at depth 3 and - # the ,'s are at depth 4. This prevents aligning tokens - # of different depths. Commas contain additional - # information, as follows: - # , {depth} + {container name} - {spaces to opening paren} - # This allows us to reject matching the rightmost commas - # in the above two lines, since they are for different - # function calls. This encoding is done in - # 'sub send_lines_to_vertical_aligner'. - - # Pick off actual token. - # Everything up to the first digit is the actual token. - my $alignment_token = $new_tok; - if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } - - # see if the decorated tokens match - my $tokens_match = $new_tok eq $old_tok - - # Exception for matching terminal : of ternary statement.. - # consider containers prefixed by ? and : a match - || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); - - # No match if the alignment tokens differ... - if ( !$tokens_match ) { - - # ...Unless this is a side comment - if ( - $j == $jlimit - - # and there is either at least one alignment token - # or this is a single item following a list. This - # latter rule is required for 'December' to join - # the following list: - # my (@months) = ( - # '', 'January', 'February', 'March', - # 'April', 'May', 'June', 'July', - # 'August', 'September', 'October', 'November', - # 'December' - # ); - # If it doesn't then the -lp formatting will fail. - && ( $j > 0 || $old_tok =~ /^,/ ) - ) - { - $marginal_match = 1 - if ( $marginal_match == 0 - && $maximum_line_index == 0 ); - last; - } - - goto NO_MATCH; - } - - # Calculate amount of padding required to fit this in. - # $pad is the number of spaces by which we must increase - # the current field to squeeze in this field. - my $pad = - length( $rfields->[$j] ) - $old_line->current_field_width($j); - if ( $j == 0 ) { $pad += $leading_space_count; } - - # remember max pads to limit marginal cases - if ( $alignment_token ne '#' ) { - if ( $pad > $max_pad ) { $max_pad = $pad } - if ( $pad < $min_pad ) { $min_pad = $pad } - } - if ( $is_good_alignment{$alignment_token} ) { - $saw_good_alignment = 1; - } - - # If patterns don't match, we have to be careful... - if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) { - - # flag this as a marginal match since patterns differ - $marginal_match = 1 - if ( $marginal_match == 0 && $maximum_line_index == 0 ); - - # We have to be very careful about aligning commas - # when the pattern's don't match, because it can be - # worse to create an alignment where none is needed - # than to omit one. Here's an example where the ','s - # are not in named containers. The first line below - # should not match the next two: - # ( $a, $b ) = ( $b, $r ); - # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); - # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); - if ( $alignment_token eq ',' ) { - - # do not align commas unless they are in named containers - goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); - } - - # do not align parens unless patterns match; - # large ugly spaces can occur in math expressions. - elsif ( $alignment_token eq '(' ) { - - # But we can allow a match if the parens don't - # require any padding. - if ( $pad != 0 ) { goto NO_MATCH } - } - - # Handle an '=' alignment with different patterns to - # the left. - elsif ( $alignment_token eq '=' ) { - - # It is best to be a little restrictive when - # aligning '=' tokens. Here is an example of - # two lines that we will not align: - # my $variable=6; - # $bb=4; - # The problem is that one is a 'my' declaration, - # and the other isn't, so they're not very similar. - # We will filter these out by comparing the first - # letter of the pattern. This is crude, but works - # well enough. - if ( - substr( $old_rpatterns->[$j], 0, 1 ) ne - substr( $rpatterns->[$j], 0, 1 ) ) - { - goto NO_MATCH; - } - - # If we pass that test, we'll call it a marginal match. - # Here is an example of a marginal match: - # $done{$$op} = 1; - # $op = compile_bblock($op); - # The left tokens are both identifiers, but - # one accesses a hash and the other doesn't. - # We'll let this be a tentative match and undo - # it later if we don't find more than 2 lines - # in the group. - elsif ( $maximum_line_index == 0 ) { - $marginal_match = - 2; # =2 prevents being undone below - } - } - } - - # Don't let line with fewer fields increase column widths - # ( align3.t ) - if ( $maximum_field_index > $jmax ) { - - # Exception: suspend this rule to allow last lines to join - if ( $pad > 0 ) { goto NO_MATCH; } - } - } ## end for my $j ( 0 .. $jlimit) - - # Turn off the "marginal match" flag in some cases... - # A "marginal match" occurs when the alignment tokens agree - # but there are differences in the other tokens (patterns). - # If we leave the marginal match flag set, then the rule is that we - # will align only if there are more than two lines in the group. - # We will turn of the flag if we almost have a match - # and either we have seen a good alignment token or we - # just need a small pad (2 spaces) to fit. These rules are - # the result of experimentation. Tokens which misaligned by just - # one or two characters are annoying. On the other hand, - # large gaps to less important alignment tokens are also annoying. - if ( $marginal_match == 1 - && $jmax == $maximum_field_index - && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) - ) - { - $marginal_match = 0; - } - ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; - } - - # We have a match (even if marginal). - # If the current line has fewer fields than the current group - # but otherwise matches, copy the remaining group fields to - # make it a perfect match. - if ( $maximum_field_index > $jmax ) { - - ########################################################## - # FIXME: The previous version had a bug which made side comments - # become regular fields, so for now the program does not allow a - # line with side comment to match. This should eventually be done. - # The best test file for experimenting is 'lista.t' - ########################################################## - - my $comment = $rfields->[$jmax]; - goto NO_MATCH if ($comment); - - # Corrected loop - for my $jj ( $jlimit .. $maximum_field_index ) { - $rtokens->[$jj] = $old_rtokens->[$jj]; - $rfields->[ $jj + 1 ] = ''; - $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ]; - } - -## THESE DO NOT GIVE CORRECT RESULTS -## $rfields->[$jmax] = $comment; -## $new_line->set_jmax($jmax); - - } - return; - - NO_MATCH: - ##print "no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$old_rtokens->[0]\n"; - my_flush(); - return; - } -} - -sub check_fit { - - my ( $new_line, $old_line ) = @_; - return unless ( $maximum_line_index >= 0 ); - - my $jmax = $new_line->get_jmax(); - my $leading_space_count = $new_line->get_leading_space_count(); - my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); - my $rtokens = $new_line->get_rtokens(); - my $rfields = $new_line->get_rfields(); - my $rpatterns = $new_line->get_rpatterns(); - - my $group_list_type = $group_lines[0]->get_list_type(); - - my $padding_so_far = 0; - my $padding_available = $old_line->get_available_space_on_right(); - - # save current columns in case this doesn't work - save_alignment_columns(); - - my $maximum_field_index = $old_line->get_jmax(); - for my $j ( 0 .. $jmax ) { - - my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j); - - if ( $j == 0 ) { - $pad += $leading_space_count; - } - - # remember largest gap of the group, excluding gap to side comment - if ( $pad < 0 - && $group_maximum_gap < -$pad - && $j > 0 - && $j < $jmax - 1 ) - { - $group_maximum_gap = -$pad; - } - - next if $pad < 0; - - ## OLD NOTES: - ## This patch helps sometimes, but it doesn't check to see if - ## the line is too long even without the side comment. It needs - ## to be reworked. - ##don't let a long token with no trailing side comment push - ##side comments out, or end a group. (sidecmt1.t) - ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0); - - # BEGIN PATCH for keith1.txt. - # If the group began matching multiple tokens but later this got - # reduced to a fewer number of matching tokens, then the fields - # of the later lines will still have to fit into their corresponding - # fields. So a large later field will "push" the other fields to - # the right, including previous side comments, and if there is no room - # then there is no match. - # For example, look at the last line in the following snippet: - - # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false; - # my $env = ($b_prod_db) ? "prd" : "val"; - # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL"; - # my $task = $OPT{t}; - # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash"; - - # The long term will push the '?' to the right to fit in, and in this - # case there is not enough room so it will not match the equals unless - # we do something special. - - # Usually it looks good to keep an initial alignment of '=' going, and - # we can do this if the long term can fit in the space taken up by the - # remaining fields (the ? : fields here). - - # Allowing any matching token for now, but it could be restricted - # to an '='-like token if necessary. - - if ( - $pad > $padding_available - && $jmax == 2 # matching one thing (plus #) - && $j == $jmax - 1 # at last field - && $maximum_line_index > 0 # more than 1 line in group now - && $jmax < $maximum_field_index # other lines have more fields - && length( $rfields->[$jmax] ) == 0 # no side comment - - # Uncomment to match only equals (but this does not seem necessary) - # && $rtokens->[0] =~ /^=\d/ # matching an equals - ) - { - my $extra_padding = 0; - foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) { - $extra_padding += $old_line->current_field_width($jj); - } - - next if ( $pad <= $padding_available + $extra_padding ); - } - - # END PATCH for keith1.pl - - # This line will need space; lets see if we want to accept it.. - if ( - - # not if this won't fit - ( $pad > $padding_available ) - - # previously, there were upper bounds placed on padding here - # (maximum_whitespace_columns), but they were not really helpful - - ) - { - - # revert to starting state then flush; things didn't work out - restore_alignment_columns(); - my_flush(); - last; - } - - # patch to avoid excessive gaps in previous lines, - # due to a line of fewer fields. - # return join( ".", - # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, - # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); - next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); - - # looks ok, squeeze this field in - $old_line->increase_field_width( $j, $pad ); - $padding_available -= $pad; - - # remember largest gap of the group, excluding gap to side comment - if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { - $group_maximum_gap = $pad; - } - } - return; -} - -sub add_to_group { - - # The current line either starts a new alignment group or is - # accepted into the current alignment group. - my $new_line = shift; - $group_lines[ ++$maximum_line_index ] = $new_line; - - # initialize field lengths if starting new group - if ( $maximum_line_index == 0 ) { - - my $jmax = $new_line->get_jmax(); - my $rfields = $new_line->get_rfields(); - my $rtokens = $new_line->get_rtokens(); - my $col = $new_line->get_leading_space_count(); - - for my $j ( 0 .. $jmax ) { - $col += length( $rfields->[$j] ); - - # create initial alignments for the new group - my $token = ""; - if ( $j < $jmax ) { $token = $rtokens->[$j] } - my $alignment = make_alignment( $col, $token ); - $new_line->set_alignment( $j, $alignment ); - } - - $maximum_jmax_seen = $jmax; - $minimum_jmax_seen = $jmax; - } - - # use previous alignments otherwise - else { - my @new_alignments = - $group_lines[ $maximum_line_index - 1 ]->get_alignments(); - $new_line->set_alignments(@new_alignments); - } - - # remember group jmax extremes for next call to valign_input - $previous_minimum_jmax_seen = $minimum_jmax_seen; - $previous_maximum_jmax_seen = $maximum_jmax_seen; - return; -} - -sub dump_array { - - # debug routine to dump array contents - local $" = ')('; - print STDOUT "(@_)\n"; - return; -} - -# flush() sends the current Perl::Tidy::VerticalAligner group down the -# pipeline to Perl::Tidy::FileWriter. - -# This is the external flush, which also empties the buffer and cache -sub flush { - - # the buffer must be emptied first, then any cached text - dump_valign_buffer(); - - if ( $maximum_line_index < 0 ) { - if ($cached_line_type) { - $seqno_string = $cached_seqno_string; - valign_output_step_C( $cached_line_text, - $cached_line_leading_space_count, - $last_level_written ); - $cached_line_type = 0; - $cached_line_text = ""; - $cached_seqno_string = ""; - } - } - else { - my_flush(); - } - return; -} - -sub reduce_valign_buffer_indentation { - - my ($diff) = @_; - if ( $valign_buffer_filling && $diff ) { - my $max_valign_buffer = @valign_buffer; - foreach my $i ( 0 .. $max_valign_buffer - 1 ) { - my ( $line, $leading_space_count, $level ) = - @{ $valign_buffer[$i] }; - my $ws = substr( $line, 0, $diff ); - if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { - $line = substr( $line, $diff ); - } - if ( $leading_space_count >= $diff ) { - $leading_space_count -= $diff; - $level = level_change( $leading_space_count, $diff, $level ); - } - $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; - } - } - return; -} - -sub level_change { - - # compute decrease in level when we remove $diff spaces from the - # leading spaces - my ( $leading_space_count, $diff, $level ) = @_; - if ($rOpts_indent_columns) { - my $olev = - int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); - my $nlev = int( $leading_space_count / $rOpts_indent_columns ); - $level -= ( $olev - $nlev ); - if ( $level < 0 ) { $level = 0 } - } - return $level; -} - -sub dump_valign_buffer { - if (@valign_buffer) { - foreach (@valign_buffer) { - valign_output_step_D( @{$_} ); - } - @valign_buffer = (); - } - $valign_buffer_filling = ""; - return; -} - -# This is the internal flush, which leaves the cache intact -sub my_flush { - - return if ( $maximum_line_index < 0 ); - - # handle a group of comment lines - if ( $group_type eq 'COMMENT' ) { - - VALIGN_DEBUG_FLAG_APPEND0 && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; - - }; - my $leading_space_count = $comment_leading_space_count; - my $leading_string = get_leading_string($leading_space_count); - - # zero leading space count if any lines are too long - my $max_excess = 0; - for my $i ( 0 .. $maximum_line_index ) { - my $str = $group_lines[$i]; - my $excess = - length($str) + - $leading_space_count - - maximum_line_length_for_level($group_level); - if ( $excess > $max_excess ) { - $max_excess = $excess; - } - } - - if ( $max_excess > 0 ) { - $leading_space_count -= $max_excess; - if ( $leading_space_count < 0 ) { $leading_space_count = 0 } - $last_outdented_line_at = - $file_writer_object->get_output_line_number(); - unless ($outdented_line_count) { - $first_outdented_line_at = $last_outdented_line_at; - } - $outdented_line_count += ( $maximum_line_index + 1 ); - } - - # write the group of lines - my $outdent_long_lines = 0; - for my $i ( 0 .. $maximum_line_index ) { - valign_output_step_B( $leading_space_count, $group_lines[$i], 0, - $outdent_long_lines, "", $group_level ); - } - } - - # handle a group of code lines - else { - - VALIGN_DEBUG_FLAG_APPEND0 && do { - my $group_list_type = $group_lines[0]->get_list_type(); - my ( $a, $b, $c ) = caller(); - my $maximum_field_index = $group_lines[0]->get_jmax(); - print STDOUT -"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; - - }; - - # some small groups are best left unaligned - my $do_not_align = decide_if_aligned(); - - # optimize side comment location - $do_not_align = adjust_side_comment($do_not_align); - - # recover spaces for -lp option if possible - my $extra_leading_spaces = get_extra_leading_spaces(); - - # all lines of this group have the same basic leading spacing - my $group_leader_length = $group_lines[0]->get_leading_space_count(); - - # add extra leading spaces if helpful - # NOTE: Use zero; this did not work well - my $min_ci_gap = 0; - - # loop to output all lines - for my $i ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$i]; - valign_output_step_A( $line, $min_ci_gap, $do_not_align, - $group_leader_length, $extra_leading_spaces ); - } - } - initialize_for_new_group(); - return; -} - -sub decide_if_aligned { - - # Do not try to align two lines which are not really similar - return unless $maximum_line_index == 1; - return if ($is_matching_terminal_line); - - my $group_list_type = $group_lines[0]->get_list_type(); - - my $do_not_align = ( - - # always align lists - !$group_list_type - - && ( - - # don't align if it was just a marginal match - $marginal_match - - # don't align two lines with big gap - || $group_maximum_gap > 12 - - # or lines with differing number of alignment tokens - # TODO: this could be improved. It occasionally rejects - # good matches. - || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen - ) - ); - - # But try to convert them into a simple comment group if the first line - # a has side comment - my $rfields = $group_lines[0]->get_rfields(); - my $maximum_field_index = $group_lines[0]->get_jmax(); - if ( $do_not_align - && ( $maximum_line_index > 0 ) - && ( length( $rfields->[$maximum_field_index] ) > 0 ) ) - { - combine_fields(); - $do_not_align = 0; - } - return $do_not_align; -} - -sub adjust_side_comment { - - my $do_not_align = shift; - - # let's see if we can move the side comment field out a little - # to improve readability (the last field is always a side comment field) - my $have_side_comment = 0; - my $first_side_comment_line = -1; - my $maximum_field_index = $group_lines[0]->get_jmax(); - for my $i ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$i]; - - if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { - $have_side_comment = 1; - $first_side_comment_line = $i; - last; - } - } - - my $kmax = $maximum_field_index + 1; - - if ($have_side_comment) { - - my $line = $group_lines[0]; - - # the maximum space without exceeding the line length: - my $avail = $line->get_available_space_on_right(); - - # try to use the previous comment column - my $side_comment_column = $line->get_column( $kmax - 2 ); - my $move = $last_comment_column - $side_comment_column; - -## my $sc_line0 = $side_comment_history[0]->[0]; -## my $sc_col0 = $side_comment_history[0]->[1]; -## my $sc_line1 = $side_comment_history[1]->[0]; -## my $sc_col1 = $side_comment_history[1]->[1]; -## my $sc_line2 = $side_comment_history[2]->[0]; -## my $sc_col2 = $side_comment_history[2]->[1]; -## -## # FUTURE UPDATES: -## # Be sure to ignore 'do not align' and '} # end comments' -## # Find first $move > 0 and $move <= $avail as follows: -## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 -## # 2. try sc_col2 if (line-sc_line2) < 12 -## # 3. try min possible space, plus up to 8, -## # 4. try min possible space - - if ( $kmax > 0 && !$do_not_align ) { - - # but if this doesn't work, give up and use the minimum space - if ( $move > $avail ) { - $move = $rOpts_minimum_space_to_comment - 1; - } - - # but we want some minimum space to the comment - my $min_move = $rOpts_minimum_space_to_comment - 1; - if ( $move >= 0 - && $last_side_comment_length > 0 - && ( $first_side_comment_line == 0 ) - && $group_level == $last_level_written ) - { - $min_move = 0; - } - - if ( $move < $min_move ) { - $move = $min_move; - } - - # previously, an upper bound was placed on $move here, - # (maximum_space_to_comment), but it was not helpful - - # don't exceed the available space - if ( $move > $avail ) { $move = $avail } - - # we can only increase space, never decrease - if ( $move > 0 ) { - $line->increase_field_width( $maximum_field_index - 1, $move ); - } - - # remember this column for the next group - $last_comment_column = $line->get_column( $kmax - 2 ); - } - else { - - # try to at least line up the existing side comment location - if ( $kmax > 0 && $move > 0 && $move < $avail ) { - $line->increase_field_width( $maximum_field_index - 1, $move ); - $do_not_align = 0; - } - - # reset side comment column if we can't align - else { - forget_side_comment(); - } - } - } - return $do_not_align; -} - -sub valign_output_step_A { - - ############################################################### - # This is Step A in writing vertically aligned lines. - # The line is prepared according to the alignments which have - # been found and shipped to the next step. - ############################################################### - - my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, - $extra_leading_spaces ) - = @_; - my $rfields = $line->get_rfields(); - my $leading_space_count = $line->get_leading_space_count(); - my $outdent_long_lines = $line->get_outdent_long_lines(); - my $maximum_field_index = $line->get_jmax(); - my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); - - # add any extra spaces - if ( $leading_space_count > $group_leader_length ) { - $leading_space_count += $min_ci_gap; - } - - my $str = $rfields->[0]; - - # loop to concatenate all fields of this line and needed padding - my $total_pad_count = 0; - for my $j ( 1 .. $maximum_field_index ) { - - # skip zero-length side comments - last - if ( - ( $j == $maximum_field_index ) - && ( !defined( $rfields->[$j] ) - || ( length( $rfields->[$j] ) == 0 ) ) - ); - - # compute spaces of padding before this field - my $col = $line->get_column( $j - 1 ); - my $pad = $col - ( length($str) + $leading_space_count ); - - if ($do_not_align) { - $pad = - ( $j < $maximum_field_index ) - ? 0 - : $rOpts_minimum_space_to_comment - 1; - } - - # if the -fpsc flag is set, move the side comment to the selected - # column if and only if it is possible, ignoring constraints on - # line length and minimum space to comment - if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) - { - my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; - if ( $newpad >= 0 ) { $pad = $newpad; } - } - - # accumulate the padding - if ( $pad > 0 ) { $total_pad_count += $pad; } - - # add this field - if ( !defined $rfields->[$j] ) { - write_diagnostics("UNDEFined field at j=$j\n"); - } - - # only add padding when we have a finite field; - # this avoids extra terminal spaces if we have empty fields - if ( length( $rfields->[$j] ) > 0 ) { - $str .= ' ' x $total_pad_count; - $total_pad_count = 0; - $str .= $rfields->[$j]; - } - else { - $total_pad_count = 0; - } - - # update side comment history buffer - if ( $j == $maximum_field_index ) { - my $lineno = $file_writer_object->get_output_line_number(); - shift @side_comment_history; - push @side_comment_history, [ $lineno, $col ]; - } - } - - my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) ); - - # ship this line off - valign_output_step_B( $leading_space_count + $extra_leading_spaces, - $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags, $group_level ); - return; -} - -sub get_extra_leading_spaces { - - #---------------------------------------------------------- - # Define any extra indentation space (for the -lp option). - # Here is why: - # If a list has side comments, sub scan_list must dump the - # list before it sees everything. When this happens, it sets - # the indentation to the standard scheme, but notes how - # many spaces it would have liked to use. We may be able - # to recover that space here in the event that all of the - # lines of a list are back together again. - #---------------------------------------------------------- - - my $extra_leading_spaces = 0; - if ($extra_indent_ok) { - my $object = $group_lines[0]->get_indentation(); - if ( ref($object) ) { - my $extra_indentation_spaces_wanted = - get_recoverable_spaces($object); - - # all indentation objects must be the same - for my $i ( 1 .. $maximum_line_index ) { - if ( $object != $group_lines[$i]->get_indentation() ) { - $extra_indentation_spaces_wanted = 0; - last; - } - } - - if ($extra_indentation_spaces_wanted) { - - # the maximum space without exceeding the line length: - my $avail = $group_lines[0]->get_available_space_on_right(); - $extra_leading_spaces = - ( $avail > $extra_indentation_spaces_wanted ) - ? $extra_indentation_spaces_wanted - : $avail; - - # update the indentation object because with -icp the terminal - # ');' will use the same adjustment. - $object->permanently_decrease_available_spaces( - -$extra_leading_spaces ); - } - } - } - return $extra_leading_spaces; -} - -sub combine_fields { - - # combine all fields except for the comment field ( sidecmt.t ) - # Uses global variables: - # @group_lines - # $maximum_line_index - my $maximum_field_index = $group_lines[0]->get_jmax(); - foreach my $j ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$j]; - my $rfields = $line->get_rfields(); - foreach ( 1 .. $maximum_field_index - 1 ) { - $rfields->[0] .= $rfields->[$_]; - } - $rfields->[1] = $rfields->[$maximum_field_index]; - - $line->set_jmax(1); - $line->set_column( 0, 0 ); - $line->set_column( 1, 0 ); - - } - $maximum_field_index = 1; - - for my $j ( 0 .. $maximum_line_index ) { - my $line = $group_lines[$j]; - my $rfields = $line->get_rfields(); - for my $k ( 0 .. $maximum_field_index ) { - my $pad = length( $rfields->[$k] ) - $line->current_field_width($k); - if ( $k == 0 ) { - $pad += $group_lines[$j]->get_leading_space_count(); - } - - if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } - - } - } - return; -} - -sub get_output_line_number { - - # the output line number reported to a caller is the number of items - # written plus the number of items in the buffer - my $self = shift; - return 1 + $maximum_line_index + - $file_writer_object->get_output_line_number(); -} - -sub valign_output_step_B { - - ############################################################### - # This is Step B in writing vertically aligned lines. - # Vertical tightness is applied according to preset flags. - # In particular this routine handles stacking of opening - # and closing tokens. - ############################################################### - - my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, - $rvertical_tightness_flags, $level ) - = @_; - - # handle outdenting of long lines: - if ($outdent_long_lines) { - my $excess = - length($str) - - $side_comment_length + - $leading_space_count - - maximum_line_length_for_level($level); - if ( $excess > 0 ) { - $leading_space_count = 0; - $last_outdented_line_at = - $file_writer_object->get_output_line_number(); - - unless ($outdented_line_count) { - $first_outdented_line_at = $last_outdented_line_at; - } - $outdented_line_count++; - } - } - - # Make preliminary leading whitespace. It could get changed - # later by entabbing, so we have to keep track of any changes - # to the leading_space_count from here on. - my $leading_string = - $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; - - # Unpack any recombination data; it was packed by - # sub send_lines_to_vertical_aligner. Contents: - # - # [0] type: 1=opening non-block 2=closing non-block - # 3=opening block brace 4=closing block brace - # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok - # if closing: spaces of padding to use - # [2] sequence number of container - # [3] valid flag: do not append if this flag is false - # - my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, - $seqno_end ); - if ($rvertical_tightness_flags) { - ( - $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, - $seqno_end - ) = @{$rvertical_tightness_flags}; - } - - $seqno_string = $seqno_end; - - # handle any cached line .. - # either append this line to it or write it out - if ( length($cached_line_text) ) { - - # Dump an invalid cached line - if ( !$cached_line_valid ) { - valign_output_step_C( $cached_line_text, - $cached_line_leading_space_count, - $last_level_written ); - } - - # Handle cached line ending in OPENING tokens - elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { - - my $gap = $leading_space_count - length($cached_line_text); - - # handle option of just one tight opening per line: - if ( $cached_line_flag == 1 ) { - if ( defined($open_or_close) && $open_or_close == 1 ) { - $gap = -1; - } - } - - if ( $gap >= 0 && defined($seqno_beg) ) { - $leading_string = $cached_line_text . ' ' x $gap; - $leading_space_count = $cached_line_leading_space_count; - $seqno_string = $cached_seqno_string . ':' . $seqno_beg; - $level = $last_level_written; - } - else { - valign_output_step_C( $cached_line_text, - $cached_line_leading_space_count, - $last_level_written ); - } - } - - # Handle cached line ending in CLOSING tokens - else { - my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; - if ( - - # The new line must start with container - $seqno_beg - - # The container combination must be okay.. - && ( - - # okay to combine like types - ( $open_or_close == $cached_line_type ) - - # closing block brace may append to non-block - || ( $cached_line_type == 2 && $open_or_close == 4 ) - - # something like ');' - || ( !$open_or_close && $cached_line_type == 2 ) - - ) - - # The combined line must fit - && ( - length($test_line) <= - maximum_line_length_for_level($last_level_written) ) - ) - { - - $seqno_string = $cached_seqno_string . ':' . $seqno_beg; - - # Patch to outdent closing tokens ending # in ');' - # If we are joining a line like ');' to a previous stacked - # set of closing tokens, then decide if we may outdent the - # combined stack to the indentation of the ');'. Since we - # should not normally outdent any of the other tokens more than - # the indentation of the lines that contained them, we will - # only do this if all of the corresponding opening - # tokens were on the same line. This can happen with - # -sot and -sct. For example, it is ok here: - # __PACKAGE__->load_components( qw( - # PK::Auto - # Core - # )); - # - # But, for example, we do not outdent in this example because - # that would put the closing sub brace out farther than the - # opening sub brace: - # - # perltidy -sot -sct - # $c->Tk::bind( - # '' => sub { - # my ($c) = @_; - # my $e = $c->XEvent; - # itemsUnderArea $c; - # } ); - # - if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { - - # The way to tell this is if the stacked sequence numbers - # of this output line are the reverse of the stacked - # sequence numbers of the previous non-blank line of - # sequence numbers. So we can join if the previous - # nonblank string of tokens is the mirror image. For - # example if stack )}] is 13:8:6 then we are looking for a - # leading stack like [{( which is 6:8:13 We only need to - # check the two ends, because the intermediate tokens must - # fall in order. Note on speed: having to split on colons - # and eliminate multiple colons might appear to be slow, - # but it's not an issue because we almost never come - # through here. In a typical file we don't. - $seqno_string =~ s/^:+//; - $last_nonblank_seqno_string =~ s/^:+//; - $seqno_string =~ s/:+/:/g; - $last_nonblank_seqno_string =~ s/:+/:/g; - - # how many spaces can we outdent? - my $diff = - $cached_line_leading_space_count - $leading_space_count; - if ( $diff > 0 - && length($seqno_string) - && length($last_nonblank_seqno_string) == - length($seqno_string) ) - { - my @seqno_last = - ( split ':', $last_nonblank_seqno_string ); - my @seqno_now = ( split ':', $seqno_string ); - if ( $seqno_now[-1] == $seqno_last[0] - && $seqno_now[0] == $seqno_last[-1] ) - { - - # OK to outdent .. - # for absolute safety, be sure we only remove - # whitespace - my $ws = substr( $test_line, 0, $diff ); - if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { - - $test_line = substr( $test_line, $diff ); - $cached_line_leading_space_count -= $diff; - $last_level_written = - level_change( - $cached_line_leading_space_count, - $diff, $last_level_written ); - reduce_valign_buffer_indentation($diff); - } - - # shouldn't happen, but not critical: - ##else { - ## ERROR transferring indentation here - ##} - } - } - } - - $str = $test_line; - $leading_string = ""; - $leading_space_count = $cached_line_leading_space_count; - $level = $last_level_written; - } - else { - valign_output_step_C( $cached_line_text, - $cached_line_leading_space_count, - $last_level_written ); - } - } - } - $cached_line_type = 0; - $cached_line_text = ""; - - # make the line to be written - my $line = $leading_string . $str; - - # write or cache this line - if ( !$open_or_close || $side_comment_length > 0 ) { - valign_output_step_C( $line, $leading_space_count, $level ); - } - else { - $cached_line_text = $line; - $cached_line_type = $open_or_close; - $cached_line_flag = $tightness_flag; - $cached_seqno = $seqno; - $cached_line_valid = $valid; - $cached_line_leading_space_count = $leading_space_count; - $cached_seqno_string = $seqno_string; - } - - $last_level_written = $level; - $last_side_comment_length = $side_comment_length; - $extra_indent_ok = 0; - return; -} - -sub valign_output_step_C { - - ############################################################### - # This is Step C in writing vertically aligned lines. - # Lines are either stored in a buffer or passed along to the next step. - # The reason for storing lines is that we may later want to reduce their - # indentation when -sot and -sct are both used. - ############################################################### - my @args = @_; - - # Dump any saved lines if we see a line with an unbalanced opening or - # closing token. - dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); - - # Either store or write this line - if ($valign_buffer_filling) { - push @valign_buffer, [@args]; - } - else { - valign_output_step_D(@args); - } - - # For lines starting or ending with opening or closing tokens.. - if ($seqno_string) { - $last_nonblank_seqno_string = $seqno_string; - - # Start storing lines when we see a line with multiple stacked opening - # tokens. - # patch for RT #94354, requested by Colin Williams - if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ ) - { - - # This test is efficient but a little subtle: The first test says - # that we have multiple sequence numbers and hence multiple opening - # or closing tokens in this line. The second part of the test - # rejects stacked closing and ternary tokens. So if we get here - # then we should have stacked unbalanced opening tokens. - - # Here is a complex example: - - # Foo($Bar[0], { # (side comment) - # baz => 1, - # }); - - # The first line has sequence 6::4. It does not begin with - # a closing token or ternary, so it passes the test and must be - # stacked opening tokens. - - # The last line has sequence 4:6 but is a stack of closing tokens, - # so it gets rejected. - - # Note that the sequence number of an opening token for a qw quote - # is a negative number and will be rejected. - # For example, for the following line: - # skip_symbols([qw( - # $seqno_string='10:5:-1'. It would be okay to accept it but - # I decided not to do this after testing. - - $valign_buffer_filling = $seqno_string; - - } - } - return; -} - -sub valign_output_step_D { - - ############################################################### - # This is Step D in writing vertically aligned lines. - # Write one vertically aligned line of code to the output object. - ############################################################### - - my ( $line, $leading_space_count, $level ) = @_; - - # The line is currently correct if there is no tabbing (recommended!) - # We may have to lop off some leading spaces and replace with tabs. - if ( $leading_space_count > 0 ) { - - # Nothing to do if no tabs - if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) - || $rOpts_indent_columns <= 0 ) - { - - # nothing to do - } - - # Handle entab option - elsif ($rOpts_entab_leading_whitespace) { - my $space_count = - $leading_space_count % $rOpts_entab_leading_whitespace; - my $tab_count = - int( $leading_space_count / $rOpts_entab_leading_whitespace ); - my $leading_string = "\t" x $tab_count . ' ' x $space_count; - if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { - substr( $line, 0, $leading_space_count ) = $leading_string; - } - else { - - # shouldn't happen - program error counting whitespace - # - skip entabbing - VALIGN_DEBUG_FLAG_TABS - && warning( -"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" - ); - } - } - - # Handle option of one tab per level - else { - my $leading_string = ( "\t" x $level ); - my $space_count = - $leading_space_count - $level * $rOpts_indent_columns; - - # shouldn't happen: - if ( $space_count < 0 ) { - - # But it could be an outdented comment - if ( $line !~ /^\s*#/ ) { - VALIGN_DEBUG_FLAG_TABS - && warning( -"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" - ); - } - $leading_string = ( ' ' x $leading_space_count ); - } - else { - $leading_string .= ( ' ' x $space_count ); - } - if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { - substr( $line, 0, $leading_space_count ) = $leading_string; - } - else { - - # shouldn't happen - program error counting whitespace - # we'll skip entabbing - VALIGN_DEBUG_FLAG_TABS - && warning( -"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" - ); - } - } - } - $file_writer_object->write_code_line( $line . "\n" ); - return; -} - -{ # begin get_leading_string - - my @leading_string_cache; - - sub get_leading_string { - - # define the leading whitespace string for this line.. - my $leading_whitespace_count = shift; - - # Handle case of zero whitespace, which includes multi-line quotes - # (which may have a finite level; this prevents tab problems) - if ( $leading_whitespace_count <= 0 ) { - return ""; - } - - # look for previous result - elsif ( $leading_string_cache[$leading_whitespace_count] ) { - return $leading_string_cache[$leading_whitespace_count]; - } - - # must compute a string for this number of spaces - my $leading_string; - - # Handle simple case of no tabs - if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) - || $rOpts_indent_columns <= 0 ) - { - $leading_string = ( ' ' x $leading_whitespace_count ); - } - - # Handle entab option - elsif ($rOpts_entab_leading_whitespace) { - my $space_count = - $leading_whitespace_count % $rOpts_entab_leading_whitespace; - my $tab_count = int( - $leading_whitespace_count / $rOpts_entab_leading_whitespace ); - $leading_string = "\t" x $tab_count . ' ' x $space_count; - } - - # Handle option of one tab per level - else { - $leading_string = ( "\t" x $group_level ); - my $space_count = - $leading_whitespace_count - $group_level * $rOpts_indent_columns; - - # shouldn't happen: - if ( $space_count < 0 ) { - VALIGN_DEBUG_FLAG_TABS - && warning( -"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" - ); - - # -- skip entabbing - $leading_string = ( ' ' x $leading_whitespace_count ); - } - else { - $leading_string .= ( ' ' x $space_count ); - } - } - $leading_string_cache[$leading_whitespace_count] = $leading_string; - return $leading_string; - } -} # end get_leading_string - -sub report_anything_unusual { - my $self = shift; - if ( $outdented_line_count > 0 ) { - write_logfile_entry( - "$outdented_line_count long lines were outdented:\n"); - write_logfile_entry( - " First at output line $first_outdented_line_at\n"); - - if ( $outdented_line_count > 1 ) { - write_logfile_entry( - " Last at output line $last_outdented_line_at\n"); - } - write_logfile_entry( - " use -noll to prevent outdenting, -l=n to increase line length\n" - ); - write_logfile_entry("\n"); - } - return; -} - -##################################################################### -# -# the Perl::Tidy::FileWriter class writes the output file -# -##################################################################### - -package Perl::Tidy::FileWriter; - -# Maximum number of little messages; probably need not be changed. -use constant MAX_NAG_MESSAGES => 6; - -sub write_logfile_entry { - my ( $self, $msg ) = @_; - my $logger_object = $self->{_logger_object}; - if ($logger_object) { - $logger_object->write_logfile_entry($msg); - } - return; -} - -sub new { - my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_; - - return bless { - _line_sink_object => $line_sink_object, - _logger_object => $logger_object, - _rOpts => $rOpts, - _output_line_number => 1, - _consecutive_blank_lines => 0, - _consecutive_nonblank_lines => 0, - _first_line_length_error => 0, - _max_line_length_error => 0, - _last_line_length_error => 0, - _first_line_length_error_at => 0, - _max_line_length_error_at => 0, - _last_line_length_error_at => 0, - _line_length_error_count => 0, - _max_output_line_length => 0, - _max_output_line_length_at => 0, - }, $class; -} - -sub tee_on { - my $self = shift; - $self->{_line_sink_object}->tee_on(); - return; -} - -sub tee_off { - my $self = shift; - $self->{_line_sink_object}->tee_off(); - return; -} - -sub get_output_line_number { - my $self = shift; - return $self->{_output_line_number}; -} - -sub decrement_output_line_number { - my $self = shift; - $self->{_output_line_number}--; - return; -} - -sub get_consecutive_nonblank_lines { - my $self = shift; - return $self->{_consecutive_nonblank_lines}; -} - -sub reset_consecutive_blank_lines { - my $self = shift; - $self->{_consecutive_blank_lines} = 0; - return; -} - -sub want_blank_line { - my $self = shift; - unless ( $self->{_consecutive_blank_lines} ) { - $self->write_blank_code_line(); - } - return; -} - -sub require_blank_code_lines { - - # write out the requested number of blanks regardless of the value of -mbl - # unless -mbl=0. This allows extra blank lines to be written for subs and - # packages even with the default -mbl=1 - my ( $self, $count ) = @_; - my $need = $count - $self->{_consecutive_blank_lines}; - my $rOpts = $self->{_rOpts}; - my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0; - foreach my $i ( 0 .. $need - 1 ) { - $self->write_blank_code_line($forced); - } - return; -} - -sub write_blank_code_line { - my $self = shift; - my $forced = shift; - my $rOpts = $self->{_rOpts}; - return - if (!$forced - && $self->{_consecutive_blank_lines} >= - $rOpts->{'maximum-consecutive-blank-lines'} ); - $self->{_consecutive_blank_lines}++; - $self->{_consecutive_nonblank_lines} = 0; - $self->write_line("\n"); - return; -} - -sub write_code_line { - my $self = shift; - my $a = shift; - - if ( $a =~ /^\s*$/ ) { - my $rOpts = $self->{_rOpts}; - return - if ( $self->{_consecutive_blank_lines} >= - $rOpts->{'maximum-consecutive-blank-lines'} ); - $self->{_consecutive_blank_lines}++; - $self->{_consecutive_nonblank_lines} = 0; - } - else { - $self->{_consecutive_blank_lines} = 0; - $self->{_consecutive_nonblank_lines}++; - } - $self->write_line($a); - return; -} - -sub write_line { - my ( $self, $a ) = @_; - - # TODO: go through and see if the test is necessary here - if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } - - $self->{_line_sink_object}->write_line($a); - - # This calculation of excess line length ignores any internal tabs - my $rOpts = $self->{_rOpts}; - my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; - if ( $a =~ /^\t+/g ) { - $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); - } - - # Note that we just incremented output line number to future value - # so we must subtract 1 for current line number - if ( length($a) > 1 + $self->{_max_output_line_length} ) { - $self->{_max_output_line_length} = length($a) - 1; - $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; - } - - if ( $exceed > 0 ) { - my $output_line_number = $self->{_output_line_number}; - $self->{_last_line_length_error} = $exceed; - $self->{_last_line_length_error_at} = $output_line_number - 1; - if ( $self->{_line_length_error_count} == 0 ) { - $self->{_first_line_length_error} = $exceed; - $self->{_first_line_length_error_at} = $output_line_number - 1; - } - - if ( - $self->{_last_line_length_error} > $self->{_max_line_length_error} ) - { - $self->{_max_line_length_error} = $exceed; - $self->{_max_line_length_error_at} = $output_line_number - 1; - } - - if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) { - $self->write_logfile_entry( - "Line length exceeded by $exceed characters\n"); - } - $self->{_line_length_error_count}++; - } - return; -} - -sub report_line_length_errors { - my $self = shift; - my $rOpts = $self->{_rOpts}; - my $line_length_error_count = $self->{_line_length_error_count}; - if ( $line_length_error_count == 0 ) { - $self->write_logfile_entry( - "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); - my $max_output_line_length = $self->{_max_output_line_length}; - my $max_output_line_length_at = $self->{_max_output_line_length_at}; - $self->write_logfile_entry( -" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" - ); - - } - else { - - my $word = ( $line_length_error_count > 1 ) ? "s" : ""; - $self->write_logfile_entry( -"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" - ); - - $word = ( $line_length_error_count > 1 ) ? "First" : ""; - my $first_line_length_error = $self->{_first_line_length_error}; - my $first_line_length_error_at = $self->{_first_line_length_error_at}; - $self->write_logfile_entry( -" $word at line $first_line_length_error_at by $first_line_length_error characters\n" - ); - - if ( $line_length_error_count > 1 ) { - my $max_line_length_error = $self->{_max_line_length_error}; - my $max_line_length_error_at = $self->{_max_line_length_error_at}; - my $last_line_length_error = $self->{_last_line_length_error}; - my $last_line_length_error_at = $self->{_last_line_length_error_at}; - $self->write_logfile_entry( -" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" - ); - $self->write_logfile_entry( -" Last at line $last_line_length_error_at by $last_line_length_error characters\n" - ); - } - } - return; -} - -##################################################################### -# -# The Perl::Tidy::Debugger class shows line tokenization -# -##################################################################### - -package Perl::Tidy::Debugger; - -sub new { - - my ( $class, $filename ) = @_; - - return bless { - _debug_file => $filename, - _debug_file_opened => 0, - _fh => undef, - }, $class; -} - -sub really_open_debug_file { - - my $self = shift; - my $debug_file = $self->{_debug_file}; - my $fh; - unless ( $fh = IO::File->new("> $debug_file") ) { - Perl::Tidy::Warn("can't open $debug_file: $!\n"); - } - $self->{_debug_file_opened} = 1; - $self->{_fh} = $fh; - print $fh - "Use -dump-token-types (-dtt) to get a list of token type codes\n"; - return; -} - -sub close_debug_file { - - my $self = shift; - my $fh = $self->{_fh}; - if ( $self->{_debug_file_opened} ) { - eval { $self->{_fh}->close() }; - } - return; -} - -sub write_debug_entry { - - # This is a debug dump routine which may be modified as necessary - # to dump tokens on a line-by-line basis. The output will be written - # to the .DEBUG file when the -D flag is entered. - my ( $self, $line_of_tokens ) = @_; - - my $input_line = $line_of_tokens->{_line_text}; - - my $rtoken_type = $line_of_tokens->{_rtoken_type}; - my $rtokens = $line_of_tokens->{_rtokens}; - my $rlevels = $line_of_tokens->{_rlevels}; - my $rslevels = $line_of_tokens->{_rslevels}; - my $rblock_type = $line_of_tokens->{_rblock_type}; - - my $input_line_number = $line_of_tokens->{_line_number}; - my $line_type = $line_of_tokens->{_line_type}; - ##my $rtoken_array = $line_of_tokens->{_token_array}; - - my ( $j, $num ); - - my $token_str = "$input_line_number: "; - my $reconstructed_original = "$input_line_number: "; - my $block_str = "$input_line_number: "; - - #$token_str .= "$line_type: "; - #$reconstructed_original .= "$line_type: "; - - my $pattern = ""; - my @next_char = ( '"', '"' ); - my $i_next = 0; - unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } - my $fh = $self->{_fh}; - - # FIXME: could convert to use of token_array instead - foreach my $j ( 0 .. @{$rtoken_type} - 1 ) { - - # testing patterns - if ( $rtoken_type->[$j] eq 'k' ) { - $pattern .= $rtokens->[$j]; - } - else { - $pattern .= $rtoken_type->[$j]; - } - $reconstructed_original .= $rtokens->[$j]; - $block_str .= "($rblock_type->[$j])"; - $num = length( $rtokens->[$j] ); - my $type_str = $rtoken_type->[$j]; - - # be sure there are no blank tokens (shouldn't happen) - # This can only happen if a programming error has been made - # because all valid tokens are non-blank - if ( $type_str eq ' ' ) { - print $fh "BLANK TOKEN on the next line\n"; - $type_str = $next_char[$i_next]; - $i_next = 1 - $i_next; - } - - if ( length($type_str) == 1 ) { - $type_str = $type_str x $num; - } - $token_str .= $type_str; - } - - # Write what you want here ... - # print $fh "$input_line\n"; - # print $fh "$pattern\n"; - print $fh "$reconstructed_original\n"; - print $fh "$token_str\n"; - - #print $fh "$block_str\n"; - return; -} - -##################################################################### -# -# The Perl::Tidy::LineBuffer class supplies a 'get_line()' -# method for returning the next line to be parsed, as well as a -# 'peek_ahead()' method -# -# The input parameter is an object with a 'get_line()' method -# which returns the next line to be parsed -# -##################################################################### - -package Perl::Tidy::LineBuffer; - -sub new { - - my ( $class, $line_source_object ) = @_; - - return bless { - _line_source_object => $line_source_object, - _rlookahead_buffer => [], - }, $class; -} - -sub peek_ahead { - my ( $self, $buffer_index ) = @_; - my $line = undef; - my $line_source_object = $self->{_line_source_object}; - my $rlookahead_buffer = $self->{_rlookahead_buffer}; - if ( $buffer_index < scalar( @{$rlookahead_buffer} ) ) { - $line = $rlookahead_buffer->[$buffer_index]; - } - else { - $line = $line_source_object->get_line(); - push( @{$rlookahead_buffer}, $line ); - } - return $line; -} - -sub get_line { - my $self = shift; - my $line = undef; - my $line_source_object = $self->{_line_source_object}; - my $rlookahead_buffer = $self->{_rlookahead_buffer}; - - if ( scalar( @{$rlookahead_buffer} ) ) { - $line = shift @{$rlookahead_buffer}; - } - else { - $line = $line_source_object->get_line(); - } - return $line; -} - -######################################################################## -# -# the Perl::Tidy::Tokenizer package is essentially a filter which -# reads lines of perl source code from a source object and provides -# corresponding tokenized lines through its get_line() method. Lines -# flow from the source_object to the caller like this: -# -# source_object --> LineBuffer_object --> Tokenizer --> calling routine -# get_line() get_line() get_line() line_of_tokens -# -# The source object can be any object with a get_line() method which -# supplies one line (a character string) perl call. -# The LineBuffer object is created by the Tokenizer. -# The Tokenizer returns a reference to a data structure 'line_of_tokens' -# containing one tokenized line for each call to its get_line() method. -# -# WARNING: This is not a real class yet. Only one tokenizer my be used. -# -######################################################################## - -package Perl::Tidy::Tokenizer; - -BEGIN { - - # Caution: these debug flags produce a lot of output - # They should all be 0 except when debugging small scripts - - use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; - use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; - use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; - use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; - use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; - - my $debug_warning = sub { - print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n"; - }; - - TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); - TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); - TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); - TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); - TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); - -} - -use Carp; - -# PACKAGE VARIABLES for processing an entire FILE. -use vars qw{ - $tokenizer_self - - $last_nonblank_token - $last_nonblank_type - $last_nonblank_block_type - $statement_type - $in_attribute_list - $current_package - $context - - %is_constant - %is_user_function - %user_function_prototype - %is_block_function - %is_block_list_function - %saw_function_definition - - $brace_depth - $paren_depth - $square_bracket_depth - - @current_depth - @total_depth - $total_depth - @nesting_sequence_number - @current_sequence_number - @paren_type - @paren_semicolon_count - @paren_structural_type - @brace_type - @brace_structural_type - @brace_context - @brace_package - @square_bracket_type - @square_bracket_structural_type - @depth_array - @nested_ternary_flag - @nested_statement_type - @starting_line_of_current_depth -}; - -# GLOBAL CONSTANTS for routines in this package -use vars qw{ - %is_indirect_object_taker - %is_block_operator - %expecting_operator_token - %expecting_operator_types - %expecting_term_types - %expecting_term_token - %is_digraph - %is_file_test_operator - %is_trigraph - %is_tetragraph - %is_valid_token_type - %is_keyword - %is_code_block_token - %really_want_term - @opening_brace_names - @closing_brace_names - %is_keyword_taking_list - %is_q_qq_qw_qx_qr_s_y_tr_m -}; - -# possible values of operator_expected() -use constant TERM => -1; -use constant UNKNOWN => 0; -use constant OPERATOR => 1; - -# possible values of context -use constant SCALAR_CONTEXT => -1; -use constant UNKNOWN_CONTEXT => 0; -use constant LIST_CONTEXT => 1; - -# Maximum number of little messages; probably need not be changed. -use constant MAX_NAG_MESSAGES => 6; - -{ - - # methods to count instances - my $_count = 0; - sub get_count { return $_count; } - sub _increment_count { return ++$_count } - sub _decrement_count { return --$_count } -} - -sub DESTROY { - my $self = shift; - $self->_decrement_count(); - return; -} - -sub new { - - my $class = shift; - - # Note: 'tabs' and 'indent_columns' are temporary and should be - # removed asap - my %defaults = ( - source_object => undef, - debugger_object => undef, - diagnostics_object => undef, - logger_object => undef, - starting_level => undef, - indent_columns => 4, - tabsize => 8, - look_for_hash_bang => 0, - trim_qw => 1, - look_for_autoloader => 1, - look_for_selfloader => 1, - starting_line_number => 1, - extended_syntax => 0, - ); - my %args = ( %defaults, @_ ); - - # we are given an object with a get_line() method to supply source lines - my $source_object = $args{source_object}; - - # we create another object with a get_line() and peek_ahead() method - my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); - - # Tokenizer state data is as follows: - # _rhere_target_list reference to list of here-doc targets - # _here_doc_target the target string for a here document - # _here_quote_character the type of here-doc quoting (" ' ` or none) - # to determine if interpolation is done - # _quote_target character we seek if chasing a quote - # _line_start_quote line where we started looking for a long quote - # _in_here_doc flag indicating if we are in a here-doc - # _in_pod flag set if we are in pod documentation - # _in_error flag set if we saw severe error (binary in script) - # _in_data flag set if we are in __DATA__ section - # _in_end flag set if we are in __END__ section - # _in_format flag set if we are in a format description - # _in_attribute_list flag telling if we are looking for attributes - # _in_quote flag telling if we are chasing a quote - # _starting_level indentation level of first line - # _line_buffer_object object with get_line() method to supply source code - # _diagnostics_object place to write debugging information - # _unexpected_error_count error count used to limit output - # _lower_case_labels_at line numbers where lower case labels seen - # _hit_bug program bug detected - $tokenizer_self = { - _rhere_target_list => [], - _in_here_doc => 0, - _here_doc_target => "", - _here_quote_character => "", - _in_data => 0, - _in_end => 0, - _in_format => 0, - _in_error => 0, - _in_pod => 0, - _in_attribute_list => 0, - _in_quote => 0, - _quote_target => "", - _line_start_quote => -1, - _starting_level => $args{starting_level}, - _know_starting_level => defined( $args{starting_level} ), - _tabsize => $args{tabsize}, - _indent_columns => $args{indent_columns}, - _look_for_hash_bang => $args{look_for_hash_bang}, - _trim_qw => $args{trim_qw}, - _continuation_indentation => $args{continuation_indentation}, - _outdent_labels => $args{outdent_labels}, - _last_line_number => $args{starting_line_number} - 1, - _saw_perl_dash_P => 0, - _saw_perl_dash_w => 0, - _saw_use_strict => 0, - _saw_v_string => 0, - _hit_bug => 0, - _look_for_autoloader => $args{look_for_autoloader}, - _look_for_selfloader => $args{look_for_selfloader}, - _saw_autoloader => 0, - _saw_selfloader => 0, - _saw_hash_bang => 0, - _saw_end => 0, - _saw_data => 0, - _saw_negative_indentation => 0, - _started_tokenizing => 0, - _line_buffer_object => $line_buffer_object, - _debugger_object => $args{debugger_object}, - _diagnostics_object => $args{diagnostics_object}, - _logger_object => $args{logger_object}, - _unexpected_error_count => 0, - _started_looking_for_here_target_at => 0, - _nearly_matched_here_target_at => undef, - _line_text => "", - _rlower_case_labels_at => undef, - _extended_syntax => $args{extended_syntax}, - }; - - prepare_for_a_new_file(); - find_starting_indentation_level(); - - bless $tokenizer_self, $class; - - # This is not a full class yet, so die if an attempt is made to - # create more than one object. - - if ( _increment_count() > 1 ) { - confess -"Attempt to create more than 1 object in $class, which is not a true class yet\n"; - } - - return $tokenizer_self; - -} - -# interface to Perl::Tidy::Logger routines -sub warning { - my $msg = shift; - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->warning($msg); - } - return; -} - -sub complain { - my $msg = shift; - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->complain($msg); - } - return; -} - -sub write_logfile_entry { - my $msg = shift; - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->write_logfile_entry($msg); - } - return; -} - -sub interrupt_logfile { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->interrupt_logfile(); - } - return; -} - -sub resume_logfile { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->resume_logfile(); - } - return; -} - -sub increment_brace_error { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->increment_brace_error(); - } - return; -} - -sub report_definite_bug { - $tokenizer_self->{_hit_bug} = 1; - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->report_definite_bug(); - } - return; -} - -sub brace_warning { - my $msg = shift; - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - $logger_object->brace_warning($msg); - } - return; -} - -sub get_saw_brace_error { - my $logger_object = $tokenizer_self->{_logger_object}; - if ($logger_object) { - return $logger_object->get_saw_brace_error(); - } - else { - return 0; - } -} - -# interface to Perl::Tidy::Diagnostics routines -sub write_diagnostics { - my $msg = shift; - if ( $tokenizer_self->{_diagnostics_object} ) { - $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg); - } - return; -} - -sub report_tokenization_errors { - - my $self = shift; - my $severe_error = $self->{_in_error}; - - my $level = get_indentation_level(); - if ( $level != $tokenizer_self->{_starting_level} ) { - warning("final indentation level: $level\n"); - } - - check_final_nesting_depths(); - - if ( $tokenizer_self->{_look_for_hash_bang} - && !$tokenizer_self->{_saw_hash_bang} ) - { - warning( - "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); - } - - if ( $tokenizer_self->{_in_format} ) { - warning("hit EOF while in format description\n"); - } - - if ( $tokenizer_self->{_in_pod} ) { - - # Just write log entry if this is after __END__ or __DATA__ - # because this happens to often, and it is not likely to be - # a parsing error. - if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { - write_logfile_entry( -"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" - ); - } - - else { - complain( -"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" - ); - } - - } - - if ( $tokenizer_self->{_in_here_doc} ) { - $severe_error = 1; - my $here_doc_target = $tokenizer_self->{_here_doc_target}; - my $started_looking_for_here_target_at = - $tokenizer_self->{_started_looking_for_here_target_at}; - if ($here_doc_target) { - warning( -"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" - ); - } - else { - warning( -"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" - ); - } - my $nearly_matched_here_target_at = - $tokenizer_self->{_nearly_matched_here_target_at}; - if ($nearly_matched_here_target_at) { - warning( -"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" - ); - } - } - - if ( $tokenizer_self->{_in_quote} ) { - $severe_error = 1; - my $line_start_quote = $tokenizer_self->{_line_start_quote}; - my $quote_target = $tokenizer_self->{_quote_target}; - my $what = - ( $tokenizer_self->{_in_attribute_list} ) - ? "attribute list" - : "quote/pattern"; - warning( -"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" - ); - } - - if ( $tokenizer_self->{_hit_bug} ) { - $severe_error = 1; - } - - my $logger_object = $tokenizer_self->{_logger_object}; - -# TODO: eventually may want to activate this to cause file to be output verbatim - if (0) { - - # Set the severe error for a fairly high warning count because - # some of the warnings do not harm formatting, such as duplicate - # sub names. - my $warning_count = $logger_object->{_warning_count}; - if ( $warning_count > 50 ) { - $severe_error = 1; - } - - # Brace errors are significant, so set the severe error flag at - # a low number. - my $saw_brace_error = $logger_object->{_saw_brace_error}; - if ( $saw_brace_error > 2 ) { - $severe_error = 1; - } - } - - unless ( $tokenizer_self->{_saw_perl_dash_w} ) { - if ( $] < 5.006 ) { - write_logfile_entry("Suggest including '-w parameter'\n"); - } - else { - write_logfile_entry("Suggest including 'use warnings;'\n"); - } - } - - if ( $tokenizer_self->{_saw_perl_dash_P} ) { - write_logfile_entry("Use of -P parameter for defines is discouraged\n"); - } - - unless ( $tokenizer_self->{_saw_use_strict} ) { - write_logfile_entry("Suggest including 'use strict;'\n"); - } - - # it is suggested that labels have at least one upper case character - # for legibility and to avoid code breakage as new keywords are introduced - if ( $tokenizer_self->{_rlower_case_labels_at} ) { - my @lower_case_labels_at = - @{ $tokenizer_self->{_rlower_case_labels_at} }; - write_logfile_entry( - "Suggest using upper case characters in label(s)\n"); - local $" = ')('; - write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); - } - return $severe_error; -} - -sub report_v_string { - - # warn if this version can't handle v-strings - my $tok = shift; - unless ( $tokenizer_self->{_saw_v_string} ) { - $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; - } - if ( $] < 5.006 ) { - warning( -"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" - ); - } - return; -} - -sub get_input_line_number { - return $tokenizer_self->{_last_line_number}; -} - -# returns the next tokenized line -sub get_line { - - my $self = shift; - - # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, - # $square_bracket_depth, $paren_depth - - my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); - $tokenizer_self->{_line_text} = $input_line; - - return unless ($input_line); - - my $input_line_number = ++$tokenizer_self->{_last_line_number}; - - # Find and remove what characters terminate this line, including any - # control r - my $input_line_separator = ""; - if ( chomp($input_line) ) { $input_line_separator = $/ } - - # TODO: what other characters should be included here? - if ( $input_line =~ s/((\r|\035|\032)+)$// ) { - $input_line_separator = $2 . $input_line_separator; - } - - # for backwards compatibility we keep the line text terminated with - # a newline character - $input_line .= "\n"; - $tokenizer_self->{_line_text} = $input_line; # update - - # create a data structure describing this line which will be - # returned to the caller. - - # _line_type codes are: - # SYSTEM - system-specific code before hash-bang line - # CODE - line of perl code (including comments) - # POD_START - line starting pod, such as '=head' - # POD - pod documentation text - # POD_END - last line of pod section, '=cut' - # HERE - text of here-document - # HERE_END - last line of here-doc (target word) - # FORMAT - format section - # FORMAT_END - last line of format section, '.' - # DATA_START - __DATA__ line - # DATA - unidentified text following __DATA__ - # END_START - __END__ line - # END - unidentified text following __END__ - # ERROR - we are in big trouble, probably not a perl script - - # Other variables: - # _curly_brace_depth - depth of curly braces at start of line - # _square_bracket_depth - depth of square brackets at start of line - # _paren_depth - depth of parens at start of line - # _starting_in_quote - this line continues a multi-line quote - # (so don't trim leading blanks!) - # _ending_in_quote - this line ends in a multi-line quote - # (so don't trim trailing blanks!) - my $line_of_tokens = { - _line_type => 'EOF', - _line_text => $input_line, - _line_number => $input_line_number, - _rtoken_type => undef, - _rtokens => undef, - _rlevels => undef, - _rslevels => undef, - _rblock_type => undef, - _rcontainer_type => undef, - _rcontainer_environment => undef, - _rtype_sequence => undef, - _rnesting_tokens => undef, - _rci_levels => undef, - _rnesting_blocks => undef, - _guessed_indentation_level => 0, - _starting_in_quote => 0, # to be set by subroutine - _ending_in_quote => 0, - _curly_brace_depth => $brace_depth, - _square_bracket_depth => $square_bracket_depth, - _paren_depth => $paren_depth, - _quote_character => '', - }; - - # must print line unchanged if we are in a here document - if ( $tokenizer_self->{_in_here_doc} ) { - - $line_of_tokens->{_line_type} = 'HERE'; - my $here_doc_target = $tokenizer_self->{_here_doc_target}; - my $here_quote_character = $tokenizer_self->{_here_quote_character}; - my $candidate_target = $input_line; - chomp $candidate_target; - - # Handle <<~ targets, which are indicated here by a leading space on - # the here quote character - if ( $here_quote_character =~ /^\s/ ) { - $candidate_target =~ s/^\s*//; - } - if ( $candidate_target eq $here_doc_target ) { - $tokenizer_self->{_nearly_matched_here_target_at} = undef; - $line_of_tokens->{_line_type} = 'HERE_END'; - write_logfile_entry("Exiting HERE document $here_doc_target\n"); - - my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; - if ( @{$rhere_target_list} ) { # there can be multiple here targets - ( $here_doc_target, $here_quote_character ) = - @{ shift @{$rhere_target_list} }; - $tokenizer_self->{_here_doc_target} = $here_doc_target; - $tokenizer_self->{_here_quote_character} = - $here_quote_character; - write_logfile_entry( - "Entering HERE document $here_doc_target\n"); - $tokenizer_self->{_nearly_matched_here_target_at} = undef; - $tokenizer_self->{_started_looking_for_here_target_at} = - $input_line_number; - } - else { - $tokenizer_self->{_in_here_doc} = 0; - $tokenizer_self->{_here_doc_target} = ""; - $tokenizer_self->{_here_quote_character} = ""; - } - } - - # check for error of extra whitespace - # note for PERL6: leading whitespace is allowed - else { - $candidate_target =~ s/\s*$//; - $candidate_target =~ s/^\s*//; - if ( $candidate_target eq $here_doc_target ) { - $tokenizer_self->{_nearly_matched_here_target_at} = - $input_line_number; - } - } - return $line_of_tokens; - } - - # must print line unchanged if we are in a format section - elsif ( $tokenizer_self->{_in_format} ) { - - if ( $input_line =~ /^\.[\s#]*$/ ) { - write_logfile_entry("Exiting format section\n"); - $tokenizer_self->{_in_format} = 0; - $line_of_tokens->{_line_type} = 'FORMAT_END'; - } - else { - $line_of_tokens->{_line_type} = 'FORMAT'; - } - return $line_of_tokens; - } - - # must print line unchanged if we are in pod documentation - elsif ( $tokenizer_self->{_in_pod} ) { - - $line_of_tokens->{_line_type} = 'POD'; - if ( $input_line =~ /^=cut/ ) { - $line_of_tokens->{_line_type} = 'POD_END'; - write_logfile_entry("Exiting POD section\n"); - $tokenizer_self->{_in_pod} = 0; - } - if ( $input_line =~ /^\#\!.*perl\b/ ) { - warning( - "Hash-bang in pod can cause older versions of perl to fail! \n" - ); - } - - return $line_of_tokens; - } - - # must print line unchanged if we have seen a severe error (i.e., we - # are seeing illegal tokens and cannot continue. Syntax errors do - # not pass this route). Calling routine can decide what to do, but - # the default can be to just pass all lines as if they were after __END__ - elsif ( $tokenizer_self->{_in_error} ) { - $line_of_tokens->{_line_type} = 'ERROR'; - return $line_of_tokens; - } - - # print line unchanged if we are __DATA__ section - elsif ( $tokenizer_self->{_in_data} ) { - - # ...but look for POD - # Note that the _in_data and _in_end flags remain set - # so that we return to that state after seeing the - # end of a pod section - if ( $input_line =~ /^=(?!cut)/ ) { - $line_of_tokens->{_line_type} = 'POD_START'; - write_logfile_entry("Entering POD section\n"); - $tokenizer_self->{_in_pod} = 1; - return $line_of_tokens; - } - else { - $line_of_tokens->{_line_type} = 'DATA'; - return $line_of_tokens; - } - } - - # print line unchanged if we are in __END__ section - elsif ( $tokenizer_self->{_in_end} ) { - - # ...but look for POD - # Note that the _in_data and _in_end flags remain set - # so that we return to that state after seeing the - # end of a pod section - if ( $input_line =~ /^=(?!cut)/ ) { - $line_of_tokens->{_line_type} = 'POD_START'; - write_logfile_entry("Entering POD section\n"); - $tokenizer_self->{_in_pod} = 1; - return $line_of_tokens; - } - else { - $line_of_tokens->{_line_type} = 'END'; - return $line_of_tokens; - } - } - - # check for a hash-bang line if we haven't seen one - if ( !$tokenizer_self->{_saw_hash_bang} ) { - if ( $input_line =~ /^\#\!.*perl\b/ ) { - $tokenizer_self->{_saw_hash_bang} = $input_line_number; - - # check for -w and -P flags - if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { - $tokenizer_self->{_saw_perl_dash_P} = 1; - } - - if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { - $tokenizer_self->{_saw_perl_dash_w} = 1; - } - - if ( - ( $input_line_number > 1 ) - - # leave any hash bang in a BEGIN block alone - # i.e. see 'debugger-duck_type.t' - && !( - $last_nonblank_block_type - && $last_nonblank_block_type eq 'BEGIN' - ) - && ( !$tokenizer_self->{_look_for_hash_bang} ) - ) - { - - # this is helpful for VMS systems; we may have accidentally - # tokenized some DCL commands - if ( $tokenizer_self->{_started_tokenizing} ) { - warning( -"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" - ); - } - else { - complain("Useless hash-bang after line 1\n"); - } - } - - # Report the leading hash-bang as a system line - # This will prevent -dac from deleting it - else { - $line_of_tokens->{_line_type} = 'SYSTEM'; - return $line_of_tokens; - } - } - } - - # wait for a hash-bang before parsing if the user invoked us with -x - if ( $tokenizer_self->{_look_for_hash_bang} - && !$tokenizer_self->{_saw_hash_bang} ) - { - $line_of_tokens->{_line_type} = 'SYSTEM'; - return $line_of_tokens; - } - - # a first line of the form ': #' will be marked as SYSTEM - # since lines of this form may be used by tcsh - if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { - $line_of_tokens->{_line_type} = 'SYSTEM'; - return $line_of_tokens; - } - - # now we know that it is ok to tokenize the line... - # the line tokenizer will modify any of these private variables: - # _rhere_target_list - # _in_data - # _in_end - # _in_format - # _in_error - # _in_pod - # _in_quote - my $ending_in_quote_last = $tokenizer_self->{_in_quote}; - tokenize_this_line($line_of_tokens); - - # Now finish defining the return structure and return it - $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; - - # handle severe error (binary data in script) - if ( $tokenizer_self->{_in_error} ) { - $tokenizer_self->{_in_quote} = 0; # to avoid any more messages - warning("Giving up after error\n"); - $line_of_tokens->{_line_type} = 'ERROR'; - reset_indentation_level(0); # avoid error messages - return $line_of_tokens; - } - - # handle start of pod documentation - if ( $tokenizer_self->{_in_pod} ) { - - # This gets tricky..above a __DATA__ or __END__ section, perl - # accepts '=cut' as the start of pod section. But afterwards, - # only pod utilities see it and they may ignore an =cut without - # leading =head. In any case, this isn't good. - if ( $input_line =~ /^=cut\b/ ) { - if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { - complain("=cut while not in pod ignored\n"); - $tokenizer_self->{_in_pod} = 0; - $line_of_tokens->{_line_type} = 'POD_END'; - } - else { - $line_of_tokens->{_line_type} = 'POD_START'; - complain( -"=cut starts a pod section .. this can fool pod utilities.\n" - ); - write_logfile_entry("Entering POD section\n"); - } - } - - else { - $line_of_tokens->{_line_type} = 'POD_START'; - write_logfile_entry("Entering POD section\n"); - } - - return $line_of_tokens; - } - - # update indentation levels for log messages - if ( $input_line !~ /^\s*$/ ) { - my $rlevels = $line_of_tokens->{_rlevels}; - $line_of_tokens->{_guessed_indentation_level} = - guess_old_indentation_level($input_line); - } - - # see if this line contains here doc targets - my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; - if ( @{$rhere_target_list} ) { - - my ( $here_doc_target, $here_quote_character ) = - @{ shift @{$rhere_target_list} }; - $tokenizer_self->{_in_here_doc} = 1; - $tokenizer_self->{_here_doc_target} = $here_doc_target; - $tokenizer_self->{_here_quote_character} = $here_quote_character; - write_logfile_entry("Entering HERE document $here_doc_target\n"); - $tokenizer_self->{_started_looking_for_here_target_at} = - $input_line_number; - } - - # NOTE: __END__ and __DATA__ statements are written unformatted - # because they can theoretically contain additional characters - # which are not tokenized (and cannot be read with either!). - if ( $tokenizer_self->{_in_data} ) { - $line_of_tokens->{_line_type} = 'DATA_START'; - write_logfile_entry("Starting __DATA__ section\n"); - $tokenizer_self->{_saw_data} = 1; - - # keep parsing after __DATA__ if use SelfLoader was seen - if ( $tokenizer_self->{_saw_selfloader} ) { - $tokenizer_self->{_in_data} = 0; - write_logfile_entry( - "SelfLoader seen, continuing; -nlsl deactivates\n"); - } - - return $line_of_tokens; - } - - elsif ( $tokenizer_self->{_in_end} ) { - $line_of_tokens->{_line_type} = 'END_START'; - write_logfile_entry("Starting __END__ section\n"); - $tokenizer_self->{_saw_end} = 1; - - # keep parsing after __END__ if use AutoLoader was seen - if ( $tokenizer_self->{_saw_autoloader} ) { - $tokenizer_self->{_in_end} = 0; - write_logfile_entry( - "AutoLoader seen, continuing; -nlal deactivates\n"); - } - return $line_of_tokens; - } - - # now, finally, we know that this line is type 'CODE' - $line_of_tokens->{_line_type} = 'CODE'; - - # remember if we have seen any real code - if ( !$tokenizer_self->{_started_tokenizing} - && $input_line !~ /^\s*$/ - && $input_line !~ /^\s*#/ ) - { - $tokenizer_self->{_started_tokenizing} = 1; - } - - if ( $tokenizer_self->{_debugger_object} ) { - $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); - } - - # Note: if keyword 'format' occurs in this line code, it is still CODE - # (keyword 'format' need not start a line) - if ( $tokenizer_self->{_in_format} ) { - write_logfile_entry("Entering format section\n"); - } - - if ( $tokenizer_self->{_in_quote} - and ( $tokenizer_self->{_line_start_quote} < 0 ) ) - { - - #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { - if ( - ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) - { - $tokenizer_self->{_line_start_quote} = $input_line_number; - write_logfile_entry( - "Start multi-line quote or pattern ending in $quote_target\n"); - } - } - elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) - && !$tokenizer_self->{_in_quote} ) - { - $tokenizer_self->{_line_start_quote} = -1; - write_logfile_entry("End of multi-line quote or pattern\n"); - } - - # we are returning a line of CODE - return $line_of_tokens; -} - -sub find_starting_indentation_level { - - # We need to find the indentation level of the first line of the - # script being formatted. Often it will be zero for an entire file, - # but if we are formatting a local block of code (within an editor for - # example) it may not be zero. The user may specify this with the - # -sil=n parameter but normally doesn't so we have to guess. - # - # USES GLOBAL VARIABLES: $tokenizer_self - my $starting_level = 0; - - # use value if given as parameter - if ( $tokenizer_self->{_know_starting_level} ) { - $starting_level = $tokenizer_self->{_starting_level}; - } - - # if we know there is a hash_bang line, the level must be zero - elsif ( $tokenizer_self->{_look_for_hash_bang} ) { - $tokenizer_self->{_know_starting_level} = 1; - } - - # otherwise figure it out from the input file - else { - my $line; - my $i = 0; - - # keep looking at lines until we find a hash bang or piece of code - my $msg = ""; - while ( $line = - $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) - { - - # if first line is #! then assume starting level is zero - if ( $i == 1 && $line =~ /^\#\!/ ) { - $starting_level = 0; - last; - } - next if ( $line =~ /^\s*#/ ); # skip past comments - next if ( $line =~ /^\s*$/ ); # skip past blank lines - $starting_level = guess_old_indentation_level($line); - last; - } - $msg = "Line $i implies starting-indentation-level = $starting_level\n"; - write_logfile_entry("$msg"); - } - $tokenizer_self->{_starting_level} = $starting_level; - reset_indentation_level($starting_level); - return; -} - -sub guess_old_indentation_level { - my ($line) = @_; - - # Guess the indentation level of an input line. - # - # For the first line of code this result will define the starting - # indentation level. It will mainly be non-zero when perltidy is applied - # within an editor to a local block of code. - # - # This is an impossible task in general because we can't know what tabs - # meant for the old script and how many spaces were used for one - # indentation level in the given input script. For example it may have - # been previously formatted with -i=7 -et=3. But we can at least try to - # make sure that perltidy guesses correctly if it is applied repeatedly to - # a block of code within an editor, so that the block stays at the same - # level when perltidy is applied repeatedly. - # - # USES GLOBAL VARIABLES: $tokenizer_self - my $level = 0; - - # find leading tabs, spaces, and any statement label - my $spaces = 0; - if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { - - # If there are leading tabs, we use the tab scheme for this run, if - # any, so that the code will remain stable when editing. - if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} } - - if ($2) { $spaces += length($2) } - - # correct for outdented labels - if ( $3 && $tokenizer_self->{'_outdent_labels'} ) { - $spaces += $tokenizer_self->{_continuation_indentation}; - } - } - - # compute indentation using the value of -i for this run. - # If -i=0 is used for this run (which is possible) it doesn't matter - # what we do here but we'll guess that the old run used 4 spaces per level. - my $indent_columns = $tokenizer_self->{_indent_columns}; - $indent_columns = 4 if ( !$indent_columns ); - $level = int( $spaces / $indent_columns ); - return ($level); -} - -# This is a currently unused debug routine -sub dump_functions { - - my $fh = *STDOUT; - foreach my $pkg ( keys %is_user_function ) { - print $fh "\nnon-constant subs in package $pkg\n"; - - foreach my $sub ( keys %{ $is_user_function{$pkg} } ) { - my $msg = ""; - if ( $is_block_list_function{$pkg}{$sub} ) { - $msg = 'block_list'; - } - - if ( $is_block_function{$pkg}{$sub} ) { - $msg = 'block'; - } - print $fh "$sub $msg\n"; - } - } - - foreach my $pkg ( keys %is_constant ) { - print $fh "\nconstants and constant subs in package $pkg\n"; - - foreach my $sub ( keys %{ $is_constant{$pkg} } ) { - print $fh "$sub\n"; - } - } - return; -} - -sub ones_count { - - # count number of 1's in a string of 1's and 0's - # example: ones_count("010101010101") gives 6 - my $str = shift; - return $str =~ tr/1/0/; -} - -sub prepare_for_a_new_file { - - # previous tokens needed to determine what to expect next - $last_nonblank_token = ';'; # the only possible starting state which - $last_nonblank_type = ';'; # will make a leading brace a code block - $last_nonblank_block_type = ''; - - # scalars for remembering statement types across multiple lines - $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' - $in_attribute_list = 0; - - # scalars for remembering where we are in the file - $current_package = "main"; - $context = UNKNOWN_CONTEXT; - - # hashes used to remember function information - %is_constant = (); # user-defined constants - %is_user_function = (); # user-defined functions - %user_function_prototype = (); # their prototypes - %is_block_function = (); - %is_block_list_function = (); - %saw_function_definition = (); - - # variables used to track depths of various containers - # and report nesting errors - $paren_depth = 0; - $brace_depth = 0; - $square_bracket_depth = 0; - @current_depth[ 0 .. $#closing_brace_names ] = - (0) x scalar @closing_brace_names; - $total_depth = 0; - @total_depth = (); - @nesting_sequence_number[ 0 .. $#closing_brace_names ] = - ( 0 .. $#closing_brace_names ); - @current_sequence_number = (); - $paren_type[$paren_depth] = ''; - $paren_semicolon_count[$paren_depth] = 0; - $paren_structural_type[$brace_depth] = ''; - $brace_type[$brace_depth] = ';'; # identify opening brace as code block - $brace_structural_type[$brace_depth] = ''; - $brace_context[$brace_depth] = UNKNOWN_CONTEXT; - $brace_package[$paren_depth] = $current_package; - $square_bracket_type[$square_bracket_depth] = ''; - $square_bracket_structural_type[$square_bracket_depth] = ''; - - initialize_tokenizer_state(); - return; -} - -{ # begin tokenize_this_line - - use constant BRACE => 0; - use constant SQUARE_BRACKET => 1; - use constant PAREN => 2; - use constant QUESTION_COLON => 3; - - # TV1: scalars for processing one LINE. - # Re-initialized on each entry to sub tokenize_this_line. - my ( - $block_type, $container_type, $expecting, - $i, $i_tok, $input_line, - $input_line_number, $last_nonblank_i, $max_token_index, - $next_tok, $next_type, $peeked_ahead, - $prototype, $rhere_target_list, $rtoken_map, - $rtoken_type, $rtokens, $tok, - $type, $type_sequence, $indent_flag, - ); - - # TV2: refs to ARRAYS for processing one LINE - # Re-initialized on each call. - my $routput_token_list = []; # stack of output token indexes - my $routput_token_type = []; # token types - my $routput_block_type = []; # types of code block - my $routput_container_type = []; # paren types, such as if, elsif, .. - my $routput_type_sequence = []; # nesting sequential number - my $routput_indent_flag = []; # - - # TV3: SCALARS for quote variables. These are initialized with a - # subroutine call and continually updated as lines are processed. - my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); - - # TV4: SCALARS for multi-line identifiers and - # statements. These are initialized with a subroutine call - # and continually updated as lines are processed. - my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); - - # TV5: SCALARS for tracking indentation level. - # Initialized once and continually updated as lines are - # processed. - my ( - $nesting_token_string, $nesting_type_string, - $nesting_block_string, $nesting_block_flag, - $nesting_list_string, $nesting_list_flag, - $ci_string_in_tokenizer, $continuation_string_in_tokenizer, - $in_statement_continuation, $level_in_tokenizer, - $slevel_in_tokenizer, $rslevel_stack, - ); - - # TV6: SCALARS for remembering several previous - # tokens. Initialized once and continually updated as - # lines are processed. - my ( - $last_nonblank_container_type, $last_nonblank_type_sequence, - $last_last_nonblank_token, $last_last_nonblank_type, - $last_last_nonblank_block_type, $last_last_nonblank_container_type, - $last_last_nonblank_type_sequence, $last_nonblank_prototype, - ); - - # ---------------------------------------------------------------- - # beginning of tokenizer variable access and manipulation routines - # ---------------------------------------------------------------- - - sub initialize_tokenizer_state { - - # TV1: initialized on each call - # TV2: initialized on each call - # TV3: - $in_quote = 0; - $quote_type = 'Q'; - $quote_character = ""; - $quote_pos = 0; - $quote_depth = 0; - $quoted_string_1 = ""; - $quoted_string_2 = ""; - $allowed_quote_modifiers = ""; - - # TV4: - $id_scan_state = ''; - $identifier = ''; - $want_paren = ""; - $indented_if_level = 0; - - # TV5: - $nesting_token_string = ""; - $nesting_type_string = ""; - $nesting_block_string = '1'; # initially in a block - $nesting_block_flag = 1; - $nesting_list_string = '0'; # initially not in a list - $nesting_list_flag = 0; # initially not in a list - $ci_string_in_tokenizer = ""; - $continuation_string_in_tokenizer = "0"; - $in_statement_continuation = 0; - $level_in_tokenizer = 0; - $slevel_in_tokenizer = 0; - $rslevel_stack = []; - - # TV6: - $last_nonblank_container_type = ''; - $last_nonblank_type_sequence = ''; - $last_last_nonblank_token = ';'; - $last_last_nonblank_type = ';'; - $last_last_nonblank_block_type = ''; - $last_last_nonblank_container_type = ''; - $last_last_nonblank_type_sequence = ''; - $last_nonblank_prototype = ""; - return; - } - - sub save_tokenizer_state { - - my $rTV1 = [ - $block_type, $container_type, $expecting, - $i, $i_tok, $input_line, - $input_line_number, $last_nonblank_i, $max_token_index, - $next_tok, $next_type, $peeked_ahead, - $prototype, $rhere_target_list, $rtoken_map, - $rtoken_type, $rtokens, $tok, - $type, $type_sequence, $indent_flag, - ]; - - my $rTV2 = [ - $routput_token_list, $routput_token_type, - $routput_block_type, $routput_container_type, - $routput_type_sequence, $routput_indent_flag, - ]; - - my $rTV3 = [ - $in_quote, $quote_type, - $quote_character, $quote_pos, - $quote_depth, $quoted_string_1, - $quoted_string_2, $allowed_quote_modifiers, - ]; - - my $rTV4 = - [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; - - my $rTV5 = [ - $nesting_token_string, $nesting_type_string, - $nesting_block_string, $nesting_block_flag, - $nesting_list_string, $nesting_list_flag, - $ci_string_in_tokenizer, $continuation_string_in_tokenizer, - $in_statement_continuation, $level_in_tokenizer, - $slevel_in_tokenizer, $rslevel_stack, - ]; - - my $rTV6 = [ - $last_nonblank_container_type, - $last_nonblank_type_sequence, - $last_last_nonblank_token, - $last_last_nonblank_type, - $last_last_nonblank_block_type, - $last_last_nonblank_container_type, - $last_last_nonblank_type_sequence, - $last_nonblank_prototype, - ]; - return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; - } - - sub restore_tokenizer_state { - my ($rstate) = @_; - my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; - ( - $block_type, $container_type, $expecting, - $i, $i_tok, $input_line, - $input_line_number, $last_nonblank_i, $max_token_index, - $next_tok, $next_type, $peeked_ahead, - $prototype, $rhere_target_list, $rtoken_map, - $rtoken_type, $rtokens, $tok, - $type, $type_sequence, $indent_flag, - ) = @{$rTV1}; - - ( - $routput_token_list, $routput_token_type, - $routput_block_type, $routput_container_type, - $routput_type_sequence, $routput_type_sequence, - ) = @{$rTV2}; - - ( - $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, - ) = @{$rTV3}; - - ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = - @{$rTV4}; - - ( - $nesting_token_string, $nesting_type_string, - $nesting_block_string, $nesting_block_flag, - $nesting_list_string, $nesting_list_flag, - $ci_string_in_tokenizer, $continuation_string_in_tokenizer, - $in_statement_continuation, $level_in_tokenizer, - $slevel_in_tokenizer, $rslevel_stack, - ) = @{$rTV5}; - - ( - $last_nonblank_container_type, - $last_nonblank_type_sequence, - $last_last_nonblank_token, - $last_last_nonblank_type, - $last_last_nonblank_block_type, - $last_last_nonblank_container_type, - $last_last_nonblank_type_sequence, - $last_nonblank_prototype, - ) = @{$rTV6}; - return; - } - - sub get_indentation_level { - - # patch to avoid reporting error if indented if is not terminated - if ($indented_if_level) { return $level_in_tokenizer - 1 } - return $level_in_tokenizer; - } - - sub reset_indentation_level { - $level_in_tokenizer = $slevel_in_tokenizer = shift; - push @{$rslevel_stack}, $slevel_in_tokenizer; - return; - } - - sub peeked_ahead { - my $flag = shift; - $peeked_ahead = defined($flag) ? $flag : $peeked_ahead; - return $peeked_ahead; - } - - # ------------------------------------------------------------ - # end of tokenizer variable access and manipulation routines - # ------------------------------------------------------------ - - # ------------------------------------------------------------ - # beginning of various scanner interface routines - # ------------------------------------------------------------ - sub scan_replacement_text { - - # check for here-docs in replacement text invoked by - # a substitution operator with executable modifier 'e'. - # - # given: - # $replacement_text - # return: - # $rht = reference to any here-doc targets - my ($replacement_text) = @_; - - # quick check - return unless ( $replacement_text =~ /<{_logger_object}; - - # localize all package variables - local ( - $tokenizer_self, $last_nonblank_token, - $last_nonblank_type, $last_nonblank_block_type, - $statement_type, $in_attribute_list, - $current_package, $context, - %is_constant, %is_user_function, - %user_function_prototype, %is_block_function, - %is_block_list_function, %saw_function_definition, - $brace_depth, $paren_depth, - $square_bracket_depth, @current_depth, - @total_depth, $total_depth, - @nesting_sequence_number, @current_sequence_number, - @paren_type, @paren_semicolon_count, - @paren_structural_type, @brace_type, - @brace_structural_type, @brace_context, - @brace_package, @square_bracket_type, - @square_bracket_structural_type, @depth_array, - @starting_line_of_current_depth, @nested_ternary_flag, - @nested_statement_type, - ); - - # save all lexical variables - my $rstate = save_tokenizer_state(); - _decrement_count(); # avoid error check for multiple tokenizers - - # make a new tokenizer - my $rOpts = {}; - my $rpending_logfile_message; - my $source_object = - Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, - $rpending_logfile_message ); - my $tokenizer = Perl::Tidy::Tokenizer->new( - source_object => $source_object, - logger_object => $logger_object, - starting_line_number => $input_line_number, - ); - - # scan the replacement text - 1 while ( $tokenizer->get_line() ); - - # remove any here doc targets - my $rht = undef; - if ( $tokenizer_self->{_in_here_doc} ) { - $rht = []; - push @{$rht}, - [ - $tokenizer_self->{_here_doc_target}, - $tokenizer_self->{_here_quote_character} - ]; - if ( $tokenizer_self->{_rhere_target_list} ) { - push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; - $tokenizer_self->{_rhere_target_list} = undef; - } - $tokenizer_self->{_in_here_doc} = undef; - } - - # now its safe to report errors - my $severe_error = $tokenizer->report_tokenization_errors(); - - # TODO: Could propagate a severe error up - - # restore all tokenizer lexical variables - restore_tokenizer_state($rstate); - - # return the here doc targets - return $rht; - } - - sub scan_bare_identifier { - ( $i, $tok, $type, $prototype ) = - scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, - $rtoken_map, $max_token_index ); - return; - } - - sub scan_identifier { - ( $i, $tok, $type, $id_scan_state, $identifier ) = - scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, - $max_token_index, $expecting, $paren_type[$paren_depth] ); - return; - } - - sub scan_id { - ( $i, $tok, $type, $id_scan_state ) = - scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, - $id_scan_state, $max_token_index ); - return; - } - - sub scan_number { - my $number; - ( $i, $type, $number ) = - scan_number_do( $input_line, $i, $rtoken_map, $type, - $max_token_index ); - return $number; - } - - # a sub to warn if token found where term expected - sub error_if_expecting_TERM { - if ( $expecting == TERM ) { - if ( $really_want_term{$last_nonblank_type} ) { - report_unexpected( $tok, "term", $i_tok, $last_nonblank_i, - $rtoken_map, $rtoken_type, $input_line ); - return 1; - } - } - return; - } - - # a sub to warn if token found where operator expected - sub error_if_expecting_OPERATOR { - my $thing = shift; - if ( $expecting == OPERATOR ) { - if ( !defined($thing) ) { $thing = $tok } - report_unexpected( $thing, "operator", $i_tok, $last_nonblank_i, - $rtoken_map, $rtoken_type, $input_line ); - if ( $i_tok == 0 ) { - interrupt_logfile(); - warning("Missing ';' above?\n"); - resume_logfile(); - } - return 1; - } - return; - } - - # ------------------------------------------------------------ - # end scanner interfaces - # ------------------------------------------------------------ - - my %is_for_foreach; - @_ = qw(for foreach); - @is_for_foreach{@_} = (1) x scalar(@_); - - my %is_my_our; - @_ = qw(my our); - @is_my_our{@_} = (1) x scalar(@_); - - # These keywords may introduce blocks after parenthesized expressions, - # in the form: - # keyword ( .... ) { BLOCK } - # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' - my %is_blocktype_with_paren; - @_ = - qw(if elsif unless while until for foreach switch case given when catch); - @is_blocktype_with_paren{@_} = (1) x scalar(@_); - - # ------------------------------------------------------------ - # begin hash of code for handling most token types - # ------------------------------------------------------------ - my $tokenization_code = { - - # no special code for these types yet, but syntax checks - # could be added - -## '!' => undef, -## '!=' => undef, -## '!~' => undef, -## '%=' => undef, -## '&&=' => undef, -## '&=' => undef, -## '+=' => undef, -## '-=' => undef, -## '..' => undef, -## '..' => undef, -## '...' => undef, -## '.=' => undef, -## '<<=' => undef, -## '<=' => undef, -## '<=>' => undef, -## '<>' => undef, -## '=' => undef, -## '==' => undef, -## '=~' => undef, -## '>=' => undef, -## '>>' => undef, -## '>>=' => undef, -## '\\' => undef, -## '^=' => undef, -## '|=' => undef, -## '||=' => undef, -## '//=' => undef, -## '~' => undef, -## '~~' => undef, -## '!~~' => undef, - - '>' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - '|' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - '$' => sub { - - # start looking for a scalar - error_if_expecting_OPERATOR("Scalar") - if ( $expecting == OPERATOR ); - scan_identifier(); - - if ( $identifier eq '$^W' ) { - $tokenizer_self->{_saw_perl_dash_w} = 1; - } - - # Check for identifier in indirect object slot - # (vorboard.pl, sort.t). Something like: - # /^(print|printf|sort|exec|system)$/ - if ( - $is_indirect_object_taker{$last_nonblank_token} - - || ( ( $last_nonblank_token eq '(' ) - && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) - || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object - ) - { - $type = 'Z'; - } - }, - '(' => sub { - - ++$paren_depth; - $paren_semicolon_count[$paren_depth] = 0; - if ($want_paren) { - $container_type = $want_paren; - $want_paren = ""; - } - elsif ( $statement_type =~ /^sub\b/ ) { - $container_type = $statement_type; - } - else { - $container_type = $last_nonblank_token; - - # We can check for a syntax error here of unexpected '(', - # but this is going to get messy... - if ( - $expecting == OPERATOR - - # be sure this is not a method call of the form - # &method(...), $method->(..), &{method}(...), - # $ref[2](list) is ok & short for $ref[2]->(list) - # NOTE: at present, braces in something like &{ xxx } - # are not marked as a block, we might have a method call - && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ - - ) - { - - # ref: camel 3 p 703. - if ( $last_last_nonblank_token eq 'do' ) { - complain( -"do SUBROUTINE is deprecated; consider & or -> notation\n" - ); - } - else { - - # if this is an empty list, (), then it is not an - # error; for example, we might have a constant pi and - # invoke it with pi() or just pi; - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, - $max_token_index ); - if ( $next_nonblank_token ne ')' ) { - my $hint; - error_if_expecting_OPERATOR('('); - - if ( $last_nonblank_type eq 'C' ) { - $hint = - "$last_nonblank_token has a void prototype\n"; - } - elsif ( $last_nonblank_type eq 'i' ) { - if ( $i_tok > 0 - && $last_nonblank_token =~ /^\$/ ) - { - $hint = -"Do you mean '$last_nonblank_token->(' ?\n"; - } - } - if ($hint) { - interrupt_logfile(); - warning($hint); - resume_logfile(); - } - } ## end if ( $next_nonblank_token... - } ## end else [ if ( $last_last_nonblank_token... - } ## end if ( $expecting == OPERATOR... - } - $paren_type[$paren_depth] = $container_type; - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); - - # propagate types down through nested parens - # for example: the second paren in 'if ((' would be structural - # since the first is. - - if ( $last_nonblank_token eq '(' ) { - $type = $last_nonblank_type; - } - - # We exclude parens as structural after a ',' because it - # causes subtle problems with continuation indentation for - # something like this, where the first 'or' will not get - # indented. - # - # assert( - # __LINE__, - # ( not defined $check ) - # or ref $check - # or $check eq "new" - # or $check eq "old", - # ); - # - # Likewise, we exclude parens where a statement can start - # because of problems with continuation indentation, like - # these: - # - # ($firstline =~ /^#\!.*perl/) - # and (print $File::Find::name, "\n") - # and (return 1); - # - # (ref($usage_fref) =~ /CODE/) - # ? &$usage_fref - # : (&blast_usage, &blast_params, &blast_general_params); - - else { - $type = '{'; - } - - if ( $last_nonblank_type eq ')' ) { - warning( - "Syntax error? found token '$last_nonblank_type' then '('\n" - ); - } - $paren_structural_type[$paren_depth] = $type; - - }, - ')' => sub { - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); - - if ( $paren_structural_type[$paren_depth] eq '{' ) { - $type = '}'; - } - - $container_type = $paren_type[$paren_depth]; - - # restore statement type as 'sub' at closing paren of a signature - # so that a subsequent ':' is identified as an attribute - if ( $container_type =~ /^sub\b/ ) { - $statement_type = $container_type; - } - - # /^(for|foreach)$/ - if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { - my $num_sc = $paren_semicolon_count[$paren_depth]; - if ( $num_sc > 0 && $num_sc != 2 ) { - warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); - } - } - - if ( $paren_depth > 0 ) { $paren_depth-- } - }, - ',' => sub { - if ( $last_nonblank_type eq ',' ) { - complain("Repeated ','s \n"); - } - - # patch for operator_expected: note if we are in the list (use.t) - if ( $statement_type eq 'use' ) { $statement_type = '_use' } -## FIXME: need to move this elsewhere, perhaps check after a '(' -## elsif ($last_nonblank_token eq '(') { -## warning("Leading ','s illegal in some versions of perl\n"); -## } - }, - ';' => sub { - $context = UNKNOWN_CONTEXT; - $statement_type = ''; - $want_paren = ""; - - # /^(for|foreach)$/ - if ( $is_for_foreach{ $paren_type[$paren_depth] } ) - { # mark ; in for loop - - # Be careful: we do not want a semicolon such as the - # following to be included: - # - # for (sort {strcoll($a,$b);} keys %investments) { - - if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] - && $square_bracket_depth == - $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) - { - - $type = 'f'; - $paren_semicolon_count[$paren_depth]++; - } - } - - }, - '"' => sub { - error_if_expecting_OPERATOR("String") - if ( $expecting == OPERATOR ); - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = ""; - }, - "'" => sub { - error_if_expecting_OPERATOR("String") - if ( $expecting == OPERATOR ); - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = ""; - }, - '`' => sub { - error_if_expecting_OPERATOR("String") - if ( $expecting == OPERATOR ); - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = ""; - }, - '/' => sub { - my $is_pattern; - - if ( $expecting == UNKNOWN ) { # indeterminate, must guess.. - my $msg; - ( $is_pattern, $msg ) = - guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, - $max_token_index ); - - if ($msg) { - write_diagnostics("DIVIDE:$msg\n"); - write_logfile_entry($msg); - } - } - else { $is_pattern = ( $expecting == TERM ) } - - if ($is_pattern) { - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = '[msixpodualngc]'; - } - else { # not a pattern; check for a /= token - - if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= - $i++; - $tok = '/='; - $type = $tok; - } - - #DEBUG - collecting info on what tokens follow a divide - # for development of guessing algorithm - #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { - # #write_diagnostics( "DIVIDE? $input_line\n" ); - #} - } - }, - '{' => sub { - - # if we just saw a ')', we will label this block with - # its type. We need to do this to allow sub - # code_block_type to determine if this brace starts a - # code block or anonymous hash. (The type of a paren - # pair is the preceding token, such as 'if', 'else', - # etc). - $container_type = ""; - - # ATTRS: for a '{' following an attribute list, reset - # things to look like we just saw the sub name - if ( $statement_type =~ /^sub/ ) { - $last_nonblank_token = $statement_type; - $last_nonblank_type = 'i'; - $statement_type = ""; - } - - # patch for SWITCH/CASE: hide these keywords from an immediately - # following opening brace - elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) - && $statement_type eq $last_nonblank_token ) - { - $last_nonblank_token = ";"; - } - - elsif ( $last_nonblank_token eq ')' ) { - $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; - - # defensive move in case of a nesting error (pbug.t) - # in which this ')' had no previous '(' - # this nesting error will have been caught - if ( !defined($last_nonblank_token) ) { - $last_nonblank_token = 'if'; - } - - # check for syntax error here; - unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { - if ( $tokenizer_self->{'_extended_syntax'} ) { - - # we append a trailing () to mark this as an unknown - # block type. This allows perltidy to format some - # common extensions of perl syntax. - # This is used by sub code_block_type - $last_nonblank_token .= '()'; - } - else { - my $list = - join( ' ', sort keys %is_blocktype_with_paren ); - warning( -"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" - ); - } - } - } - - # patch for paren-less for/foreach glitch, part 2. - # see note below under 'qw' - elsif ($last_nonblank_token eq 'qw' - && $is_for_foreach{$want_paren} ) - { - $last_nonblank_token = $want_paren; - if ( $last_last_nonblank_token eq $want_paren ) { - warning( -"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" - ); - - } - $want_paren = ""; - } - - # now identify which of the three possible types of - # curly braces we have: hash index container, anonymous - # hash reference, or code block. - - # non-structural (hash index) curly brace pair - # get marked 'L' and 'R' - if ( is_non_structural_brace() ) { - $type = 'L'; - - # patch for SWITCH/CASE: - # allow paren-less identifier after 'when' - # if the brace is preceded by a space - if ( $statement_type eq 'when' - && $last_nonblank_type eq 'i' - && $last_last_nonblank_type eq 'k' - && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) - { - $type = '{'; - $block_type = $statement_type; - } - } - - # code and anonymous hash have the same type, '{', but are - # distinguished by 'block_type', - # which will be blank for an anonymous hash - else { - - $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, - $max_token_index ); - - # patch to promote bareword type to function taking block - if ( $block_type - && $last_nonblank_type eq 'w' - && $last_nonblank_i >= 0 ) - { - if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { - $routput_token_type->[$last_nonblank_i] = 'G'; - } - } - - # patch for SWITCH/CASE: if we find a stray opening block brace - # where we might accept a 'case' or 'when' block, then take it - if ( $statement_type eq 'case' - || $statement_type eq 'when' ) - { - if ( !$block_type || $block_type eq '}' ) { - $block_type = $statement_type; - } - } - } - - $brace_type[ ++$brace_depth ] = $block_type; - $brace_package[$brace_depth] = $current_package; - $brace_structural_type[$brace_depth] = $type; - $brace_context[$brace_depth] = $context; - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); - }, - '}' => sub { - $block_type = $brace_type[$brace_depth]; - if ($block_type) { $statement_type = '' } - if ( defined( $brace_package[$brace_depth] ) ) { - $current_package = $brace_package[$brace_depth]; - } - - # can happen on brace error (caught elsewhere) - else { - } - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); - - if ( $brace_structural_type[$brace_depth] eq 'L' ) { - $type = 'R'; - } - - # propagate type information for 'do' and 'eval' blocks, and also - # for smartmatch operator. This is necessary to enable us to know - # if an operator or term is expected next. - if ( $is_block_operator{$block_type} ) { - $tok = $block_type; - } - - $context = $brace_context[$brace_depth]; - if ( $brace_depth > 0 ) { $brace_depth--; } - }, - '&' => sub { # maybe sub call? start looking - - # We have to check for sub call unless we are sure we - # are expecting an operator. This example from s2p - # got mistaken as a q operator in an early version: - # print BODY &q(<<'EOT'); - if ( $expecting != OPERATOR ) { - - # But only look for a sub call if we are expecting a term or - # if there is no existing space after the &. - # For example we probably don't want & as sub call here: - # Fcntl::S_IRUSR & $mode; - if ( $expecting == TERM || $next_type ne 'b' ) { - scan_identifier(); - } - } - else { - } - }, - '<' => sub { # angle operator or less than? - - if ( $expecting != OPERATOR ) { - ( $i, $type ) = - find_angle_operator_termination( $input_line, $i, $rtoken_map, - $expecting, $max_token_index ); - - if ( $type eq '<' && $expecting == TERM ) { - error_if_expecting_TERM(); - interrupt_logfile(); - warning("Unterminated <> operator?\n"); - resume_logfile(); - } - } - else { - } - }, - '?' => sub { # ?: conditional or starting pattern? - - my $is_pattern; - - if ( $expecting == UNKNOWN ) { - - my $msg; - ( $is_pattern, $msg ) = - guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, - $max_token_index ); - - if ($msg) { write_logfile_entry($msg) } - } - else { $is_pattern = ( $expecting == TERM ) } - - if ($is_pattern) { - $in_quote = 1; - $type = 'Q'; - $allowed_quote_modifiers = '[msixpodualngc]'; - } - else { - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( QUESTION_COLON, - $rtoken_map->[$i_tok] ); - } - }, - '*' => sub { # typeglob, or multiply? - - if ( $expecting == TERM ) { - scan_identifier(); - } - else { - - if ( $rtokens->[ $i + 1 ] eq '=' ) { - $tok = '*='; - $type = $tok; - $i++; - } - elsif ( $rtokens->[ $i + 1 ] eq '*' ) { - $tok = '**'; - $type = $tok; - $i++; - if ( $rtokens->[ $i + 1 ] eq '=' ) { - $tok = '**='; - $type = $tok; - $i++; - } - } - } - }, - '.' => sub { # what kind of . ? - - if ( $expecting != OPERATOR ) { - scan_number(); - if ( $type eq '.' ) { - error_if_expecting_TERM() - if ( $expecting == TERM ); - } - } - else { - } - }, - ':' => sub { - - # if this is the first nonblank character, call it a label - # since perl seems to just swallow it - if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { - $type = 'J'; - } - - # ATTRS: check for a ':' which introduces an attribute list - # (this might eventually get its own token type) - elsif ( $statement_type =~ /^sub\b/ ) { - $type = 'A'; - $in_attribute_list = 1; - } - - # check for scalar attribute, such as - # my $foo : shared = 1; - elsif ($is_my_our{$statement_type} - && $current_depth[QUESTION_COLON] == 0 ) - { - $type = 'A'; - $in_attribute_list = 1; - } - - # otherwise, it should be part of a ?/: operator - else { - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( QUESTION_COLON, - $rtoken_map->[$i_tok] ); - if ( $last_nonblank_token eq '?' ) { - warning("Syntax error near ? :\n"); - } - } - }, - '+' => sub { # what kind of plus? - - if ( $expecting == TERM ) { - my $number = scan_number(); - - # unary plus is safest assumption if not a number - if ( !defined($number) ) { $type = 'p'; } - } - elsif ( $expecting == OPERATOR ) { - } - else { - if ( $next_type eq 'w' ) { $type = 'p' } - } - }, - '@' => sub { - - error_if_expecting_OPERATOR("Array") - if ( $expecting == OPERATOR ); - scan_identifier(); - }, - '%' => sub { # hash or modulo? - - # first guess is hash if no following blank - if ( $expecting == UNKNOWN ) { - if ( $next_type ne 'b' ) { $expecting = TERM } - } - if ( $expecting == TERM ) { - scan_identifier(); - } - }, - '[' => sub { - $square_bracket_type[ ++$square_bracket_depth ] = - $last_nonblank_token; - ( $type_sequence, $indent_flag ) = - increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] ); - - # It may seem odd, but structural square brackets have - # type '{' and '}'. This simplifies the indentation logic. - if ( !is_non_structural_brace() ) { - $type = '{'; - } - $square_bracket_structural_type[$square_bracket_depth] = $type; - }, - ']' => sub { - ( $type_sequence, $indent_flag ) = - decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] ); - - if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) - { - $type = '}'; - } - - # propagate type information for smartmatch operator. This is - # necessary to enable us to know if an operator or term is expected - # next. - if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) { - $tok = $square_bracket_type[$square_bracket_depth]; - } - - if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } - }, - '-' => sub { # what kind of minus? - - if ( ( $expecting != OPERATOR ) - && $is_file_test_operator{$next_tok} ) - { - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i + 1, $rtokens, - $max_token_index ); - - # check for a quoted word like "-w=>xx"; - # it is sufficient to just check for a following '=' - if ( $next_nonblank_token eq '=' ) { - $type = 'm'; - } - else { - $i++; - $tok .= $next_tok; - $type = 'F'; - } - } - elsif ( $expecting == TERM ) { - my $number = scan_number(); - - # maybe part of bareword token? unary is safest - if ( !defined($number) ) { $type = 'm'; } - - } - elsif ( $expecting == OPERATOR ) { - } - else { - - if ( $next_type eq 'w' ) { - $type = 'm'; - } - } - }, - - '^' => sub { - - # check for special variables like ${^WARNING_BITS} - if ( $expecting == TERM ) { - - # FIXME: this should work but will not catch errors - # because we also have to be sure that previous token is - # a type character ($,@,%). - if ( $last_nonblank_token eq '{' - && ( $next_tok =~ /^[A-Za-z_]/ ) ) - { - - if ( $next_tok eq 'W' ) { - $tokenizer_self->{_saw_perl_dash_w} = 1; - } - $tok = $tok . $next_tok; - $i = $i + 1; - $type = 'w'; - } - - else { - unless ( error_if_expecting_TERM() ) { - - # Something like this is valid but strange: - # undef ^I; - complain("The '^' seems unusual here\n"); - } - } - } - }, - - '::' => sub { # probably a sub call - scan_bare_identifier(); - }, - '<<' => sub { # maybe a here-doc? - return - unless ( $i < $max_token_index ) - ; # here-doc not possible if end of line - - if ( $expecting != OPERATOR ) { - my ( $found_target, $here_doc_target, $here_quote_character, - $saw_error ); - ( - $found_target, $here_doc_target, $here_quote_character, $i, - $saw_error - ) - = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, - $max_token_index ); - - if ($found_target) { - push @{$rhere_target_list}, - [ $here_doc_target, $here_quote_character ]; - $type = 'h'; - if ( length($here_doc_target) > 80 ) { - my $truncated = substr( $here_doc_target, 0, 80 ); - complain("Long here-target: '$truncated' ...\n"); - } - elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { - complain( - "Unconventional here-target: '$here_doc_target'\n"); - } - } - elsif ( $expecting == TERM ) { - unless ($saw_error) { - - # shouldn't happen.. - warning("Program bug; didn't find here doc target\n"); - report_definite_bug(); - } - } - } - else { - } - }, - '<<~' => sub { # a here-doc, new type added in v26 - return - unless ( $i < $max_token_index ) - ; # here-doc not possible if end of line - if ( $expecting != OPERATOR ) { - my ( $found_target, $here_doc_target, $here_quote_character, - $saw_error ); - ( - $found_target, $here_doc_target, $here_quote_character, $i, - $saw_error - ) - = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, - $max_token_index ); - - if ($found_target) { - - if ( length($here_doc_target) > 80 ) { - my $truncated = substr( $here_doc_target, 0, 80 ); - complain("Long here-target: '$truncated' ...\n"); - } - elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { - complain( - "Unconventional here-target: '$here_doc_target'\n"); - } - - # Note that we put a leading space on the here quote - # character indicate that it may be preceded by spaces - $here_quote_character = " " . $here_quote_character; - push @{$rhere_target_list}, - [ $here_doc_target, $here_quote_character ]; - $type = 'h'; - } - elsif ( $expecting == TERM ) { - unless ($saw_error) { - - # shouldn't happen.. - warning("Program bug; didn't find here doc target\n"); - report_definite_bug(); - } - } - } - else { - } - }, - '->' => sub { - - # if -> points to a bare word, we must scan for an identifier, - # otherwise something like ->y would look like the y operator - scan_identifier(); - }, - - # type = 'pp' for pre-increment, '++' for post-increment - '++' => sub { - if ( $expecting == TERM ) { $type = 'pp' } - elsif ( $expecting == UNKNOWN ) { - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - if ( $next_nonblank_token eq '$' ) { $type = 'pp' } - } - }, - - '=>' => sub { - if ( $last_nonblank_type eq $tok ) { - complain("Repeated '=>'s \n"); - } - - # patch for operator_expected: note if we are in the list (use.t) - # TODO: make version numbers a new token type - if ( $statement_type eq 'use' ) { $statement_type = '_use' } - }, - - # type = 'mm' for pre-decrement, '--' for post-decrement - '--' => sub { - - if ( $expecting == TERM ) { $type = 'mm' } - elsif ( $expecting == UNKNOWN ) { - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - if ( $next_nonblank_token eq '$' ) { $type = 'mm' } - } - }, - - '&&' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - - '||' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - - '//' => sub { - error_if_expecting_TERM() - if ( $expecting == TERM ); - }, - }; - - # ------------------------------------------------------------ - # end hash of code for handling individual token types - # ------------------------------------------------------------ - - my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); - - # These block types terminate statements and do not need a trailing - # semicolon - # patched for SWITCH/CASE/ - my %is_zero_continuation_block_type; - @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; - if elsif else unless while until for foreach switch case given when); - @is_zero_continuation_block_type{@_} = (1) x scalar(@_); - - my %is_not_zero_continuation_block_type; - @_ = qw(sort grep map do eval); - @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); - - my %is_logical_container; - @_ = qw(if elsif unless while and or err not && ! || for foreach); - @is_logical_container{@_} = (1) x scalar(@_); - - my %is_binary_type; - @_ = qw(|| &&); - @is_binary_type{@_} = (1) x scalar(@_); - - my %is_binary_keyword; - @_ = qw(and or err eq ne cmp); - @is_binary_keyword{@_} = (1) x scalar(@_); - - # 'L' is token for opening { at hash key - my %is_opening_type; - @_ = qw" L { ( [ "; - @is_opening_type{@_} = (1) x scalar(@_); - - # 'R' is token for closing } at hash key - my %is_closing_type; - @_ = qw" R } ) ] "; - @is_closing_type{@_} = (1) x scalar(@_); - - my %is_redo_last_next_goto; - @_ = qw(redo last next goto); - @is_redo_last_next_goto{@_} = (1) x scalar(@_); - - my %is_use_require; - @_ = qw(use require); - @is_use_require{@_} = (1) x scalar(@_); - - my %is_sub_package; - @_ = qw(sub package); - @is_sub_package{@_} = (1) x scalar(@_); - - # This hash holds the hash key in $tokenizer_self for these keywords: - my %is_format_END_DATA = ( - 'format' => '_in_format', - '__END__' => '_in_end', - '__DATA__' => '_in_data', - ); - - # original ref: camel 3 p 147, - # but perl may accept undocumented flags - # perl 5.10 adds 'p' (preserve) - # Perl version 5.22 added 'n' - # From http://perldoc.perl.org/perlop.html we have - # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc - # s/PATTERN/REPLACEMENT/msixpodualngcer - # y/SEARCHLIST/REPLACEMENTLIST/cdsr - # tr/SEARCHLIST/REPLACEMENTLIST/cdsr - # qr/STRING/msixpodualn - my %quote_modifiers = ( - 's' => '[msixpodualngcer]', - 'y' => '[cdsr]', - 'tr' => '[cdsr]', - 'm' => '[msixpodualngc]', - 'qr' => '[msixpodualn]', - 'q' => "", - 'qq' => "", - 'qw' => "", - 'qx' => "", - ); - - # table showing how many quoted things to look for after quote operator.. - # s, y, tr have 2 (pattern and replacement) - # others have 1 (pattern only) - my %quote_items = ( - 's' => 2, - 'y' => 2, - 'tr' => 2, - 'm' => 1, - 'qr' => 1, - 'q' => 1, - 'qq' => 1, - 'qw' => 1, - 'qx' => 1, - ); - - sub tokenize_this_line { - - # This routine breaks a line of perl code into tokens which are of use in - # indentation and reformatting. One of my goals has been to define tokens - # such that a newline may be inserted between any pair of tokens without - # changing or invalidating the program. This version comes close to this, - # although there are necessarily a few exceptions which must be caught by - # the formatter. Many of these involve the treatment of bare words. - # - # The tokens and their types are returned in arrays. See previous - # routine for their names. - # - # See also the array "valid_token_types" in the BEGIN section for an - # up-to-date list. - # - # To simplify things, token types are either a single character, or they - # are identical to the tokens themselves. - # - # As a debugging aid, the -D flag creates a file containing a side-by-side - # comparison of the input string and its tokenization for each line of a file. - # This is an invaluable debugging aid. - # - # In addition to tokens, and some associated quantities, the tokenizer - # also returns flags indication any special line types. These include - # quotes, here_docs, formats. - # - # ----------------------------------------------------------------------- - # - # How to add NEW_TOKENS: - # - # New token types will undoubtedly be needed in the future both to keep up - # with changes in perl and to help adapt the tokenizer to other applications. - # - # Here are some notes on the minimal steps. I wrote these notes while - # adding the 'v' token type for v-strings, which are things like version - # numbers 5.6.0, and ip addresses, and will use that as an example. ( You - # can use your editor to search for the string "NEW_TOKENS" to find the - # appropriate sections to change): - # - # *. Try to talk somebody else into doing it! If not, .. - # - # *. Make a backup of your current version in case things don't work out! - # - # *. Think of a new, unused character for the token type, and add to - # the array @valid_token_types in the BEGIN section of this package. - # For example, I used 'v' for v-strings. - # - # *. Implement coding to recognize the $type of the token in this routine. - # This is the hardest part, and is best done by imitating or modifying - # some of the existing coding. For example, to recognize v-strings, I - # patched 'sub scan_bare_identifier' to recognize v-strings beginning with - # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. - # - # *. Update sub operator_expected. This update is critically important but - # the coding is trivial. Look at the comments in that routine for help. - # For v-strings, which should behave like numbers, I just added 'v' to the - # regex used to handle numbers and strings (types 'n' and 'Q'). - # - # *. Implement a 'bond strength' rule in sub set_bond_strengths in - # Perl::Tidy::Formatter for breaking lines around this token type. You can - # skip this step and take the default at first, then adjust later to get - # desired results. For adding type 'v', I looked at sub bond_strength and - # saw that number type 'n' was using default strengths, so I didn't do - # anything. I may tune it up someday if I don't like the way line - # breaks with v-strings look. - # - # *. Implement a 'whitespace' rule in sub set_whitespace_flags in - # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine - # and saw that type 'n' used spaces on both sides, so I just added 'v' - # to the array @spaces_both_sides. - # - # *. Update HtmlWriter package so that users can colorize the token as - # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in - # that package. For v-strings, I initially chose to use a default color - # equal to the default for numbers, but it might be nice to change that - # eventually. - # - # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. - # - # *. Run lots and lots of debug tests. Start with special files designed - # to test the new token type. Run with the -D flag to create a .DEBUG - # file which shows the tokenization. When these work ok, test as many old - # scripts as possible. Start with all of the '.t' files in the 'test' - # directory of the distribution file. Compare .tdy output with previous - # version and updated version to see the differences. Then include as - # many more files as possible. My own technique has been to collect a huge - # number of perl scripts (thousands!) into one directory and run perltidy - # *, then run diff between the output of the previous version and the - # current version. - # - # *. For another example, search for the smartmatch operator '~~' - # with your editor to see where updates were made for it. - # - # ----------------------------------------------------------------------- - - my $line_of_tokens = shift; - my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; - - # patch while coding change is underway - # make callers private data to allow access - # $tokenizer_self = $caller_tokenizer_self; - - # extract line number for use in error messages - $input_line_number = $line_of_tokens->{_line_number}; - - # reinitialize for multi-line quote - $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; - - # check for pod documentation - if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { - - # must not be in multi-line quote - # and must not be in an equation - if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) ) - { - $tokenizer_self->{_in_pod} = 1; - return; - } - } - - $input_line = $untrimmed_input_line; - - chomp $input_line; - - # trim start of this line unless we are continuing a quoted line - # do not trim end because we might end in a quote (test: deken4.pl) - # Perl::Tidy::Formatter will delete needless trailing blanks - unless ( $in_quote && ( $quote_type eq 'Q' ) ) { - $input_line =~ s/^\s*//; # trim left end - } - - # Set a flag to indicate if we might be at an __END__ or __DATA__ line - # This will be used below to avoid quoting a bare word followed by - # a fat comma. - my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/; - - # update the copy of the line for use in error messages - # This must be exactly what we give the pre_tokenizer - $tokenizer_self->{_line_text} = $input_line; - - # re-initialize for the main loop - $routput_token_list = []; # stack of output token indexes - $routput_token_type = []; # token types - $routput_block_type = []; # types of code block - $routput_container_type = []; # paren types, such as if, elsif, .. - $routput_type_sequence = []; # nesting sequential number - - $rhere_target_list = []; - - $tok = $last_nonblank_token; - $type = $last_nonblank_type; - $prototype = $last_nonblank_prototype; - $last_nonblank_i = -1; - $block_type = $last_nonblank_block_type; - $container_type = $last_nonblank_container_type; - $type_sequence = $last_nonblank_type_sequence; - $indent_flag = 0; - $peeked_ahead = 0; - - # tokenization is done in two stages.. - # stage 1 is a very simple pre-tokenization - my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens - - # a little optimization for a full-line comment - if ( !$in_quote && ( $input_line =~ /^#/ ) ) { - $max_tokens_wanted = 1 # no use tokenizing a comment - } - - # start by breaking the line into pre-tokens - ( $rtokens, $rtoken_map, $rtoken_type ) = - pre_tokenize( $input_line, $max_tokens_wanted ); - - $max_token_index = scalar( @{$rtokens} ) - 1; - push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic - push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced - push( @{$rtoken_type}, 'b', 'b', 'b' ); - - # initialize for main loop - foreach my $ii ( 0 .. $max_token_index + 3 ) { - $routput_token_type->[$ii] = ""; - $routput_block_type->[$ii] = ""; - $routput_container_type->[$ii] = ""; - $routput_type_sequence->[$ii] = ""; - $routput_indent_flag->[$ii] = 0; - } - $i = -1; - $i_tok = -1; - - # ------------------------------------------------------------ - # begin main tokenization loop - # ------------------------------------------------------------ - - # we are looking at each pre-token of one line and combining them - # into tokens - while ( ++$i <= $max_token_index ) { - - if ($in_quote) { # continue looking for end of a quote - $type = $quote_type; - - unless ( @{$routput_token_list} ) - { # initialize if continuation line - push( @{$routput_token_list}, $i ); - $routput_token_type->[$i] = $type; - - } - $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); - - # scan for the end of the quote or pattern - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2 - ) - = do_quote( - $i, $in_quote, $quote_character, - $quote_pos, $quote_depth, $quoted_string_1, - $quoted_string_2, $rtokens, $rtoken_map, - $max_token_index - ); - - # all done if we didn't find it - last if ($in_quote); - - # save pattern and replacement text for rescanning - my $qs1 = $quoted_string_1; - my $qs2 = $quoted_string_2; - - # re-initialize for next search - $quote_character = ''; - $quote_pos = 0; - $quote_type = 'Q'; - $quoted_string_1 = ""; - $quoted_string_2 = ""; - last if ( ++$i > $max_token_index ); - - # look for any modifiers - if ($allowed_quote_modifiers) { - - # check for exact quote modifiers - if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) { - my $str = $rtokens->[$i]; - my $saw_modifier_e; - while ( $str =~ /\G$allowed_quote_modifiers/gc ) { - my $pos = pos($str); - my $char = substr( $str, $pos - 1, 1 ); - $saw_modifier_e ||= ( $char eq 'e' ); - } - - # For an 'e' quote modifier we must scan the replacement - # text for here-doc targets. - if ($saw_modifier_e) { - - my $rht = scan_replacement_text($qs1); - - # Change type from 'Q' to 'h' for quotes with - # here-doc targets so that the formatter (see sub - # print_line_of_tokens) will not make any line - # breaks after this point. - if ($rht) { - push @{$rhere_target_list}, @{$rht}; - $type = 'h'; - if ( $i_tok < 0 ) { - my $ilast = $routput_token_list->[-1]; - $routput_token_type->[$ilast] = $type; - } - } - } - - if ( defined( pos($str) ) ) { - - # matched - if ( pos($str) == length($str) ) { - last if ( ++$i > $max_token_index ); - } - - # Looks like a joined quote modifier - # and keyword, maybe something like - # s/xxx/yyy/gefor @k=... - # Example is "galgen.pl". Would have to split - # the word and insert a new token in the - # pre-token list. This is so rare that I haven't - # done it. Will just issue a warning citation. - - # This error might also be triggered if my quote - # modifier characters are incomplete - else { - warning(<[$i]\n"; - # my $num = length($str) - pos($str); - # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num); - # print "continuing with new token $rtokens->[$i]\n"; - - # skipping past this token does least damage - last if ( ++$i > $max_token_index ); - } - } - else { - - # example file: rokicki4.pl - # This error might also be triggered if my quote - # modifier characters are incomplete - write_logfile_entry( -"Note: found word $str at quote modifier location\n" - ); - } - } - - # re-initialize - $allowed_quote_modifiers = ""; - } - } - - unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) { - - # try to catch some common errors - if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { - - if ( $last_nonblank_token eq 'eq' ) { - complain("Should 'eq' be '==' here ?\n"); - } - elsif ( $last_nonblank_token eq 'ne' ) { - complain("Should 'ne' be '!=' here ?\n"); - } - } - - $last_last_nonblank_token = $last_nonblank_token; - $last_last_nonblank_type = $last_nonblank_type; - $last_last_nonblank_block_type = $last_nonblank_block_type; - $last_last_nonblank_container_type = - $last_nonblank_container_type; - $last_last_nonblank_type_sequence = - $last_nonblank_type_sequence; - $last_nonblank_token = $tok; - $last_nonblank_type = $type; - $last_nonblank_prototype = $prototype; - $last_nonblank_block_type = $block_type; - $last_nonblank_container_type = $container_type; - $last_nonblank_type_sequence = $type_sequence; - $last_nonblank_i = $i_tok; - } - - # store previous token type - if ( $i_tok >= 0 ) { - $routput_token_type->[$i_tok] = $type; - $routput_block_type->[$i_tok] = $block_type; - $routput_container_type->[$i_tok] = $container_type; - $routput_type_sequence->[$i_tok] = $type_sequence; - $routput_indent_flag->[$i_tok] = $indent_flag; - } - my $pre_tok = $rtokens->[$i]; # get the next pre-token - my $pre_type = $rtoken_type->[$i]; # and type - $tok = $pre_tok; - $type = $pre_type; # to be modified as necessary - $block_type = ""; # blank for all tokens except code block braces - $container_type = ""; # blank for all tokens except some parens - $type_sequence = ""; # blank for all tokens except ?/: - $indent_flag = 0; - $prototype = ""; # blank for all tokens except user defined subs - $i_tok = $i; - - # this pre-token will start an output token - push( @{$routput_token_list}, $i_tok ); - - # continue gathering identifier if necessary - # but do not start on blanks and comments - if ( $id_scan_state && $pre_type !~ /[b#]/ ) { - - if ( $id_scan_state =~ /^(sub|package)/ ) { - scan_id(); - } - else { - scan_identifier(); - } - - last if ($id_scan_state); - next if ( ( $i > 0 ) || $type ); - - # didn't find any token; start over - $type = $pre_type; - $tok = $pre_tok; - } - - # handle whitespace tokens.. - next if ( $type eq 'b' ); - my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : ' '; - my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b'; - - # Build larger tokens where possible, since we are not in a quote. - # - # First try to assemble digraphs. The following tokens are - # excluded and handled specially: - # '/=' is excluded because the / might start a pattern. - # 'x=' is excluded since it might be $x=, with $ on previous line - # '**' and *= might be typeglobs of punctuation variables - # I have allowed tokens starting with <, such as <=, - # because I don't think these could be valid angle operators. - # test file: storrs4.pl - my $test_tok = $tok . $rtokens->[ $i + 1 ]; - my $combine_ok = $is_digraph{$test_tok}; - - # check for special cases which cannot be combined - if ($combine_ok) { - - # '//' must be defined_or operator if an operator is expected. - # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) - # could be migrated here for clarity - - # Patch for RT#102371, misparsing a // in the following snippet: - # state $b //= ccc(); - # The solution is to always accept the digraph (or trigraph) after - # token type 'Z' (possible file handle). The reason is that - # sub operator_expected gives TERM expected here, which is - # wrong in this case. - if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) { - my $next_type = $rtokens->[ $i + 1 ]; - my $expecting = - operator_expected( $prev_type, $tok, $next_type ); - - # Patched for RT#101547, was 'unless ($expecting==OPERATOR)' - $combine_ok = 0 if ( $expecting == TERM ); - } - - # Patch for RT #114359: Missparsing of "print $x ** 0.5; - # Accept the digraphs '**' only after type 'Z' - # Otherwise postpone the decision. - if ( $test_tok eq '**' ) { - if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 } - } - } - - if ( - $combine_ok - - && ( $test_tok ne '/=' ) # might be pattern - && ( $test_tok ne 'x=' ) # might be $x - && ( $test_tok ne '*=' ) # typeglob? - - # Moved above as part of fix for - # RT #114359: Missparsing of "print $x ** 0.5; - # && ( $test_tok ne '**' ) # typeglob? - ) - { - $tok = $test_tok; - $i++; - - # Now try to assemble trigraphs. Note that all possible - # perl trigraphs can be constructed by appending a character - # to a digraph. - $test_tok = $tok . $rtokens->[ $i + 1 ]; - - if ( $is_trigraph{$test_tok} ) { - $tok = $test_tok; - $i++; - } - - # The only current tetragraph is the double diamond operator - # and its first three characters are not a trigraph, so - # we do can do a special test for it - elsif ( $test_tok eq '<<>' ) { - $test_tok .= $rtokens->[ $i + 2 ]; - if ( $is_tetragraph{$test_tok} ) { - $tok = $test_tok; - $i += 2; - } - } - } - - $type = $tok; - $next_tok = $rtokens->[ $i + 1 ]; - $next_type = $rtoken_type->[ $i + 1 ]; - - TOKENIZER_DEBUG_FLAG_TOKENIZE && do { - local $" = ')('; - my @debug_list = ( - $last_nonblank_token, $tok, - $next_tok, $brace_depth, - $brace_type[$brace_depth], $paren_depth, - $paren_type[$paren_depth] - ); - print STDOUT "TOKENIZE:(@debug_list)\n"; - }; - - # turn off attribute list on first non-blank, non-bareword - if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } - - ############################################################### - # We have the next token, $tok. - # Now we have to examine this token and decide what it is - # and define its $type - # - # section 1: bare words - ############################################################### - - if ( $pre_type eq 'w' ) { - $expecting = operator_expected( $prev_type, $tok, $next_type ); - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - # ATTRS: handle sub and variable attributes - if ($in_attribute_list) { - - # treat bare word followed by open paren like qw( - if ( $next_nonblank_token eq '(' ) { - $in_quote = $quote_items{'q'}; - $allowed_quote_modifiers = $quote_modifiers{'q'}; - $type = 'q'; - $quote_type = 'q'; - next; - } - - # handle bareword not followed by open paren - else { - $type = 'w'; - next; - } - } - - # quote a word followed by => operator - # unless the word __END__ or __DATA__ and the only word on - # the line. - if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) { - - if ( $rtokens->[ $i_next + 1 ] eq '>' ) { - if ( $is_constant{$current_package}{$tok} ) { - $type = 'C'; - } - elsif ( $is_user_function{$current_package}{$tok} ) { - $type = 'U'; - $prototype = - $user_function_prototype{$current_package}{$tok}; - } - elsif ( $tok =~ /^v\d+$/ ) { - $type = 'v'; - report_v_string($tok); - } - else { $type = 'w' } - - next; - } - } - - # quote a bare word within braces..like xxx->{s}; note that we - # must be sure this is not a structural brace, to avoid - # mistaking {s} in the following for a quoted bare word: - # for(@[){s}bla}BLA} - # Also treat q in something like var{-q} as a bare word, not qoute operator - if ( - $next_nonblank_token eq '}' - && ( - $last_nonblank_type eq 'L' - || ( $last_nonblank_type eq 'm' - && $last_last_nonblank_type eq 'L' ) - ) - ) - { - $type = 'w'; - next; - } - - # a bare word immediately followed by :: is not a keyword; - # use $tok_kw when testing for keywords to avoid a mistake - my $tok_kw = $tok; - if ( $rtokens->[ $i + 1 ] eq ':' - && $rtokens->[ $i + 2 ] eq ':' ) - { - $tok_kw .= '::'; - } - - # handle operator x (now we know it isn't $x=) - if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { - if ( $tok eq 'x' ) { - - if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= - $tok = 'x='; - $type = $tok; - $i++; - } - else { - $type = 'x'; - } - } - - # FIXME: Patch: mark something like x4 as an integer for now - # It gets fixed downstream. This is easier than - # splitting the pretoken. - else { - $type = 'n'; - } - } - elsif ( $tok_kw eq 'CORE::' ) { - $type = $tok = $tok_kw; - $i += 2; - } - elsif ( ( $tok eq 'strict' ) - and ( $last_nonblank_token eq 'use' ) ) - { - $tokenizer_self->{_saw_use_strict} = 1; - scan_bare_identifier(); - } - - elsif ( ( $tok eq 'warnings' ) - and ( $last_nonblank_token eq 'use' ) ) - { - $tokenizer_self->{_saw_perl_dash_w} = 1; - - # scan as identifier, so that we pick up something like: - # use warnings::register - scan_bare_identifier(); - } - - elsif ( - $tok eq 'AutoLoader' - && $tokenizer_self->{_look_for_autoloader} - && ( - $last_nonblank_token eq 'use' - - # these regexes are from AutoSplit.pm, which we want - # to mimic - || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ - || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ - ) - ) - { - write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); - $tokenizer_self->{_saw_autoloader} = 1; - $tokenizer_self->{_look_for_autoloader} = 0; - scan_bare_identifier(); - } - - elsif ( - $tok eq 'SelfLoader' - && $tokenizer_self->{_look_for_selfloader} - && ( $last_nonblank_token eq 'use' - || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ - || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) - ) - { - write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); - $tokenizer_self->{_saw_selfloader} = 1; - $tokenizer_self->{_look_for_selfloader} = 0; - scan_bare_identifier(); - } - - elsif ( ( $tok eq 'constant' ) - and ( $last_nonblank_token eq 'use' ) ) - { - scan_bare_identifier(); - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, - $max_token_index ); - - if ($next_nonblank_token) { - - if ( $is_keyword{$next_nonblank_token} ) { - - # Assume qw is used as a quote and okay, as in: - # use constant qw{ DEBUG 0 }; - # Not worth trying to parse for just a warning - - # NOTE: This warning is deactivated because recent - # versions of perl do not complain here, but - # the coding is retained for reference. - if ( 0 && $next_nonblank_token ne 'qw' ) { - warning( -"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" - ); - } - } - - # FIXME: could check for error in which next token is - # not a word (number, punctuation, ..) - else { - $is_constant{$current_package}{$next_nonblank_token} - = 1; - } - } - } - - # various quote operators - elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { -##NICOL PATCH - if ( $expecting == OPERATOR ) { - - # Be careful not to call an error for a qw quote - # where a parenthesized list is allowed. For example, - # it could also be a for/foreach construct such as - # - # foreach my $key qw\Uno Due Tres Quadro\ { - # print "Set $key\n"; - # } - # - - # Or it could be a function call. - # NOTE: Braces in something like &{ xxx } are not - # marked as a block, we might have a method call. - # &method(...), $method->(..), &{method}(...), - # $ref[2](list) is ok & short for $ref[2]->(list) - # - # See notes in 'sub code_block_type' and - # 'sub is_non_structural_brace' - - unless ( - $tok eq 'qw' - && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ - || $is_for_foreach{$want_paren} ) - ) - { - error_if_expecting_OPERATOR(); - } - } - $in_quote = $quote_items{$tok}; - $allowed_quote_modifiers = $quote_modifiers{$tok}; - - # All quote types are 'Q' except possibly qw quotes. - # qw quotes are special in that they may generally be trimmed - # of leading and trailing whitespace. So they are given a - # separate type, 'q', unless requested otherwise. - $type = - ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) - ? 'q' - : 'Q'; - $quote_type = $type; - } - - # check for a statement label - elsif ( - ( $next_nonblank_token eq ':' ) - && ( $rtokens->[ $i_next + 1 ] ne ':' ) - && ( $i_next <= $max_token_index ) # colon on same line - && label_ok() - ) - { - if ( $tok !~ /[A-Z]/ ) { - push @{ $tokenizer_self->{_rlower_case_labels_at} }, - $input_line_number; - } - $type = 'J'; - $tok .= ':'; - $i = $i_next; - next; - } - - # 'sub' || 'package' - elsif ( $is_sub_package{$tok_kw} ) { - error_if_expecting_OPERATOR() - if ( $expecting == OPERATOR ); - scan_id(); - } - - # Note on token types for format, __DATA__, __END__: - # It simplifies things to give these type ';', so that when we - # start rescanning we will be expecting a token of type TERM. - # We will switch to type 'k' before outputting the tokens. - elsif ( $is_format_END_DATA{$tok_kw} ) { - $type = ';'; # make tokenizer look for TERM next - $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; - last; - } - - elsif ( $is_keyword{$tok_kw} ) { - $type = 'k'; - - # Since for and foreach may not be followed immediately - # by an opening paren, we have to remember which keyword - # is associated with the next '(' - if ( $is_for_foreach{$tok} ) { - if ( new_statement_ok() ) { - $want_paren = $tok; - } - } - - # recognize 'use' statements, which are special - elsif ( $is_use_require{$tok} ) { - $statement_type = $tok; - error_if_expecting_OPERATOR() - if ( $expecting == OPERATOR ); - } - - # remember my and our to check for trailing ": shared" - elsif ( $is_my_our{$tok} ) { - $statement_type = $tok; - } - - # Check for misplaced 'elsif' and 'else', but allow isolated - # else or elsif blocks to be formatted. This is indicated - # by a last noblank token of ';' - elsif ( $tok eq 'elsif' ) { - if ( $last_nonblank_token ne ';' - && $last_nonblank_block_type !~ - /^(if|elsif|unless)$/ ) - { - warning( -"expecting '$tok' to follow one of 'if|elsif|unless'\n" - ); - } - } - elsif ( $tok eq 'else' ) { - - # patched for SWITCH/CASE - if ( - $last_nonblank_token ne ';' - && $last_nonblank_block_type !~ - /^(if|elsif|unless|case|when)$/ - - # patch to avoid an unwanted error message for - # the case of a parenless 'case' (RT 105484): - # switch ( 1 ) { case x { 2 } else { } } - && $statement_type !~ - /^(if|elsif|unless|case|when)$/ - ) - { - warning( -"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" - ); - } - } - elsif ( $tok eq 'continue' ) { - if ( $last_nonblank_token ne ';' - && $last_nonblank_block_type !~ - /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) - { - - # note: ';' '{' and '}' in list above - # because continues can follow bare blocks; - # ':' is labeled block - # - ############################################ - # NOTE: This check has been deactivated because - # continue has an alternative usage for given/when - # blocks in perl 5.10 - ## warning("'$tok' should follow a block\n"); - ############################################ - } - } - - # patch for SWITCH/CASE if 'case' and 'when are - # treated as keywords. - elsif ( $tok eq 'when' || $tok eq 'case' ) { - $statement_type = $tok; # next '{' is block - } - - # - # indent trailing if/unless/while/until - # outdenting will be handled by later indentation loop -## DEACTIVATED: unfortunately this can cause some unwanted indentation like: -##$opt_o = 1 -## if !( -## $opt_b -## || $opt_c -## || $opt_d -## || $opt_f -## || $opt_i -## || $opt_l -## || $opt_o -## || $opt_x -## ); -## if ( $tok =~ /^(if|unless|while|until)$/ -## && $next_nonblank_token ne '(' ) -## { -## $indent_flag = 1; -## } - } - - # check for inline label following - # /^(redo|last|next|goto)$/ - elsif (( $last_nonblank_type eq 'k' ) - && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) - { - $type = 'j'; - next; - } - - # something else -- - else { - - scan_bare_identifier(); - if ( $type eq 'w' ) { - - if ( $expecting == OPERATOR ) { - - # don't complain about possible indirect object - # notation. - # For example: - # package main; - # sub new($) { ... } - # $b = new A::; # calls A::new - # $c = new A; # same thing but suspicious - # This will call A::new but we have a 'new' in - # main:: which looks like a constant. - # - if ( $last_nonblank_type eq 'C' ) { - if ( $tok !~ /::$/ ) { - complain(<[ $i + 1 ]; - if ( $next_tok eq '(' ) { - $type = 'U'; - } - - # underscore after file test operator is file handle - if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { - $type = 'Z'; - } - - # patch for SWITCH/CASE if 'case' and 'when are - # not treated as keywords: - if ( - ( - $tok eq 'case' - && $brace_type[$brace_depth] eq 'switch' - ) - || ( $tok eq 'when' - && $brace_type[$brace_depth] eq 'given' ) - ) - { - $statement_type = $tok; # next '{' is block - $type = 'k'; # for keyword syntax coloring - } - - # patch for SWITCH/CASE if switch and given not keywords - # Switch is not a perl 5 keyword, but we will gamble - # and mark switch followed by paren as a keyword. This - # is only necessary to get html syntax coloring nice, - # and does not commit this as being a switch/case. - if ( $next_nonblank_token eq '(' - && ( $tok eq 'switch' || $tok eq 'given' ) ) - { - $type = 'k'; # for keyword syntax coloring - } - } - } - } - - ############################################################### - # section 2: strings of digits - ############################################################### - elsif ( $pre_type eq 'd' ) { - $expecting = operator_expected( $prev_type, $tok, $next_type ); - error_if_expecting_OPERATOR("Number") - if ( $expecting == OPERATOR ); - my $number = scan_number(); - if ( !defined($number) ) { - - # shouldn't happen - we should always get a number - warning("non-number beginning with digit--program bug\n"); - report_definite_bug(); - } - } - - ############################################################### - # section 3: all other tokens - ############################################################### - - else { - last if ( $tok eq '#' ); - my $code = $tokenization_code->{$tok}; - if ($code) { - $expecting = - operator_expected( $prev_type, $tok, $next_type ); - $code->(); - redo if $in_quote; - } - } - } - - # ----------------------------- - # end of main tokenization loop - # ----------------------------- - - if ( $i_tok >= 0 ) { - $routput_token_type->[$i_tok] = $type; - $routput_block_type->[$i_tok] = $block_type; - $routput_container_type->[$i_tok] = $container_type; - $routput_type_sequence->[$i_tok] = $type_sequence; - $routput_indent_flag->[$i_tok] = $indent_flag; - } - - unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { - $last_last_nonblank_token = $last_nonblank_token; - $last_last_nonblank_type = $last_nonblank_type; - $last_last_nonblank_block_type = $last_nonblank_block_type; - $last_last_nonblank_container_type = $last_nonblank_container_type; - $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; - $last_nonblank_token = $tok; - $last_nonblank_type = $type; - $last_nonblank_block_type = $block_type; - $last_nonblank_container_type = $container_type; - $last_nonblank_type_sequence = $type_sequence; - $last_nonblank_prototype = $prototype; - } - - # reset indentation level if necessary at a sub or package - # in an attempt to recover from a nesting error - if ( $level_in_tokenizer < 0 ) { - if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { - reset_indentation_level(0); - brace_warning("resetting level to 0 at $1 $2\n"); - } - } - - # all done tokenizing this line ... - # now prepare the final list of tokens and types - - my @token_type = (); # stack of output token types - my @block_type = (); # stack of output code block types - my @container_type = (); # stack of output code container types - my @type_sequence = (); # stack of output type sequence numbers - my @tokens = (); # output tokens - my @levels = (); # structural brace levels of output tokens - my @slevels = (); # secondary nesting levels of output tokens - my @nesting_tokens = (); # string of tokens leading to this depth - my @nesting_types = (); # string of token types leading to this depth - my @nesting_blocks = (); # string of block types leading to this depth - my @nesting_lists = (); # string of list types leading to this depth - my @ci_string = (); # string needed to compute continuation indentation - my @container_environment = (); # BLOCK or LIST - my $container_environment = ''; - my $im = -1; # previous $i value - my $num; - my $ci_string_sum = ones_count($ci_string_in_tokenizer); - -# Computing Token Indentation -# -# The final section of the tokenizer forms tokens and also computes -# parameters needed to find indentation. It is much easier to do it -# in the tokenizer than elsewhere. Here is a brief description of how -# indentation is computed. Perl::Tidy computes indentation as the sum -# of 2 terms: -# -# (1) structural indentation, such as if/else/elsif blocks -# (2) continuation indentation, such as long parameter call lists. -# -# These are occasionally called primary and secondary indentation. -# -# Structural indentation is introduced by tokens of type '{', although -# the actual tokens might be '{', '(', or '['. Structural indentation -# is of two types: BLOCK and non-BLOCK. Default structural indentation -# is 4 characters if the standard indentation scheme is used. -# -# Continuation indentation is introduced whenever a line at BLOCK level -# is broken before its termination. Default continuation indentation -# is 2 characters in the standard indentation scheme. -# -# Both types of indentation may be nested arbitrarily deep and -# interlaced. The distinction between the two is somewhat arbitrary. -# -# For each token, we will define two variables which would apply if -# the current statement were broken just before that token, so that -# that token started a new line: -# -# $level = the structural indentation level, -# $ci_level = the continuation indentation level -# -# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), -# assuming defaults. However, in some special cases it is customary -# to modify $ci_level from this strict value. -# -# The total structural indentation is easy to compute by adding and -# subtracting 1 from a saved value as types '{' and '}' are seen. The -# running value of this variable is $level_in_tokenizer. -# -# The total continuation is much more difficult to compute, and requires -# several variables. These variables are: -# -# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for -# each indentation level, if there are intervening open secondary -# structures just prior to that level. -# $continuation_string_in_tokenizer = a string of 1's and 0's indicating -# if the last token at that level is "continued", meaning that it -# is not the first token of an expression. -# $nesting_block_string = a string of 1's and 0's indicating, for each -# indentation level, if the level is of type BLOCK or not. -# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string -# $nesting_list_string = a string of 1's and 0's indicating, for each -# indentation level, if it is appropriate for list formatting. -# If so, continuation indentation is used to indent long list items. -# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string -# @{$rslevel_stack} = a stack of total nesting depths at each -# structural indentation level, where "total nesting depth" means -# the nesting depth that would occur if every nesting token -- '{', '[', -# and '(' -- , regardless of context, is used to compute a nesting -# depth. - - #my $nesting_block_flag = ($nesting_block_string =~ /1$/); - #my $nesting_list_flag = ($nesting_list_string =~ /1$/); - - my ( $ci_string_i, $level_i, $nesting_block_string_i, - $nesting_list_string_i, $nesting_token_string_i, - $nesting_type_string_i, ); - - foreach my $i ( @{$routput_token_list} ) - { # scan the list of pre-tokens indexes - - # self-checking for valid token types - my $type = $routput_token_type->[$i]; - my $forced_indentation_flag = $routput_indent_flag->[$i]; - - # See if we should undo the $forced_indentation_flag. - # Forced indentation after 'if', 'unless', 'while' and 'until' - # expressions without trailing parens is optional and doesn't - # always look good. It is usually okay for a trailing logical - # expression, but if the expression is a function call, code block, - # or some kind of list it puts in an unwanted extra indentation - # level which is hard to remove. - # - # Example where extra indentation looks ok: - # return 1 - # if $det_a < 0 and $det_b > 0 - # or $det_a > 0 and $det_b < 0; - # - # Example where extra indentation is not needed because - # the eval brace also provides indentation: - # print "not " if defined eval { - # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; - # }; - # - # The following rule works fairly well: - # Undo the flag if the end of this line, or start of the next - # line, is an opening container token or a comma. - # This almost always works, but if not after another pass it will - # be stable. - if ( $forced_indentation_flag && $type eq 'k' ) { - my $ixlast = -1; - my $ilast = $routput_token_list->[$ixlast]; - my $toklast = $routput_token_type->[$ilast]; - if ( $toklast eq '#' ) { - $ixlast--; - $ilast = $routput_token_list->[$ixlast]; - $toklast = $routput_token_type->[$ilast]; - } - if ( $toklast eq 'b' ) { - $ixlast--; - $ilast = $routput_token_list->[$ixlast]; - $toklast = $routput_token_type->[$ilast]; - } - if ( $toklast =~ /^[\{,]$/ ) { - $forced_indentation_flag = 0; - } - else { - ( $toklast, my $i_next ) = - find_next_nonblank_token( $max_token_index, $rtokens, - $max_token_index ); - if ( $toklast =~ /^[\{,]$/ ) { - $forced_indentation_flag = 0; - } - } - } - - # if we are already in an indented if, see if we should outdent - if ($indented_if_level) { - - # don't try to nest trailing if's - shouldn't happen - if ( $type eq 'k' ) { - $forced_indentation_flag = 0; - } - - # check for the normal case - outdenting at next ';' - elsif ( $type eq ';' ) { - if ( $level_in_tokenizer == $indented_if_level ) { - $forced_indentation_flag = -1; - $indented_if_level = 0; - } - } - - # handle case of missing semicolon - elsif ( $type eq '}' ) { - if ( $level_in_tokenizer == $indented_if_level ) { - $indented_if_level = 0; - - # TBD: This could be a subroutine call - $level_in_tokenizer--; - if ( @{$rslevel_stack} > 1 ) { - pop( @{$rslevel_stack} ); - } - if ( length($nesting_block_string) > 1 ) - { # true for valid script - chop $nesting_block_string; - chop $nesting_list_string; - } - - } - } - } - - my $tok = $rtokens->[$i]; # the token, but ONLY if same as pretoken - $level_i = $level_in_tokenizer; - - # This can happen by running perltidy on non-scripts - # although it could also be bug introduced by programming change. - # Perl silently accepts a 032 (^Z) and takes it as the end - if ( !$is_valid_token_type{$type} ) { - my $val = ord($type); - warning( - "unexpected character decimal $val ($type) in script\n"); - $tokenizer_self->{_in_error} = 1; - } - - # ---------------------------------------------------------------- - # TOKEN TYPE PATCHES - # output __END__, __DATA__, and format as type 'k' instead of ';' - # to make html colors correct, etc. - my $fix_type = $type; - if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } - - # output anonymous 'sub' as keyword - if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } - - # ----------------------------------------------------------------- - - $nesting_token_string_i = $nesting_token_string; - $nesting_type_string_i = $nesting_type_string; - $nesting_block_string_i = $nesting_block_string; - $nesting_list_string_i = $nesting_list_string; - - # set primary indentation levels based on structural braces - # Note: these are set so that the leading braces have a HIGHER - # level than their CONTENTS, which is convenient for indentation - # Also, define continuation indentation for each token. - if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) - { - - # use environment before updating - $container_environment = - $nesting_block_flag ? 'BLOCK' - : $nesting_list_flag ? 'LIST' - : ""; - - # if the difference between total nesting levels is not 1, - # there are intervening non-structural nesting types between - # this '{' and the previous unclosed '{' - my $intervening_secondary_structure = 0; - if ( @{$rslevel_stack} ) { - $intervening_secondary_structure = - $slevel_in_tokenizer - $rslevel_stack->[-1]; - } - - # Continuation Indentation - # - # Having tried setting continuation indentation both in the formatter and - # in the tokenizer, I can say that setting it in the tokenizer is much, - # much easier. The formatter already has too much to do, and can't - # make decisions on line breaks without knowing what 'ci' will be at - # arbitrary locations. - # - # But a problem with setting the continuation indentation (ci) here - # in the tokenizer is that we do not know where line breaks will actually - # be. As a result, we don't know if we should propagate continuation - # indentation to higher levels of structure. - # - # For nesting of only structural indentation, we never need to do this. - # For example, in a long if statement, like this - # - # if ( !$output_block_type[$i] - # && ($in_statement_continuation) ) - # { <--outdented - # do_something(); - # } - # - # the second line has ci but we do normally give the lines within the BLOCK - # any ci. This would be true if we had blocks nested arbitrarily deeply. - # - # But consider something like this, where we have created a break after - # an opening paren on line 1, and the paren is not (currently) a - # structural indentation token: - # - # my $file = $menubar->Menubutton( - # qw/-text File -underline 0 -menuitems/ => [ - # [ - # Cascade => '~View', - # -menuitems => [ - # ... - # - # The second line has ci, so it would seem reasonable to propagate it - # down, giving the third line 1 ci + 1 indentation. This suggests the - # following rule, which is currently used to propagating ci down: if there - # are any non-structural opening parens (or brackets, or braces), before - # an opening structural brace, then ci is propagated down, and otherwise - # not. The variable $intervening_secondary_structure contains this - # information for the current token, and the string - # "$ci_string_in_tokenizer" is a stack of previous values of this - # variable. - - # save the current states - push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); - $level_in_tokenizer++; - - if ($forced_indentation_flag) { - - # break BEFORE '?' when there is forced indentation - if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } - if ( $type eq 'k' ) { - $indented_if_level = $level_in_tokenizer; - } - - # do not change container environment here if we are not - # at a real list. Adding this check prevents "blinkers" - # often near 'unless" clauses, such as in the following - # code: -## next -## unless -e ( -## $archive = -## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" ) -## ); - - $nesting_block_string .= "$nesting_block_flag"; - } - else { - - if ( $routput_block_type->[$i] ) { - $nesting_block_flag = 1; - $nesting_block_string .= '1'; - } - else { - $nesting_block_flag = 0; - $nesting_block_string .= '0'; - } - } - - # we will use continuation indentation within containers - # which are not blocks and not logical expressions - my $bit = 0; - if ( !$routput_block_type->[$i] ) { - - # propagate flag down at nested open parens - if ( $routput_container_type->[$i] eq '(' ) { - $bit = 1 if $nesting_list_flag; - } - - # use list continuation if not a logical grouping - # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ - else { - $bit = 1 - unless - $is_logical_container{ $routput_container_type->[$i] - }; - } - } - $nesting_list_string .= $bit; - $nesting_list_flag = $bit; - - $ci_string_in_tokenizer .= - ( $intervening_secondary_structure != 0 ) ? '1' : '0'; - $ci_string_sum = ones_count($ci_string_in_tokenizer); - $continuation_string_in_tokenizer .= - ( $in_statement_continuation > 0 ) ? '1' : '0'; - - # Sometimes we want to give an opening brace continuation indentation, - # and sometimes not. For code blocks, we don't do it, so that the leading - # '{' gets outdented, like this: - # - # if ( !$output_block_type[$i] - # && ($in_statement_continuation) ) - # { <--outdented - # - # For other types, we will give them continuation indentation. For example, - # here is how a list looks with the opening paren indented: - # - # @LoL = - # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], - # [ "homer", "marge", "bart" ], ); - # - # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) - - my $total_ci = $ci_string_sum; - if ( - !$routput_block_type->[$i] # patch: skip for BLOCK - && ($in_statement_continuation) - && !( $forced_indentation_flag && $type eq ':' ) - ) - { - $total_ci += $in_statement_continuation - unless ( $ci_string_in_tokenizer =~ /1$/ ); - } - - $ci_string_i = $total_ci; - $in_statement_continuation = 0; - } - - elsif ($type eq '}' - || $type eq 'R' - || $forced_indentation_flag < 0 ) - { - - # only a nesting error in the script would prevent popping here - if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } - - $level_i = --$level_in_tokenizer; - - # restore previous level values - if ( length($nesting_block_string) > 1 ) - { # true for valid script - chop $nesting_block_string; - $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); - chop $nesting_list_string; - $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); - - chop $ci_string_in_tokenizer; - $ci_string_sum = ones_count($ci_string_in_tokenizer); - - $in_statement_continuation = - chop $continuation_string_in_tokenizer; - - # zero continuation flag at terminal BLOCK '}' which - # ends a statement. - if ( $routput_block_type->[$i] ) { - - # ...These include non-anonymous subs - # note: could be sub ::abc { or sub 'abc - if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { - - # note: older versions of perl require the /gc modifier - # here or else the \G does not work. - if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) - { - $in_statement_continuation = 0; - } - } - -# ...and include all block types except user subs with -# block prototypes and these: (sort|grep|map|do|eval) -# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ - elsif ( - $is_zero_continuation_block_type{ - $routput_block_type->[$i] - } ) - { - $in_statement_continuation = 0; - } - - # ..but these are not terminal types: - # /^(sort|grep|map|do|eval)$/ ) - elsif ( - $is_not_zero_continuation_block_type{ - $routput_block_type->[$i] - } ) - { - } - - # ..and a block introduced by a label - # /^\w+\s*:$/gc ) { - elsif ( $routput_block_type->[$i] =~ /:$/ ) { - $in_statement_continuation = 0; - } - - # user function with block prototype - else { - $in_statement_continuation = 0; - } - } - - # If we are in a list, then - # we must set continuation indentation at the closing - # paren of something like this (paren after $check): - # assert( - # __LINE__, - # ( not defined $check ) - # or ref $check - # or $check eq "new" - # or $check eq "old", - # ); - elsif ( $tok eq ')' ) { - $in_statement_continuation = 1 - if $routput_container_type->[$i] =~ /^[;,\{\}]$/; - } - - elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } - } - - # use environment after updating - $container_environment = - $nesting_block_flag ? 'BLOCK' - : $nesting_list_flag ? 'LIST' - : ""; - $ci_string_i = $ci_string_sum + $in_statement_continuation; - $nesting_block_string_i = $nesting_block_string; - $nesting_list_string_i = $nesting_list_string; - } - - # not a structural indentation type.. - else { - - $container_environment = - $nesting_block_flag ? 'BLOCK' - : $nesting_list_flag ? 'LIST' - : ""; - - # zero the continuation indentation at certain tokens so - # that they will be at the same level as its container. For - # commas, this simplifies the -lp indentation logic, which - # counts commas. For ?: it makes them stand out. - if ($nesting_list_flag) { - if ( $type =~ /^[,\?\:]$/ ) { - $in_statement_continuation = 0; - } - } - - # be sure binary operators get continuation indentation - if ( - $container_environment - && ( $type eq 'k' && $is_binary_keyword{$tok} - || $is_binary_type{$type} ) - ) - { - $in_statement_continuation = 1; - } - - # continuation indentation is sum of any open ci from previous - # levels plus the current level - $ci_string_i = $ci_string_sum + $in_statement_continuation; - - # update continuation flag ... - # if this isn't a blank or comment.. - if ( $type ne 'b' && $type ne '#' ) { - - # and we are in a BLOCK - if ($nesting_block_flag) { - - # the next token after a ';' and label starts a new stmt - if ( $type eq ';' || $type eq 'J' ) { - $in_statement_continuation = 0; - } - - # otherwise, we are continuing the current statement - else { - $in_statement_continuation = 1; - } - } - - # if we are not in a BLOCK.. - else { - - # do not use continuation indentation if not list - # environment (could be within if/elsif clause) - if ( !$nesting_list_flag ) { - $in_statement_continuation = 0; - } - - # otherwise, the token after a ',' starts a new term - - # Patch FOR RT#99961; no continuation after a ';' - # This is needed because perltidy currently marks - # a block preceded by a type character like % or @ - # as a non block, to simplify formatting. But these - # are actually blocks and can have semicolons. - # See code_block_type() and is_non_structural_brace(). - elsif ( $type eq ',' || $type eq ';' ) { - $in_statement_continuation = 0; - } - - # otherwise, we are continuing the current term - else { - $in_statement_continuation = 1; - } - } - } - } - - if ( $level_in_tokenizer < 0 ) { - unless ( $tokenizer_self->{_saw_negative_indentation} ) { - $tokenizer_self->{_saw_negative_indentation} = 1; - warning("Starting negative indentation\n"); - } - } - - # set secondary nesting levels based on all containment token types - # Note: these are set so that the nesting depth is the depth - # of the PREVIOUS TOKEN, which is convenient for setting - # the strength of token bonds - my $slevel_i = $slevel_in_tokenizer; - - # /^[L\{\(\[]$/ - if ( $is_opening_type{$type} ) { - $slevel_in_tokenizer++; - $nesting_token_string .= $tok; - $nesting_type_string .= $type; - } - - # /^[R\}\)\]]$/ - elsif ( $is_closing_type{$type} ) { - $slevel_in_tokenizer--; - my $char = chop $nesting_token_string; - - if ( $char ne $matching_start_token{$tok} ) { - $nesting_token_string .= $char . $tok; - $nesting_type_string .= $type; - } - else { - chop $nesting_type_string; - } - } - - push( @block_type, $routput_block_type->[$i] ); - push( @ci_string, $ci_string_i ); - push( @container_environment, $container_environment ); - push( @container_type, $routput_container_type->[$i] ); - push( @levels, $level_i ); - push( @nesting_tokens, $nesting_token_string_i ); - push( @nesting_types, $nesting_type_string_i ); - push( @slevels, $slevel_i ); - push( @token_type, $fix_type ); - push( @type_sequence, $routput_type_sequence->[$i] ); - push( @nesting_blocks, $nesting_block_string ); - push( @nesting_lists, $nesting_list_string ); - - # now form the previous token - if ( $im >= 0 ) { - $num = - $rtoken_map->[$i] - $rtoken_map->[$im]; # how many characters - - if ( $num > 0 ) { - push( @tokens, - substr( $input_line, $rtoken_map->[$im], $num ) ); - } - } - $im = $i; - } - - $num = length($input_line) - $rtoken_map->[$im]; # make the last token - if ( $num > 0 ) { - push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) ); - } - - $tokenizer_self->{_in_attribute_list} = $in_attribute_list; - $tokenizer_self->{_in_quote} = $in_quote; - $tokenizer_self->{_quote_target} = - $in_quote ? matching_end_token($quote_character) : ""; - $tokenizer_self->{_rhere_target_list} = $rhere_target_list; - - $line_of_tokens->{_rtoken_type} = \@token_type; - $line_of_tokens->{_rtokens} = \@tokens; - $line_of_tokens->{_rblock_type} = \@block_type; - $line_of_tokens->{_rcontainer_type} = \@container_type; - $line_of_tokens->{_rcontainer_environment} = \@container_environment; - $line_of_tokens->{_rtype_sequence} = \@type_sequence; - $line_of_tokens->{_rlevels} = \@levels; - $line_of_tokens->{_rslevels} = \@slevels; - $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; - $line_of_tokens->{_rci_levels} = \@ci_string; - $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; - - return; - } -} # end tokenize_this_line - -#########i############################################################# -# Tokenizer routines which assist in identifying token types -####################################################################### - -sub operator_expected { - - # Many perl symbols have two or more meanings. For example, '<<' - # can be a shift operator or a here-doc operator. The - # interpretation of these symbols depends on the current state of - # the tokenizer, which may either be expecting a term or an - # operator. For this example, a << would be a shift if an operator - # is expected, and a here-doc if a term is expected. This routine - # is called to make this decision for any current token. It returns - # one of three possible values: - # - # OPERATOR - operator expected (or at least, not a term) - # UNKNOWN - can't tell - # TERM - a term is expected (or at least, not an operator) - # - # The decision is based on what has been seen so far. This - # information is stored in the "$last_nonblank_type" and - # "$last_nonblank_token" variables. For example, if the - # $last_nonblank_type is '=~', then we are expecting a TERM, whereas - # if $last_nonblank_type is 'n' (numeric), we are expecting an - # OPERATOR. - # - # If a UNKNOWN is returned, the calling routine must guess. A major - # goal of this tokenizer is to minimize the possibility of returning - # UNKNOWN, because a wrong guess can spoil the formatting of a - # script. - # - # adding NEW_TOKENS: it is critically important that this routine be - # updated to allow it to determine if an operator or term is to be - # expected after the new token. Doing this simply involves adding - # the new token character to one of the regexes in this routine or - # to one of the hash lists - # that it uses, which are initialized in the BEGIN section. - # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, - # $statement_type - - my ( $prev_type, $tok, $next_type ) = @_; - - my $op_expected = UNKNOWN; - -##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; - -# Note: function prototype is available for token type 'U' for future -# program development. It contains the leading and trailing parens, -# and no blanks. It might be used to eliminate token type 'C', for -# example (prototype = '()'). Thus: -# if ($last_nonblank_type eq 'U') { -# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; -# } - - # A possible filehandle (or object) requires some care... - if ( $last_nonblank_type eq 'Z' ) { - - # angle.t - if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { - $op_expected = UNKNOWN; - } - - # For possible file handle like "$a", Perl uses weird parsing rules. - # For example: - # print $a/2,"/hi"; - division - # print $a / 2,"/hi"; - division - # print $a/ 2,"/hi"; - division - # print $a /2,"/hi"; - pattern (and error)! - elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { - $op_expected = TERM; - } - - # Note when an operation is being done where a - # filehandle might be expected, since a change in whitespace - # could change the interpretation of the statement. - else { - if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { - complain("operator in print statement not recommended\n"); - $op_expected = OPERATOR; - } - } - } - - # Check for smartmatch operator before preceding brace or square bracket. - # For example, at the ? after the ] in the following expressions we are - # expecting an operator: - # - # qr/3/ ~~ ['1234'] ? 1 : 0; - # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; - elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) { - $op_expected = OPERATOR; - } - - # handle something after 'do' and 'eval' - elsif ( $is_block_operator{$last_nonblank_token} ) { - - # something like $a = eval "expression"; - # ^ - if ( $last_nonblank_type eq 'k' ) { - $op_expected = TERM; # expression or list mode following keyword - } - - # something like $a = do { BLOCK } / 2; - # or this ? after a smartmatch anonynmous hash or array reference: - # qr/3/ ~~ ['1234'] ? 1 : 0; - # ^ - else { - $op_expected = OPERATOR; # block mode following } - } - } - - # handle bare word.. - elsif ( $last_nonblank_type eq 'w' ) { - - # unfortunately, we can't tell what type of token to expect next - # after most bare words - $op_expected = UNKNOWN; - } - - # operator, but not term possible after these types - # Note: moved ')' from type to token because parens in list context - # get marked as '{' '}' now. This is a minor glitch in the following: - # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); - # - elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) - || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) - { - $op_expected = OPERATOR; - - # in a 'use' statement, numbers and v-strings are not true - # numbers, so to avoid incorrect error messages, we will - # mark them as unknown for now (use.t) - # TODO: it would be much nicer to create a new token V for VERSION - # number in a use statement. Then this could be a check on type V - # and related patches which change $statement_type for '=>' - # and ',' could be removed. Further, it would clean things up to - # scan the 'use' statement with a separate subroutine. - if ( ( $statement_type eq 'use' ) - && ( $last_nonblank_type =~ /^[nv]$/ ) ) - { - $op_expected = UNKNOWN; - } - - # expecting VERSION or {} after package NAMESPACE - elsif ($statement_type =~ /^package\b/ - && $last_nonblank_token =~ /^package\b/ ) - { - $op_expected = TERM; - } - } - - # no operator after many keywords, such as "die", "warn", etc - elsif ( $expecting_term_token{$last_nonblank_token} ) { - - # patch for dor.t (defined or). - # perl functions which may be unary operators - # TODO: This list is incomplete, and these should be put - # into a hash. - if ( $tok eq '/' - && $next_type eq '/' - && $last_nonblank_type eq 'k' - && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) - { - $op_expected = OPERATOR; - } - else { - $op_expected = TERM; - } - } - - # no operator after things like + - ** (i.e., other operators) - elsif ( $expecting_term_types{$last_nonblank_type} ) { - $op_expected = TERM; - } - - # a few operators, like "time", have an empty prototype () and so - # take no parameters but produce a value to operate on - elsif ( $expecting_operator_token{$last_nonblank_token} ) { - $op_expected = OPERATOR; - } - - # post-increment and decrement produce values to be operated on - elsif ( $expecting_operator_types{$last_nonblank_type} ) { - $op_expected = OPERATOR; - } - - # no value to operate on after sub block - elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } - - # a right brace here indicates the end of a simple block. - # all non-structural right braces have type 'R' - # all braces associated with block operator keywords have been given those - # keywords as "last_nonblank_token" and caught above. - # (This statement is order dependent, and must come after checking - # $last_nonblank_token). - elsif ( $last_nonblank_type eq '}' ) { - - # patch for dor.t (defined or). - if ( $tok eq '/' - && $next_type eq '/' - && $last_nonblank_token eq ']' ) - { - $op_expected = OPERATOR; - } - - # Patch for RT #116344: misparse a ternary operator after an anonymous - # hash, like this: - # return ref {} ? 1 : 0; - # The right brace should really be marked type 'R' in this case, and - # it is safest to return an UNKNOWN here. Expecting a TERM will - # cause the '?' to always be interpreted as a pattern delimiter - # rather than introducing a ternary operator. - elsif ( $tok eq '?' ) { - $op_expected = UNKNOWN; - } - else { - $op_expected = TERM; - } - } - - # something else..what did I forget? - else { - - # collecting diagnostics on unknown operator types..see what was missed - $op_expected = UNKNOWN; - write_diagnostics( -"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" - ); - } - - TOKENIZER_DEBUG_FLAG_EXPECT && do { - print STDOUT -"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; - }; - return $op_expected; -} - -sub new_statement_ok { - - # return true if the current token can start a new statement - # USES GLOBAL VARIABLES: $last_nonblank_type - - return label_ok() # a label would be ok here - - || $last_nonblank_type eq 'J'; # or we follow a label - -} - -sub label_ok { - - # Decide if a bare word followed by a colon here is a label - # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, - # $brace_depth, @brace_type - - # if it follows an opening or closing code block curly brace.. - if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) - && $last_nonblank_type eq $last_nonblank_token ) - { - - # it is a label if and only if the curly encloses a code block - return $brace_type[$brace_depth]; - } - - # otherwise, it is a label if and only if it follows a ';' (real or fake) - # or another label - else { - return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); - } -} - -sub code_block_type { - - # Decide if this is a block of code, and its type. - # Must be called only when $type = $token = '{' - # The problem is to distinguish between the start of a block of code - # and the start of an anonymous hash reference - # Returns "" if not code block, otherwise returns 'last_nonblank_token' - # to indicate the type of code block. (For example, 'last_nonblank_token' - # might be 'if' for an if block, 'else' for an else block, etc). - # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, - # $last_nonblank_block_type, $brace_depth, @brace_type - - # handle case of multiple '{'s - -# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; - - my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; - if ( $last_nonblank_token eq '{' - && $last_nonblank_type eq $last_nonblank_token ) - { - - # opening brace where a statement may appear is probably - # a code block but might be and anonymous hash reference - if ( $brace_type[$brace_depth] ) { - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # cannot start a code block within an anonymous hash - else { - return ""; - } - } - - elsif ( $last_nonblank_token eq ';' ) { - - # an opening brace where a statement may appear is probably - # a code block but might be and anonymous hash reference - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # handle case of '}{' - elsif ($last_nonblank_token eq '}' - && $last_nonblank_type eq $last_nonblank_token ) - { - - # a } { situation ... - # could be hash reference after code block..(blktype1.t) - if ($last_nonblank_block_type) { - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # must be a block if it follows a closing hash reference - else { - return $last_nonblank_token; - } - } - - ################################################################ - # NOTE: braces after type characters start code blocks, but for - # simplicity these are not identified as such. See also - # sub is_non_structural_brace. - ################################################################ - -## elsif ( $last_nonblank_type eq 't' ) { -## return $last_nonblank_token; -## } - - # brace after label: - elsif ( $last_nonblank_type eq 'J' ) { - return $last_nonblank_token; - } - -# otherwise, look at previous token. This must be a code block if -# it follows any of these: -# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ - elsif ( $is_code_block_token{$last_nonblank_token} ) { - - # Bug Patch: Note that the opening brace after the 'if' in the following - # snippet is an anonymous hash ref and not a code block! - # print 'hi' if { x => 1, }->{x}; - # We can identify this situation because the last nonblank type - # will be a keyword (instead of a closing peren) - if ( $last_nonblank_token =~ /^(if|unless)$/ - && $last_nonblank_type eq 'k' ) - { - return ""; - } - else { - return $last_nonblank_token; - } - } - - # or a sub or package BLOCK - elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) - && $last_nonblank_token =~ /^(sub|package)\b/ ) - { - return $last_nonblank_token; - } - - elsif ( $statement_type =~ /^(sub|package)\b/ ) { - return $statement_type; - } - - # user-defined subs with block parameters (like grep/map/eval) - elsif ( $last_nonblank_type eq 'G' ) { - return $last_nonblank_token; - } - - # check bareword - elsif ( $last_nonblank_type eq 'w' ) { - return decide_if_code_block( $i, $rtokens, $rtoken_type, - $max_token_index ); - } - - # Patch for bug # RT #94338 reported by Daniel Trizen - # for-loop in a parenthesized block-map triggering an error message: - # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); - # Check for a code block within a parenthesized function call - elsif ( $last_nonblank_token eq '(' ) { - my $paren_type = $paren_type[$paren_depth]; - if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) { - - # We will mark this as a code block but use type 't' instead - # of the name of the contining function. This will allow for - # correct parsing but will usually produce better formatting. - # Braces with block type 't' are not broken open automatically - # in the formatter as are other code block types, and this usually - # works best. - return 't'; # (Not $paren_type) - } - else { - return ""; - } - } - - # handle unknown syntax ') {' - # we previously appended a '()' to mark this case - elsif ( $last_nonblank_token =~ /\(\)$/ ) { - return $last_nonblank_token; - } - - # anything else must be anonymous hash reference - else { - return ""; - } -} - -sub decide_if_code_block { - - # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; - - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - # we are at a '{' where a statement may appear. - # We must decide if this brace starts an anonymous hash or a code - # block. - # return "" if anonymous hash, and $last_nonblank_token otherwise - - # initialize to be code BLOCK - my $code_block_type = $last_nonblank_token; - - # Check for the common case of an empty anonymous hash reference: - # Maybe something like sub { { } } - if ( $next_nonblank_token eq '}' ) { - $code_block_type = ""; - } - - else { - - # To guess if this '{' is an anonymous hash reference, look ahead - # and test as follows: - # - # it is a hash reference if next come: - # - a string or digit followed by a comma or => - # - bareword followed by => - # otherwise it is a code block - # - # Examples of anonymous hash ref: - # {'aa',}; - # {1,2} - # - # Examples of code blocks: - # {1; print "hello\n", 1;} - # {$a,1}; - - # We are only going to look ahead one more (nonblank/comment) line. - # Strange formatting could cause a bad guess, but that's unlikely. - my @pre_types; - my @pre_tokens; - - # Ignore the rest of this line if it is a side comment - if ( $next_nonblank_token ne '#' ) { - @pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ]; - @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ]; - } - my ( $rpre_tokens, $rpre_types ) = - peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but - # generous, and prevents - # wasting lots of - # time in mangled files - if ( defined($rpre_types) && @{$rpre_types} ) { - push @pre_types, @{$rpre_types}; - push @pre_tokens, @{$rpre_tokens}; - } - - # put a sentinel token to simplify stopping the search - push @pre_types, '}'; - push @pre_types, '}'; - - my $jbeg = 0; - $jbeg = 1 if $pre_types[0] eq 'b'; - - # first look for one of these - # - bareword - # - bareword with leading - - # - digit - # - quoted string - my $j = $jbeg; - if ( $pre_types[$j] =~ /^[\'\"]/ ) { - - # find the closing quote; don't worry about escapes - my $quote_mark = $pre_types[$j]; - foreach my $k ( $j + 1 .. $#pre_types - 1 ) { - if ( $pre_types[$k] eq $quote_mark ) { - $j = $k + 1; - my $next = $pre_types[$j]; - last; - } - } - } - elsif ( $pre_types[$j] eq 'd' ) { - $j++; - } - elsif ( $pre_types[$j] eq 'w' ) { - $j++; - } - elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { - $j++; - } - if ( $j > $jbeg ) { - - $j++ if $pre_types[$j] eq 'b'; - - # Patched for RT #95708 - if ( - - # it is a comma which is not a pattern delimeter except for qw - ( - $pre_types[$j] eq ',' - && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/ - ) - - # or a => - || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) - ) - { - $code_block_type = ""; - } - } - } - - return $code_block_type; -} - -sub report_unexpected { - - # report unexpected token type and show where it is - # USES GLOBAL VARIABLES: $tokenizer_self - my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, - $rpretoken_type, $input_line ) - = @_; - - if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { - my $msg = "found $found where $expecting expected"; - my $pos = $rpretoken_map->[$i_tok]; - interrupt_logfile(); - my $input_line_number = $tokenizer_self->{_last_line_number}; - my ( $offset, $numbered_line, $underline ) = - make_numbered_line( $input_line_number, $input_line, $pos ); - $underline = write_on_underline( $underline, $pos - $offset, '^' ); - - my $trailer = ""; - if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { - my $pos_prev = $rpretoken_map->[$last_nonblank_i]; - my $num; - if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) { - $num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev; - } - else { - $num = $pos - $pos_prev; - } - if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } - - $underline = - write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); - $trailer = " (previous token underlined)"; - } - warning( $numbered_line . "\n" ); - warning( $underline . "\n" ); - warning( $msg . $trailer . "\n" ); - resume_logfile(); - } - return; -} - -sub is_non_structural_brace { - - # Decide if a brace or bracket is structural or non-structural - # by looking at the previous token and type - # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token - - # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. - # Tentatively deactivated because it caused the wrong operator expectation - # for this code: - # $user = @vars[1] / 100; - # Must update sub operator_expected before re-implementing. - # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { - # return 0; - # } - - ################################################################ - # NOTE: braces after type characters start code blocks, but for - # simplicity these are not identified as such. See also - # sub code_block_type - ################################################################ - - ##if ($last_nonblank_type eq 't') {return 0} - - # otherwise, it is non-structural if it is decorated - # by type information. - # For example, the '{' here is non-structural: ${xxx} - return ( - $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ - - # or if we follow a hash or array closing curly brace or bracket - # For example, the second '{' in this is non-structural: $a{'x'}{'y'} - # because the first '}' would have been given type 'R' - || $last_nonblank_type =~ /^([R\]])$/ - ); -} - -#########i############################################################# -# Tokenizer routines for tracking container nesting depths -####################################################################### - -# The following routines keep track of nesting depths of the nesting -# types, ( [ { and ?. This is necessary for determining the indentation -# level, and also for debugging programs. Not only do they keep track of -# nesting depths of the individual brace types, but they check that each -# of the other brace types is balanced within matching pairs. For -# example, if the program sees this sequence: -# -# { ( ( ) } -# -# then it can determine that there is an extra left paren somewhere -# between the { and the }. And so on with every other possible -# combination of outer and inner brace types. For another -# example: -# -# ( [ ..... ] ] ) -# -# which has an extra ] within the parens. -# -# The brace types have indexes 0 .. 3 which are indexes into -# the matrices. -# -# The pair ? : are treated as just another nesting type, with ? acting -# as the opening brace and : acting as the closing brace. -# -# The matrix -# -# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; -# -# saves the nesting depth of brace type $b (where $b is either of the other -# nesting types) when brace type $a enters a new depth. When this depth -# decreases, a check is made that the current depth of brace types $b is -# unchanged, or otherwise there must have been an error. This can -# be very useful for localizing errors, particularly when perl runs to -# the end of a large file (such as this one) and announces that there -# is a problem somewhere. -# -# A numerical sequence number is maintained for every nesting type, -# so that each matching pair can be uniquely identified in a simple -# way. - -sub increase_nesting_depth { - my ( $aa, $pos ) = @_; - - # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, - # @current_sequence_number, @depth_array, @starting_line_of_current_depth, - # $statement_type - $current_depth[$aa]++; - $total_depth++; - $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; - my $input_line_number = $tokenizer_self->{_last_line_number}; - my $input_line = $tokenizer_self->{_line_text}; - - # Sequence numbers increment by number of items. This keeps - # a unique set of numbers but still allows the relative location - # of any type to be determined. - $nesting_sequence_number[$aa] += scalar(@closing_brace_names); - my $seqno = $nesting_sequence_number[$aa]; - $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; - - $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = - [ $input_line_number, $input_line, $pos ]; - - for my $bb ( 0 .. $#closing_brace_names ) { - next if ( $bb == $aa ); - $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; - } - - # set a flag for indenting a nested ternary statement - my $indent = 0; - if ( $aa == QUESTION_COLON ) { - $nested_ternary_flag[ $current_depth[$aa] ] = 0; - if ( $current_depth[$aa] > 1 ) { - if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { - my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; - if ( $pdepth == $total_depth - 1 ) { - $indent = 1; - $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; - } - } - } - } - $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type; - $statement_type = ""; - return ( $seqno, $indent ); -} - -sub decrease_nesting_depth { - - my ( $aa, $pos ) = @_; - - # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, - # @current_sequence_number, @depth_array, @starting_line_of_current_depth - # $statement_type - my $seqno = 0; - my $input_line_number = $tokenizer_self->{_last_line_number}; - my $input_line = $tokenizer_self->{_line_text}; - - my $outdent = 0; - $total_depth--; - if ( $current_depth[$aa] > 0 ) { - - # set a flag for un-indenting after seeing a nested ternary statement - $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; - if ( $aa == QUESTION_COLON ) { - $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; - } - $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ]; - - # check that any brace types $bb contained within are balanced - for my $bb ( 0 .. $#closing_brace_names ) { - next if ( $bb == $aa ); - - unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == - $current_depth[$bb] ) - { - my $diff = - $current_depth[$bb] - - $depth_array[$aa][$bb][ $current_depth[$aa] ]; - - # don't whine too many times - my $saw_brace_error = get_saw_brace_error(); - if ( - $saw_brace_error <= MAX_NAG_MESSAGES - - # if too many closing types have occurred, we probably - # already caught this error - && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) - ) - { - interrupt_logfile(); - my $rsl = - $starting_line_of_current_depth[$aa] - [ $current_depth[$aa] ]; - my $sl = $rsl->[0]; - my $rel = [ $input_line_number, $input_line, $pos ]; - my $el = $rel->[0]; - my ($ess); - - if ( $diff == 1 || $diff == -1 ) { - $ess = ''; - } - else { - $ess = 's'; - } - my $bname = - ( $diff > 0 ) - ? $opening_brace_names[$bb] - : $closing_brace_names[$bb]; - write_error_indicator_pair( @{$rsl}, '^' ); - my $msg = <<"EOM"; -Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el -EOM - - if ( $diff > 0 ) { - my $rml = - $starting_line_of_current_depth[$bb] - [ $current_depth[$bb] ]; - my $ml = $rml->[0]; - $msg .= -" The most recent un-matched $bname is on line $ml\n"; - write_error_indicator_pair( @{$rml}, '^' ); - } - write_error_indicator_pair( @{$rel}, '^' ); - warning($msg); - resume_logfile(); - } - increment_brace_error(); - } - } - $current_depth[$aa]--; - } - else { - - my $saw_brace_error = get_saw_brace_error(); - if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { - my $msg = <<"EOM"; -There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number -EOM - indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); - } - increment_brace_error(); - } - return ( $seqno, $outdent ); -} - -sub check_final_nesting_depths { - - # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth - - for my $aa ( 0 .. $#closing_brace_names ) { - - if ( $current_depth[$aa] ) { - my $rsl = - $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; - my $sl = $rsl->[0]; - my $msg = <<"EOM"; -Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] -The most recent un-matched $opening_brace_names[$aa] is on line $sl -EOM - indicate_error( $msg, @{$rsl}, '^' ); - increment_brace_error(); - } - } - return; -} - -#########i############################################################# -# Tokenizer routines for looking ahead in input stream -####################################################################### - -sub peek_ahead_for_n_nonblank_pre_tokens { - - # returns next n pretokens if they exist - # returns undef's if hits eof without seeing any pretokens - # USES GLOBAL VARIABLES: $tokenizer_self - my $max_pretokens = shift; - my $line; - my $i = 0; - my ( $rpre_tokens, $rmap, $rpre_types ); - - while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) - { - $line =~ s/^\s*//; # trim leading blanks - next if ( length($line) <= 0 ); # skip blank - next if ( $line =~ /^#/ ); # skip comment - ( $rpre_tokens, $rmap, $rpre_types ) = - pre_tokenize( $line, $max_pretokens ); - last; - } - return ( $rpre_tokens, $rpre_types ); -} - -# look ahead for next non-blank, non-comment line of code -sub peek_ahead_for_nonblank_token { - - # USES GLOBAL VARIABLES: $tokenizer_self - my ( $rtokens, $max_token_index ) = @_; - my $line; - my $i = 0; - - while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) - { - $line =~ s/^\s*//; # trim leading blanks - next if ( length($line) <= 0 ); # skip blank - next if ( $line =~ /^#/ ); # skip comment - my ( $rtok, $rmap, $rtype ) = - pre_tokenize( $line, 2 ); # only need 2 pre-tokens - my $j = $max_token_index + 1; - - foreach my $tok ( @{$rtok} ) { - last if ( $tok =~ "\n" ); - $rtokens->[ ++$j ] = $tok; - } - last; - } - return $rtokens; -} - -#########i############################################################# -# Tokenizer guessing routines for ambiguous situations -####################################################################### - -sub guess_if_pattern_or_conditional { - - # this routine is called when we have encountered a ? following an - # unknown bareword, and we must decide if it starts a pattern or not - # input parameters: - # $i - token index of the ? starting possible pattern - # output parameters: - # $is_pattern = 0 if probably not pattern, =1 if probably a pattern - # msg = a warning or diagnostic message - # USES GLOBAL VARIABLES: $last_nonblank_token - - # FIXME: this needs to be rewritten - - my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; - my $is_pattern = 0; - my $msg = "guessing that ? after $last_nonblank_token starts a "; - - if ( $i >= $max_token_index ) { - $msg .= "conditional (no end to pattern found on the line)\n"; - } - else { - my $ibeg = $i; - $i = $ibeg + 1; - my $next_token = $rtokens->[$i]; # first token after ? - - # look for a possible ending ? on this line.. - my $in_quote = 1; - my $quote_depth = 0; - my $quote_character = ''; - my $quote_pos = 0; - my $quoted_string; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - - if ($in_quote) { - - # we didn't find an ending ? on this line, - # so we bias towards conditional - $is_pattern = 0; - $msg .= "conditional (no ending ? on this line)\n"; - - # we found an ending ?, so we bias towards a pattern - } - else { - - # Watch out for an ending ? in quotes, like this - # my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; - my $s_quote = 0; - my $d_quote = 0; - my $colons = 0; - foreach my $ii ( $ibeg + 1 .. $i - 1 ) { - my $tok = $rtokens->[$ii]; - if ( $tok eq ":" ) { $colons++ } - if ( $tok eq "'" ) { $s_quote++ } - if ( $tok eq '"' ) { $d_quote++ } - } - if ( $s_quote % 2 || $d_quote % 2 || $colons ) { - $is_pattern = 0; - $msg .= "found ending ? but unbalanced quote chars\n"; - } - elsif ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { - $is_pattern = 1; - $msg .= "pattern (found ending ? and pattern expected)\n"; - } - else { - $msg .= "pattern (uncertain, but found ending ?)\n"; - } - } - } - return ( $is_pattern, $msg ); -} - -sub guess_if_pattern_or_division { - - # this routine is called when we have encountered a / following an - # unknown bareword, and we must decide if it starts a pattern or is a - # division - # input parameters: - # $i - token index of the / starting possible pattern - # output parameters: - # $is_pattern = 0 if probably division, =1 if probably a pattern - # msg = a warning or diagnostic message - # USES GLOBAL VARIABLES: $last_nonblank_token - my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; - my $is_pattern = 0; - my $msg = "guessing that / after $last_nonblank_token starts a "; - - if ( $i >= $max_token_index ) { - $msg .= "division (no end to pattern found on the line)\n"; - } - else { - my $ibeg = $i; - my $divide_expected = - numerator_expected( $i, $rtokens, $max_token_index ); - $i = $ibeg + 1; - my $next_token = $rtokens->[$i]; # first token after slash - - # look for a possible ending / on this line.. - my $in_quote = 1; - my $quote_depth = 0; - my $quote_character = ''; - my $quote_pos = 0; - my $quoted_string; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - - if ($in_quote) { - - # we didn't find an ending / on this line, - # so we bias towards division - if ( $divide_expected >= 0 ) { - $is_pattern = 0; - $msg .= "division (no ending / on this line)\n"; - } - else { - $msg = "multi-line pattern (division not possible)\n"; - $is_pattern = 1; - } - - } - - # we found an ending /, so we bias towards a pattern - else { - - if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { - - if ( $divide_expected >= 0 ) { - - if ( $i - $ibeg > 60 ) { - $msg .= "division (matching / too distant)\n"; - $is_pattern = 0; - } - else { - $msg .= "pattern (but division possible too)\n"; - $is_pattern = 1; - } - } - else { - $is_pattern = 1; - $msg .= "pattern (division not possible)\n"; - } - } - else { - - if ( $divide_expected >= 0 ) { - $is_pattern = 0; - $msg .= "division (pattern not possible)\n"; - } - else { - $is_pattern = 1; - $msg .= - "pattern (uncertain, but division would not work here)\n"; - } - } - } - } - return ( $is_pattern, $msg ); -} - -# try to resolve here-doc vs. shift by looking ahead for -# non-code or the end token (currently only looks for end token) -# returns 1 if it is probably a here doc, 0 if not -sub guess_if_here_doc { - - # This is how many lines we will search for a target as part of the - # guessing strategy. It is a constant because there is probably - # little reason to change it. - # USES GLOBAL VARIABLES: $tokenizer_self, $current_package - # %is_constant, - my $HERE_DOC_WINDOW = 40; - - my $next_token = shift; - my $here_doc_expected = 0; - my $line; - my $k = 0; - my $msg = "checking <<"; - - while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) - { - chomp $line; - - if ( $line =~ /^$next_token$/ ) { - $msg .= " -- found target $next_token ahead $k lines\n"; - $here_doc_expected = 1; # got it - last; - } - last if ( $k >= $HERE_DOC_WINDOW ); - } - - unless ($here_doc_expected) { - - if ( !defined($line) ) { - $here_doc_expected = -1; # hit eof without seeing target - $msg .= " -- must be shift; target $next_token not in file\n"; - - } - else { # still unsure..taking a wild guess - - if ( !$is_constant{$current_package}{$next_token} ) { - $here_doc_expected = 1; - $msg .= - " -- guessing it's a here-doc ($next_token not a constant)\n"; - } - else { - $msg .= - " -- guessing it's a shift ($next_token is a constant)\n"; - } - } - } - write_logfile_entry($msg); - return $here_doc_expected; -} - -#########i############################################################# -# Tokenizer Routines for scanning identifiers and related items -####################################################################### - -sub scan_bare_identifier_do { - - # this routine is called to scan a token starting with an alphanumeric - # variable or package separator, :: or '. - # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, - # $last_nonblank_type,@paren_type, $paren_depth - - my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, - $max_token_index ) - = @_; - my $i_begin = $i; - my $package = undef; - - my $i_beg = $i; - - # we have to back up one pretoken at a :: since each : is one pretoken - if ( $tok eq '::' ) { $i_beg-- } - if ( $tok eq '->' ) { $i_beg-- } - my $pos_beg = $rtoken_map->[$i_beg]; - pos($input_line) = $pos_beg; - - # Examples: - # A::B::C - # A:: - # ::A - # A'B - if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { - - my $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $tok = substr( $input_line, $pos_beg, $numc ); - - # type 'w' includes anything without leading type info - # ($,%,@,*) including something like abc::def::ghi - $type = 'w'; - - my $sub_name = ""; - if ( defined($2) ) { $sub_name = $2; } - if ( defined($1) ) { - $package = $1; - - # patch: don't allow isolated package name which just ends - # in the old style package separator (single quote). Example: - # use CGI':all'; - if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { - $pos--; - } - - $package =~ s/\'/::/g; - if ( $package =~ /^\:/ ) { $package = 'main' . $package } - $package =~ s/::$//; - } - else { - $package = $current_package; - - if ( $is_keyword{$tok} ) { - $type = 'k'; - } - } - - # if it is a bareword.. - if ( $type eq 'w' ) { - - # check for v-string with leading 'v' type character - # (This seems to have precedence over filehandle, type 'Y') - if ( $tok =~ /^v\d[_\d]*$/ ) { - - # we only have the first part - something like 'v101' - - # look for more - if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { - $pos = pos($input_line); - $numc = $pos - $pos_beg; - $tok = substr( $input_line, $pos_beg, $numc ); - } - $type = 'v'; - - # warn if this version can't handle v-strings - report_v_string($tok); - } - - elsif ( $is_constant{$package}{$sub_name} ) { - $type = 'C'; - } - - # bareword after sort has implied empty prototype; for example: - # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); - # This has priority over whatever the user has specified. - elsif ($last_nonblank_token eq 'sort' - && $last_nonblank_type eq 'k' ) - { - $type = 'Z'; - } - - # Note: strangely, perl does not seem to really let you create - # functions which act like eval and do, in the sense that eval - # and do may have operators following the final }, but any operators - # that you create with prototype (&) apparently do not allow - # trailing operators, only terms. This seems strange. - # If this ever changes, here is the update - # to make perltidy behave accordingly: - - # elsif ( $is_block_function{$package}{$tok} ) { - # $tok='eval'; # patch to do braces like eval - doesn't work - # $type = 'k'; - #} - # FIXME: This could become a separate type to allow for different - # future behavior: - elsif ( $is_block_function{$package}{$sub_name} ) { - $type = 'G'; - } - - elsif ( $is_block_list_function{$package}{$sub_name} ) { - $type = 'G'; - } - elsif ( $is_user_function{$package}{$sub_name} ) { - $type = 'U'; - $prototype = $user_function_prototype{$package}{$sub_name}; - } - - # check for indirect object - elsif ( - - # added 2001-03-27: must not be followed immediately by '(' - # see fhandle.t - ( $input_line !~ m/\G\(/gc ) - - # and - && ( - - # preceded by keyword like 'print', 'printf' and friends - $is_indirect_object_taker{$last_nonblank_token} - - # or preceded by something like 'print(' or 'printf(' - || ( - ( $last_nonblank_token eq '(' ) - && $is_indirect_object_taker{ $paren_type[$paren_depth] - } - - ) - ) - ) - { - - # may not be indirect object unless followed by a space - if ( $input_line =~ m/\G\s+/gc ) { - $type = 'Y'; - - # Abandon Hope ... - # Perl's indirect object notation is a very bad - # thing and can cause subtle bugs, especially for - # beginning programmers. And I haven't even been - # able to figure out a sane warning scheme which - # doesn't get in the way of good scripts. - - # Complain if a filehandle has any lower case - # letters. This is suggested good practice. - # Use 'sub_name' because something like - # main::MYHANDLE is ok for filehandle - if ( $sub_name =~ /[a-z]/ ) { - - # could be bug caused by older perltidy if - # followed by '(' - if ( $input_line =~ m/\G\s*\(/gc ) { - complain( -"Caution: unknown word '$tok' in indirect object slot\n" - ); - } - } - } - - # bareword not followed by a space -- may not be filehandle - # (may be function call defined in a 'use' statement) - else { - $type = 'Z'; - } - } - } - - # Now we must convert back from character position - # to pre_token index. - # I don't think an error flag can occur here ..but who knows - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - if ($error) { - warning("scan_bare_identifier: Possibly invalid tokenization\n"); - } - } - - # no match but line not blank - could be syntax error - # perl will take '::' alone without complaint - else { - $type = 'w'; - - # change this warning to log message if it becomes annoying - warning("didn't find identifier after leading ::\n"); - } - return ( $i, $tok, $type, $prototype ); -} - -sub scan_id_do { - -# This is the new scanner and will eventually replace scan_identifier. -# Only type 'sub' and 'package' are implemented. -# Token types $ * % @ & -> are not yet implemented. -# -# Scan identifier following a type token. -# The type of call depends on $id_scan_state: $id_scan_state = '' -# for starting call, in which case $tok must be the token defining -# the type. -# -# If the type token is the last nonblank token on the line, a value -# of $id_scan_state = $tok is returned, indicating that further -# calls must be made to get the identifier. If the type token is -# not the last nonblank token on the line, the identifier is -# scanned and handled and a value of '' is returned. -# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, -# $statement_type, $tokenizer_self - - my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, - $max_token_index ) - = @_; - my $type = ''; - my ( $i_beg, $pos_beg ); - - #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; - #my ($a,$b,$c) = caller; - #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; - - # on re-entry, start scanning at first token on the line - if ($id_scan_state) { - $i_beg = $i; - $type = ''; - } - - # on initial entry, start scanning just after type token - else { - $i_beg = $i + 1; - $id_scan_state = $tok; - $type = 't'; - } - - # find $i_beg = index of next nonblank token, - # and handle empty lines - my $blank_line = 0; - my $next_nonblank_token = $rtokens->[$i_beg]; - if ( $i_beg > $max_token_index ) { - $blank_line = 1; - } - else { - - # only a '#' immediately after a '$' is not a comment - if ( $next_nonblank_token eq '#' ) { - unless ( $tok eq '$' ) { - $blank_line = 1; - } - } - - if ( $next_nonblank_token =~ /^\s/ ) { - ( $next_nonblank_token, $i_beg ) = - find_next_nonblank_token_on_this_line( $i_beg, $rtokens, - $max_token_index ); - if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { - $blank_line = 1; - } - } - } - - # handle non-blank line; identifier, if any, must follow - unless ($blank_line) { - - if ( $id_scan_state eq 'sub' ) { - ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( - $input_line, $i, $i_beg, - $tok, $type, $rtokens, - $rtoken_map, $id_scan_state, $max_token_index - ); - } - - elsif ( $id_scan_state eq 'package' ) { - ( $i, $tok, $type ) = - do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, - $rtoken_map, $max_token_index ); - $id_scan_state = ''; - } - - else { - warning("invalid token in scan_id: $tok\n"); - $id_scan_state = ''; - } - } - - if ( $id_scan_state && ( !defined($type) || !$type ) ) { - - # shouldn't happen: - warning( -"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" - ); - report_definite_bug(); - } - - TOKENIZER_DEBUG_FLAG_NSCAN && do { - print STDOUT - "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; - }; - return ( $i, $tok, $type, $id_scan_state ); -} - -sub check_prototype { - my ( $proto, $package, $subname ) = @_; - return unless ( defined($package) && defined($subname) ); - if ( defined($proto) ) { - $proto =~ s/^\s*\(\s*//; - $proto =~ s/\s*\)$//; - if ($proto) { - $is_user_function{$package}{$subname} = 1; - $user_function_prototype{$package}{$subname} = "($proto)"; - - # prototypes containing '&' must be treated specially.. - if ( $proto =~ /\&/ ) { - - # right curly braces of prototypes ending in - # '&' may be followed by an operator - if ( $proto =~ /\&$/ ) { - $is_block_function{$package}{$subname} = 1; - } - - # right curly braces of prototypes NOT ending in - # '&' may NOT be followed by an operator - elsif ( $proto !~ /\&$/ ) { - $is_block_list_function{$package}{$subname} = 1; - } - } - } - else { - $is_constant{$package}{$subname} = 1; - } - } - else { - $is_user_function{$package}{$subname} = 1; - } - return; -} - -sub do_scan_package { - - # do_scan_package parses a package name - # it is called with $i_beg equal to the index of the first nonblank - # token following a 'package' token. - # USES GLOBAL VARIABLES: $current_package, - - # package NAMESPACE - # package NAMESPACE VERSION - # package NAMESPACE BLOCK - # package NAMESPACE VERSION BLOCK - # - # If VERSION is provided, package sets the $VERSION variable in the given - # namespace to a version object with the VERSION provided. VERSION must be - # a "strict" style version number as defined by the version module: a - # positive decimal number (integer or decimal-fraction) without - # exponentiation or else a dotted-decimal v-string with a leading 'v' - # character and at least three components. - # reference http://perldoc.perl.org/functions/package.html - - my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, - $max_token_index ) - = @_; - my $package = undef; - my $pos_beg = $rtoken_map->[$i_beg]; - pos($input_line) = $pos_beg; - - # handle non-blank line; package name, if any, must follow - if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { - $package = $1; - $package = ( defined($1) && $1 ) ? $1 : 'main'; - $package =~ s/\'/::/g; - if ( $package =~ /^\:/ ) { $package = 'main' . $package } - $package =~ s/::$//; - my $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); - $type = 'i'; - - # Now we must convert back from character position - # to pre_token index. - # I don't think an error flag can occur here ..but ? - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - if ($error) { warning("Possibly invalid package\n") } - $current_package = $package; - - # we should now have package NAMESPACE - # now expecting VERSION, BLOCK, or ; to follow ... - # package NAMESPACE VERSION - # package NAMESPACE BLOCK - # package NAMESPACE VERSION BLOCK - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - # check that something recognizable follows, but do not parse. - # A VERSION number will be parsed later as a number or v-string in the - # normal way. What is important is to set the statement type if - # everything looks okay so that the operator_expected() routine - # knows that the number is in a package statement. - # Examples of valid primitive tokens that might follow are: - # 1235 . ; { } v3 v - if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) { - $statement_type = $tok; - } - else { - warning( - "Unexpected '$next_nonblank_token' after package name '$tok'\n" - ); - } - } - - # no match but line not blank -- - # could be a label with name package, like package: , for example. - else { - $type = 'k'; - } - - return ( $i, $tok, $type ); -} - -sub scan_identifier_do { - - # This routine assembles tokens into identifiers. It maintains a - # scan state, id_scan_state. It updates id_scan_state based upon - # current id_scan_state and token, and returns an updated - # id_scan_state and the next index after the identifier. - # USES GLOBAL VARIABLES: $context, $last_nonblank_token, - # $last_nonblank_type - - my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, - $expecting, $container_type ) - = @_; - my $i_begin = $i; - my $type = ''; - my $tok_begin = $rtokens->[$i_begin]; - if ( $tok_begin eq ':' ) { $tok_begin = '::' } - my $id_scan_state_begin = $id_scan_state; - my $identifier_begin = $identifier; - my $tok = $tok_begin; - my $message = ""; - - my $in_prototype_or_signature = $container_type =~ /^sub/; - - # these flags will be used to help figure out the type: - my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); - my $saw_type; - - # allow old package separator (') except in 'use' statement - my $allow_tick = ( $last_nonblank_token ne 'use' ); - - # get started by defining a type and a state if necessary - unless ($id_scan_state) { - $context = UNKNOWN_CONTEXT; - - # fixup for digraph - if ( $tok eq '>' ) { - $tok = '->'; - $tok_begin = $tok; - } - $identifier = $tok; - - if ( $tok eq '$' || $tok eq '*' ) { - $id_scan_state = '$'; - $context = SCALAR_CONTEXT; - } - elsif ( $tok eq '%' || $tok eq '@' ) { - $id_scan_state = '$'; - $context = LIST_CONTEXT; - } - elsif ( $tok eq '&' ) { - $id_scan_state = '&'; - } - elsif ( $tok eq 'sub' or $tok eq 'package' ) { - $saw_alpha = 0; # 'sub' is considered type info here - $id_scan_state = '$'; - $identifier .= ' '; # need a space to separate sub from sub name - } - elsif ( $tok eq '::' ) { - $id_scan_state = 'A'; - } - elsif ( $tok =~ /^[A-Za-z_]/ ) { - $id_scan_state = ':'; - } - elsif ( $tok eq '->' ) { - $id_scan_state = '$'; - } - else { - - # shouldn't happen - my ( $a, $b, $c ) = caller; - warning("Program Bug: scan_identifier given bad token = $tok \n"); - warning(" called from sub $a line: $c\n"); - report_definite_bug(); - } - $saw_type = !$saw_alpha; - } - else { - $i--; - $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); - } - - # now loop to gather the identifier - my $i_save = $i; - - while ( $i < $max_token_index ) { - $i_save = $i unless ( $tok =~ /^\s*$/ ); - $tok = $rtokens->[ ++$i ]; - - if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) { - $tok = '::'; - $i++; - } - - if ( $id_scan_state eq '$' ) { # starting variable name - - if ( $tok eq '$' ) { - - $identifier .= $tok; - - # we've got a punctuation variable if end of line (punct.t) - if ( $i == $max_token_index ) { - $type = 'i'; - $id_scan_state = ''; - last; - } - } - - # POSTDEFREF ->@ ->% ->& ->* - elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { - $identifier .= $tok; - } - elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. - $saw_alpha = 1; - $id_scan_state = ':'; # now need :: - $identifier .= $tok; - } - elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. - $saw_alpha = 1; - $id_scan_state = ':'; # now need :: - $identifier .= $tok; - - # Perl will accept leading digits in identifiers, - # although they may not always produce useful results. - # Something like $main::0 is ok. But this also works: - # - # sub howdy::123::bubba{ print "bubba $54321!\n" } - # howdy::123::bubba(); - # - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric - $saw_alpha = 1; - $id_scan_state = ':'; # now need :: - $identifier .= $tok; - } - elsif ( $tok eq '::' ) { - $id_scan_state = 'A'; - $identifier .= $tok; - } - - # $# and POSTDEFREF ->$# - elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array - $identifier .= $tok; # keep same state, a $ could follow - } - elsif ( $tok eq '{' ) { - - # check for something like ${#} or ${©} - if ( - ( - $identifier eq '$' - || $identifier eq '@' - || $identifier eq '$#' - ) - && $i + 2 <= $max_token_index - && $rtokens->[ $i + 2 ] eq '}' - && $rtokens->[ $i + 1 ] !~ /[\s\w]/ - ) - { - my $next2 = $rtokens->[ $i + 2 ]; - my $next1 = $rtokens->[ $i + 1 ]; - $identifier .= $tok . $next1 . $next2; - $i += 2; - $id_scan_state = ''; - last; - } - - # skip something like ${xxx} or ->{ - $id_scan_state = ''; - - # if this is the first token of a line, any tokens for this - # identifier have already been accumulated - if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } - $i = $i_save; - last; - } - - # space ok after leading $ % * & @ - elsif ( $tok =~ /^\s*$/ ) { - - if ( $identifier =~ /^[\$\%\*\&\@]/ ) { - - if ( length($identifier) > 1 ) { - $id_scan_state = ''; - $i = $i_save; - $type = 'i'; # probably punctuation variable - last; - } - else { - - # spaces after $'s are common, and space after @ - # is harmless, so only complain about space - # after other type characters. Space after $ and - # @ will be removed in formatting. Report space - # after % and * because they might indicate a - # parsing error. In other words '% ' might be a - # modulo operator. Delete this warning if it - # gets annoying. - if ( $identifier !~ /^[\@\$]$/ ) { - $message = - "Space in identifier, following $identifier\n"; - } - } - } - - # else: - # space after '->' is ok - } - elsif ( $tok eq '^' ) { - - # check for some special variables like $^W - if ( $identifier =~ /^[\$\*\@\%]$/ ) { - $identifier .= $tok; - $id_scan_state = 'A'; - - # Perl accepts '$^]' or '@^]', but - # there must not be a space before the ']'. - my $next1 = $rtokens->[ $i + 1 ]; - if ( $next1 eq ']' ) { - $i++; - $identifier .= $next1; - $id_scan_state = ""; - last; - } - } - else { - $id_scan_state = ''; - } - } - else { # something else - - if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) { - $id_scan_state = ''; - $i = $i_save; - $type = 'i'; # probably punctuation variable - last; - } - - # check for various punctuation variables - if ( $identifier =~ /^[\$\*\@\%]$/ ) { - $identifier .= $tok; - } - - # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* - elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) { - $identifier .= $tok; - } - - elsif ( $identifier eq '$#' ) { - - if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } - - # perl seems to allow just these: $#: $#- $#+ - elsif ( $tok =~ /^[\:\-\+]$/ ) { - $type = 'i'; - $identifier .= $tok; - } - else { - $i = $i_save; - write_logfile_entry( 'Use of $# is deprecated' . "\n" ); - } - } - elsif ( $identifier eq '$$' ) { - - # perl does not allow references to punctuation - # variables without braces. For example, this - # won't work: - # $:=\4; - # $a = $$:; - # You would have to use - # $a = ${$:}; - - $i = $i_save; - if ( $tok eq '{' ) { $type = 't' } - else { $type = 'i' } - } - elsif ( $identifier eq '->' ) { - $i = $i_save; - } - else { - $i = $i_save; - if ( length($identifier) == 1 ) { $identifier = ''; } - } - $id_scan_state = ''; - last; - } - } - elsif ( $id_scan_state eq '&' ) { # starting sub call? - - if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - $identifier .= $tok; - } - elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - $identifier .= $tok; - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - $identifier .= $tok; - } - elsif ( $tok =~ /^\s*$/ ) { # allow space - } - elsif ( $tok eq '::' ) { # leading :: - $id_scan_state = 'A'; # accept alpha next - $identifier .= $tok; - } - elsif ( $tok eq '{' ) { - if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } - $i = $i_save; - $id_scan_state = ''; - last; - } - else { - - # punctuation variable? - # testfile: cunningham4.pl - # - # We have to be careful here. If we are in an unknown state, - # we will reject the punctuation variable. In the following - # example the '&' is a binary operator but we are in an unknown - # state because there is no sigil on 'Prima', so we don't - # know what it is. But it is a bad guess that - # '&~' is a function variable. - # $self->{text}->{colorMap}->[ - # Prima::PodView::COLOR_CODE_FOREGROUND - # & ~tb::COLOR_INDEX ] = - # $sec->{ColorCode} - if ( $identifier eq '&' && $expecting ) { - $identifier .= $tok; - } - else { - $identifier = ''; - $i = $i_save; - $type = '&'; - } - $id_scan_state = ''; - last; - } - } - elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) - - if ( $tok =~ /^[A-Za-z_]/ ) { # found it - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok eq "'" && $allow_tick ) { - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { - $id_scan_state = '('; - $identifier .= $tok; - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { - $id_scan_state = ')'; - $identifier .= $tok; - } - else { - $id_scan_state = ''; - $i = $i_save; - last; - } - } - elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha - - if ( $tok eq '::' ) { # got it - $identifier .= $tok; - $id_scan_state = 'A'; # now require alpha - } - elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above - $identifier .= $tok; - $id_scan_state = ':'; # now need :: - $saw_alpha = 1; - } - elsif ( $tok eq "'" && $allow_tick ) { # tick - - if ( $is_keyword{$identifier} ) { - $id_scan_state = ''; # that's all - $i = $i_save; - } - else { - $identifier .= $tok; - } - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { - $id_scan_state = '('; - $identifier .= $tok; - } - elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { - $id_scan_state = ')'; - $identifier .= $tok; - } - else { - $id_scan_state = ''; # that's all - $i = $i_save; - last; - } - } - elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype - - if ( $tok eq '(' ) { # got it - $identifier .= $tok; - $id_scan_state = ')'; # now find the end of it - } - elsif ( $tok =~ /^\s*$/ ) { # blank - keep going - $identifier .= $tok; - } - else { - $id_scan_state = ''; # that's all - no prototype - $i = $i_save; - last; - } - } - elsif ( $id_scan_state eq ')' ) { # looking for ) to end - - if ( $tok eq ')' ) { # got it - $identifier .= $tok; - $id_scan_state = ''; # all done - last; - } - elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { - $identifier .= $tok; - } - else { # probable error in script, but keep going - warning("Unexpected '$tok' while seeking end of prototype\n"); - $identifier .= $tok; - } - } - else { # can get here due to error in initialization - $id_scan_state = ''; - $i = $i_save; - last; - } - } - - if ( $id_scan_state eq ')' ) { - warning("Hit end of line while seeking ) to end prototype\n"); - } - - # once we enter the actual identifier, it may not extend beyond - # the end of the current line - if ( $id_scan_state =~ /^[A\:\(\)]/ ) { - $id_scan_state = ''; - } - if ( $i < 0 ) { $i = 0 } - - unless ($type) { - - if ($saw_type) { - - if ($saw_alpha) { - if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { - $type = 'w'; - } - else { $type = 'i' } - } - elsif ( $identifier eq '->' ) { - $type = '->'; - } - elsif ( - ( length($identifier) > 1 ) - - # In something like '@$=' we have an identifier '@$' - # In something like '$${' we have type '$$' (and only - # part of an identifier) - && !( $identifier =~ /\$$/ && $tok eq '{' ) - && ( $identifier !~ /^(sub |package )$/ ) - ) - { - $type = 'i'; - } - else { $type = 't' } - } - elsif ($saw_alpha) { - - # type 'w' includes anything without leading type info - # ($,%,@,*) including something like abc::def::ghi - $type = 'w'; - } - else { - $type = ''; - } # this can happen on a restart - } - - if ($identifier) { - $tok = $identifier; - if ($message) { write_logfile_entry($message) } - } - else { - $tok = $tok_begin; - $i = $i_begin; - } - - TOKENIZER_DEBUG_FLAG_SCAN_ID && do { - my ( $a, $b, $c ) = caller; - print STDOUT -"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; - print STDOUT -"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; - }; - return ( $i, $tok, $type, $id_scan_state, $identifier ); -} - -{ - - # saved package and subnames in case prototype is on separate line - my ( $package_saved, $subname_saved ); - - sub do_scan_sub { - - # do_scan_sub parses a sub name and prototype - # it is called with $i_beg equal to the index of the first nonblank - # token following a 'sub' token. - - # TODO: add future error checks to be sure we have a valid - # sub name. For example, 'sub &doit' is wrong. Also, be sure - # a name is given if and only if a non-anonymous sub is - # appropriate. - # USES GLOBAL VARS: $current_package, $last_nonblank_token, - # $in_attribute_list, %saw_function_definition, - # $statement_type - - my ( - $input_line, $i, $i_beg, - $tok, $type, $rtokens, - $rtoken_map, $id_scan_state, $max_token_index - ) = @_; - $id_scan_state = ""; # normally we get everything in one call - my $subname = undef; - my $package = undef; - my $proto = undef; - my $attrs = undef; - my $match; - - my $pos_beg = $rtoken_map->[$i_beg]; - pos($input_line) = $pos_beg; - - # Look for the sub NAME - if ( - $input_line =~ m/\G\s* - ((?:\w*(?:'|::))*) # package - something that ends in :: or ' - (\w+) # NAME - required - /gcx - ) - { - $match = 1; - $subname = $2; - - $package = ( defined($1) && $1 ) ? $1 : $current_package; - $package =~ s/\'/::/g; - if ( $package =~ /^\:/ ) { $package = 'main' . $package } - $package =~ s/::$//; - my $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); - $type = 'i'; - } - - # Now look for PROTO ATTRS - # Look for prototype/attributes which are usually on the same - # line as the sub name but which might be on a separate line. - # For example, we might have an anonymous sub with attributes, - # or a prototype on a separate line from its sub name - - # NOTE: We only want to parse PROTOTYPES here. If we see anything that - # does not look like a prototype, we assume it is a SIGNATURE and we - # will stop and let the the standard tokenizer handle it. In - # particular, we stop if we see any nested parens, braces, or commas. - my $saw_opening_paren = $input_line =~ /\G\s*\(/; - if ( - $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO - (\s*:)? # ATTRS leading ':' - /gcx - && ( $1 || $2 ) - ) - { - $proto = $1; - $attrs = $2; - - # If we also found the sub name on this call then append PROTO. - # This is not necessary but for compatability with previous - # versions when the -csc flag is used: - if ( $match && $proto ) { - $tok .= $proto; - } - $match ||= 1; - - # Handle prototype on separate line from subname - if ($subname_saved) { - $package = $package_saved; - $subname = $subname_saved; - $tok = $last_nonblank_token; - } - $type = 'i'; - } - - if ($match) { - - # ATTRS: if there are attributes, back up and let the ':' be - # found later by the scanner. - my $pos = pos($input_line); - if ($attrs) { - $pos -= length($attrs); - } - - my $next_nonblank_token = $tok; - - # catch case of line with leading ATTR ':' after anonymous sub - if ( $pos == $pos_beg && $tok eq ':' ) { - $type = 'A'; - $in_attribute_list = 1; - } - - # Otherwise, if we found a match we must convert back from - # string position to the pre_token index for continued parsing. - else { - - # I don't think an error flag can occur here ..but ? - my $error; - ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, - $max_token_index ); - if ($error) { warning("Possibly invalid sub\n") } - - # check for multiple definitions of a sub - ( $next_nonblank_token, my $i_next ) = - find_next_nonblank_token_on_this_line( $i, $rtokens, - $max_token_index ); - } - - if ( $next_nonblank_token =~ /^(\s*|#)$/ ) - { # skip blank or side comment - my ( $rpre_tokens, $rpre_types ) = - peek_ahead_for_n_nonblank_pre_tokens(1); - if ( defined($rpre_tokens) && @{$rpre_tokens} ) { - $next_nonblank_token = $rpre_tokens->[0]; - } - else { - $next_nonblank_token = '}'; - } - } - $package_saved = ""; - $subname_saved = ""; - - # See what's next... - if ( $next_nonblank_token eq '{' ) { - if ($subname) { - - # Check for multiple definitions of a sub, but - # it is ok to have multiple sub BEGIN, etc, - # so we do not complain if name is all caps - if ( $saw_function_definition{$package}{$subname} - && $subname !~ /^[A-Z]+$/ ) - { - my $lno = $saw_function_definition{$package}{$subname}; - warning( -"already saw definition of 'sub $subname' in package '$package' at line $lno\n" - ); - } - $saw_function_definition{$package}{$subname} = - $tokenizer_self->{_last_line_number}; - } - } - elsif ( $next_nonblank_token eq ';' ) { - } - elsif ( $next_nonblank_token eq '}' ) { - } - - # ATTRS - if an attribute list follows, remember the name - # of the sub so the next opening brace can be labeled. - # Setting 'statement_type' causes any ':'s to introduce - # attributes. - elsif ( $next_nonblank_token eq ':' ) { - $statement_type = $tok; - } - - # if we stopped before an open paren ... - elsif ( $next_nonblank_token eq '(' ) { - - # If we DID NOT see this paren above then it must be on the - # next line so we will set a flag to come back here and see if - # it is a PROTOTYPE - - # Otherwise, we assume it is a SIGNATURE rather than a - # PROTOTYPE and let the normal tokenizer handle it as a list - if ( !$saw_opening_paren ) { - $id_scan_state = 'sub'; # we must come back to get proto - $package_saved = $package; - $subname_saved = $subname; - } - $statement_type = $tok; - } - elsif ($next_nonblank_token) { # EOF technically ok - warning( -"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" - ); - } - check_prototype( $proto, $package, $subname ); - } - - # no match but line not blank - else { - } - return ( $i, $tok, $type, $id_scan_state ); - } -} - -#########i############################################################### -# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS -######################################################################### - -sub find_next_nonblank_token { - my ( $i, $rtokens, $max_token_index ) = @_; - - if ( $i >= $max_token_index ) { - if ( !peeked_ahead() ) { - peeked_ahead(1); - $rtokens = - peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); - } - } - my $next_nonblank_token = $rtokens->[ ++$i ]; - - if ( $next_nonblank_token =~ /^\s*$/ ) { - $next_nonblank_token = $rtokens->[ ++$i ]; - } - return ( $next_nonblank_token, $i ); -} - -sub numerator_expected { - - # this is a filter for a possible numerator, in support of guessing - # for the / pattern delimiter token. - # returns - - # 1 - yes - # 0 - can't tell - # -1 - no - # Note: I am using the convention that variables ending in - # _expected have these 3 possible values. - my ( $i, $rtokens, $max_token_index ) = @_; - my $numerator_expected = 0; - - my $next_token = $rtokens->[ $i + 1 ]; - if ( $next_token eq '=' ) { $i++; } # handle /= - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { - $numerator_expected = 1; - } - else { - - if ( $next_nonblank_token =~ /^\s*$/ ) { - $numerator_expected = 0; - } - else { - $numerator_expected = -1; - } - } - return $numerator_expected; -} - -sub pattern_expected { - - # This is the start of a filter for a possible pattern. - # It looks at the token after a possible pattern and tries to - # determine if that token could end a pattern. - # returns - - # 1 - yes - # 0 - can't tell - # -1 - no - my ( $i, $rtokens, $max_token_index ) = @_; - my $is_pattern = 0; - - my $next_token = $rtokens->[ $i + 1 ]; - if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier - my ( $next_nonblank_token, $i_next ) = - find_next_nonblank_token( $i, $rtokens, $max_token_index ); - - # list of tokens which may follow a pattern - # (can probably be expanded) - if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) - { - $is_pattern = 1; - } - else { - - if ( $next_nonblank_token =~ /^\s*$/ ) { - $is_pattern = 0; - } - else { - $is_pattern = -1; - } - } - return $is_pattern; -} - -sub find_next_nonblank_token_on_this_line { - my ( $i, $rtokens, $max_token_index ) = @_; - my $next_nonblank_token; - - if ( $i < $max_token_index ) { - $next_nonblank_token = $rtokens->[ ++$i ]; - - if ( $next_nonblank_token =~ /^\s*$/ ) { - - if ( $i < $max_token_index ) { - $next_nonblank_token = $rtokens->[ ++$i ]; - } - } - } - else { - $next_nonblank_token = ""; - } - return ( $next_nonblank_token, $i ); -} - -sub find_angle_operator_termination { - - # We are looking at a '<' and want to know if it is an angle operator. - # We are to return: - # $i = pretoken index of ending '>' if found, current $i otherwise - # $type = 'Q' if found, '>' otherwise - my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; - my $i = $i_beg; - my $type = '<'; - pos($input_line) = 1 + $rtoken_map->[$i]; - - my $filter; - - # we just have to find the next '>' if a term is expected - if ( $expecting == TERM ) { $filter = '[\>]' } - - # we have to guess if we don't know what is expected - elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } - - # shouldn't happen - we shouldn't be here if operator is expected - else { warning("Program Bug in find_angle_operator_termination\n") } - - # To illustrate what we might be looking at, in case we are - # guessing, here are some examples of valid angle operators - # (or file globs): - # - # - # <$fh> - # <*.c *.h> - # <_> - # ( glob.t) - # <${PREFIX}*img*.$IMAGE_TYPE> - # - # - # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> - # - # Here are some examples of lines which do not have angle operators: - # return undef unless $self->[2]++ < $#{$self->[1]}; - # < 2 || @$t > - # - # the following line from dlister.pl caused trouble: - # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; - # - # If the '<' starts an angle operator, it must end on this line and - # it must not have certain characters like ';' and '=' in it. I use - # this to limit the testing. This filter should be improved if - # possible. - - if ( $input_line =~ /($filter)/g ) { - - if ( $1 eq '>' ) { - - # We MAY have found an angle operator termination if we get - # here, but we need to do more to be sure we haven't been - # fooled. - my $pos = pos($input_line); - - my $pos_beg = $rtoken_map->[$i]; - my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); - - # Reject if the closing '>' follows a '-' as in: - # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } - if ( $expecting eq UNKNOWN ) { - my $check = substr( $input_line, $pos - 2, 1 ); - if ( $check eq '-' ) { - return ( $i, $type ); - } - } - - ######################################debug##### - #write_diagnostics( "ANGLE? :$str\n"); - #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; - ######################################debug##### - $type = 'Q'; - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - - # It may be possible that a quote ends midway in a pretoken. - # If this happens, it may be necessary to split the pretoken. - if ($error) { - warning( - "Possible tokinization error..please check this line\n"); - report_possible_bug(); - } - - # Now let's see where we stand.... - # OK if math op not possible - if ( $expecting == TERM ) { - } - - # OK if there are no more than 2 pre-tokens inside - # (not possible to write 2 token math between < and >) - # This catches most common cases - elsif ( $i <= $i_beg + 3 ) { - write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); - } - - # Not sure.. - else { - - # Let's try a Brace Test: any braces inside must balance - my $br = 0; - while ( $str =~ /\{/g ) { $br++ } - while ( $str =~ /\}/g ) { $br-- } - my $sb = 0; - while ( $str =~ /\[/g ) { $sb++ } - while ( $str =~ /\]/g ) { $sb-- } - my $pr = 0; - while ( $str =~ /\(/g ) { $pr++ } - while ( $str =~ /\)/g ) { $pr-- } - - # if braces do not balance - not angle operator - if ( $br || $sb || $pr ) { - $i = $i_beg; - $type = '<'; - write_diagnostics( - "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); - } - - # we should keep doing more checks here...to be continued - # Tentatively accepting this as a valid angle operator. - # There are lots more things that can be checked. - else { - write_diagnostics( - "ANGLE-Guessing yes: $str expecting=$expecting\n"); - write_logfile_entry("Guessing angle operator here: $str\n"); - } - } - } - - # didn't find ending > - else { - if ( $expecting == TERM ) { - warning("No ending > for angle operator\n"); - } - } - } - return ( $i, $type ); -} - -sub scan_number_do { - - # scan a number in any of the formats that Perl accepts - # Underbars (_) are allowed in decimal numbers. - # input parameters - - # $input_line - the string to scan - # $i - pre_token index to start scanning - # $rtoken_map - reference to the pre_token map giving starting - # character position in $input_line of token $i - # output parameters - - # $i - last pre_token index of the number just scanned - # number - the number (characters); or undef if not a number - - my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; - my $pos_beg = $rtoken_map->[$i]; - my $pos; - my $i_begin = $i; - my $number = undef; - my $type = $input_type; - - my $first_char = substr( $input_line, $pos_beg, 1 ); - - # Look for bad starting characters; Shouldn't happen.. - if ( $first_char !~ /[\d\.\+\-Ee]/ ) { - warning("Program bug - scan_number given character $first_char\n"); - report_definite_bug(); - return ( $i, $type, $number ); - } - - # handle v-string without leading 'v' character ('Two Dot' rule) - # (vstring.t) - # TODO: v-strings may contain underscores - pos($input_line) = $pos_beg; - if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { - $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $number = substr( $input_line, $pos_beg, $numc ); - $type = 'v'; - report_v_string($number); - } - - # handle octal, hex, binary - if ( !defined($number) ) { - pos($input_line) = $pos_beg; - if ( $input_line =~ - /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) - { - $pos = pos($input_line); - my $numc = $pos - $pos_beg; - $number = substr( $input_line, $pos_beg, $numc ); - $type = 'n'; - } - } - - # handle decimal - if ( !defined($number) ) { - pos($input_line) = $pos_beg; - - if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { - $pos = pos($input_line); - - # watch out for things like 0..40 which would give 0. by this; - if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) - && ( substr( $input_line, $pos, 1 ) eq '.' ) ) - { - $pos--; - } - my $numc = $pos - $pos_beg; - $number = substr( $input_line, $pos_beg, $numc ); - $type = 'n'; - } - } - - # filter out non-numbers like e + - . e2 .e3 +e6 - # the rule: at least one digit, and any 'e' must be preceded by a digit - if ( - $number !~ /\d/ # no digits - || ( $number =~ /^(.*)[eE]/ - && $1 !~ /\d/ ) # or no digits before the 'e' - ) - { - $number = undef; - $type = $input_type; - return ( $i, $type, $number ); - } - - # Found a number; now we must convert back from character position - # to pre_token index. An error here implies user syntax error. - # An example would be an invalid octal number like '009'. - my $error; - ( $i, $error ) = - inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); - if ($error) { warning("Possibly invalid number\n") } - - return ( $i, $type, $number ); -} - -sub inverse_pretoken_map { - - # Starting with the current pre_token index $i, scan forward until - # finding the index of the next pre_token whose position is $pos. - my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; - my $error = 0; - - while ( ++$i <= $max_token_index ) { - - if ( $pos <= $rtoken_map->[$i] ) { - - # Let the calling routine handle errors in which we do not - # land on a pre-token boundary. It can happen by running - # perltidy on some non-perl scripts, for example. - if ( $pos < $rtoken_map->[$i] ) { $error = 1 } - $i--; - last; - } - } - return ( $i, $error ); -} - -sub find_here_doc { - - # find the target of a here document, if any - # input parameters: - # $i - token index of the second < of << - # ($i must be less than the last token index if this is called) - # output parameters: - # $found_target = 0 didn't find target; =1 found target - # HERE_TARGET - the target string (may be empty string) - # $i - unchanged if not here doc, - # or index of the last token of the here target - # $saw_error - flag noting unbalanced quote on here target - my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; - my $ibeg = $i; - my $found_target = 0; - my $here_doc_target = ''; - my $here_quote_character = ''; - my $saw_error = 0; - my ( $next_nonblank_token, $i_next_nonblank, $next_token ); - $next_token = $rtokens->[ $i + 1 ]; - - # perl allows a backslash before the target string (heredoc.t) - my $backslash = 0; - if ( $next_token eq '\\' ) { - $backslash = 1; - $next_token = $rtokens->[ $i + 2 ]; - } - - ( $next_nonblank_token, $i_next_nonblank ) = - find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); - - if ( $next_nonblank_token =~ /[\'\"\`]/ ) { - - my $in_quote = 1; - my $quote_depth = 0; - my $quote_pos = 0; - my $quoted_string; - - ( - $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, - $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); - - if ($in_quote) { # didn't find end of quote, so no target found - $i = $ibeg; - if ( $expecting == TERM ) { - warning( -"Did not find here-doc string terminator ($here_quote_character) before end of line \n" - ); - $saw_error = 1; - } - } - else { # found ending quote - ##my $j; - $found_target = 1; - - my $tokj; - foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) { - $tokj = $rtokens->[$j]; - - # we have to remove any backslash before the quote character - # so that the here-doc-target exactly matches this string - next - if ( $tokj eq "\\" - && $j < $i - 1 - && $rtokens->[ $j + 1 ] eq $here_quote_character ); - $here_doc_target .= $tokj; - } - } - } - - elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { - $found_target = 1; - write_logfile_entry( - "found blank here-target after <<; suggest using \"\"\n"); - $i = $ibeg; - } - elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << - - my $here_doc_expected; - if ( $expecting == UNKNOWN ) { - $here_doc_expected = guess_if_here_doc($next_token); - } - else { - $here_doc_expected = 1; - } - - if ($here_doc_expected) { - $found_target = 1; - $here_doc_target = $next_token; - $i = $ibeg + 1; - } - - } - else { - - if ( $expecting == TERM ) { - $found_target = 1; - write_logfile_entry("Note: bare here-doc operator <<\n"); - } - else { - $i = $ibeg; - } - } - - # patch to neglect any prepended backslash - if ( $found_target && $backslash ) { $i++ } - - return ( $found_target, $here_doc_target, $here_quote_character, $i, - $saw_error ); -} - -sub do_quote { - - # follow (or continue following) quoted string(s) - # $in_quote return code: - # 0 - ok, found end - # 1 - still must find end of quote whose target is $quote_character - # 2 - still looking for end of first of two quotes - # - # Returns updated strings: - # $quoted_string_1 = quoted string seen while in_quote=1 - # $quoted_string_2 = quoted string seen while in_quote=2 - my ( - $i, $in_quote, $quote_character, - $quote_pos, $quote_depth, $quoted_string_1, - $quoted_string_2, $rtokens, $rtoken_map, - $max_token_index - ) = @_; - - my $in_quote_starting = $in_quote; - - my $quoted_string; - if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow - my $ibeg = $i; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - $quoted_string_2 .= $quoted_string; - if ( $in_quote == 1 ) { - if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } - $quote_character = ''; - } - else { - $quoted_string_2 .= "\n"; - } - } - - if ( $in_quote == 1 ) { # one (more) quote to follow - my $ibeg = $i; - ( - $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string - ) - = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, - $quote_pos, $quote_depth, $max_token_index ); - $quoted_string_1 .= $quoted_string; - if ( $in_quote == 1 ) { - $quoted_string_1 .= "\n"; - } - } - return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, - $quoted_string_1, $quoted_string_2 ); -} - -sub follow_quoted_string { - - # scan for a specific token, skipping escaped characters - # if the quote character is blank, use the first non-blank character - # input parameters: - # $rtokens = reference to the array of tokens - # $i = the token index of the first character to search - # $in_quote = number of quoted strings being followed - # $beginning_tok = the starting quote character - # $quote_pos = index to check next for alphanumeric delimiter - # output parameters: - # $i = the token index of the ending quote character - # $in_quote = decremented if found end, unchanged if not - # $beginning_tok = the starting quote character - # $quote_pos = index to check next for alphanumeric delimiter - # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. - # $quoted_string = the text of the quote (without quotation tokens) - my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, - $max_token_index ) - = @_; - my ( $tok, $end_tok ); - my $i = $i_beg - 1; - my $quoted_string = ""; - - TOKENIZER_DEBUG_FLAG_QUOTE && do { - print STDOUT -"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; - }; - - # get the corresponding end token - if ( $beginning_tok !~ /^\s*$/ ) { - $end_tok = matching_end_token($beginning_tok); - } - - # a blank token means we must find and use the first non-blank one - else { - my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a - - while ( $i < $max_token_index ) { - $tok = $rtokens->[ ++$i ]; - - if ( $tok !~ /^\s*$/ ) { - - if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { - $i = $max_token_index; - } - else { - - if ( length($tok) > 1 ) { - if ( $quote_pos <= 0 ) { $quote_pos = 1 } - $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); - } - else { - $beginning_tok = $tok; - $quote_pos = 0; - } - $end_tok = matching_end_token($beginning_tok); - $quote_depth = 1; - last; - } - } - else { - $allow_quote_comments = 1; - } - } - } - - # There are two different loops which search for the ending quote - # character. In the rare case of an alphanumeric quote delimiter, we - # have to look through alphanumeric tokens character-by-character, since - # the pre-tokenization process combines multiple alphanumeric - # characters, whereas for a non-alphanumeric delimiter, only tokens of - # length 1 can match. - - ################################################################### - # Case 1 (rare): loop for case of alphanumeric quote delimiter.. - # "quote_pos" is the position the current word to begin searching - ################################################################### - if ( $beginning_tok =~ /\w/ ) { - - # Note this because it is not recommended practice except - # for obfuscated perl contests - if ( $in_quote == 1 ) { - write_logfile_entry( - "Note: alphanumeric quote delimiter ($beginning_tok) \n"); - } - - while ( $i < $max_token_index ) { - - if ( $quote_pos == 0 || ( $i < 0 ) ) { - $tok = $rtokens->[ ++$i ]; - - if ( $tok eq '\\' ) { - - # retain backslash unless it hides the end token - $quoted_string .= $tok - unless $rtokens->[ $i + 1 ] eq $end_tok; - $quote_pos++; - last if ( $i >= $max_token_index ); - $tok = $rtokens->[ ++$i ]; - } - } - my $old_pos = $quote_pos; - - unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) - { - - } - $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); - - if ( $quote_pos > 0 ) { - - $quoted_string .= - substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); - - $quote_depth--; - - if ( $quote_depth == 0 ) { - $in_quote--; - last; - } - } - else { - $quoted_string .= substr( $tok, $old_pos ); - } - } - } - - ######################################################################## - # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. - ######################################################################## - else { - - while ( $i < $max_token_index ) { - $tok = $rtokens->[ ++$i ]; - - if ( $tok eq $end_tok ) { - $quote_depth--; - - if ( $quote_depth == 0 ) { - $in_quote--; - last; - } - } - elsif ( $tok eq $beginning_tok ) { - $quote_depth++; - } - elsif ( $tok eq '\\' ) { - - # retain backslash unless it hides the beginning or end token - $tok = $rtokens->[ ++$i ]; - $quoted_string .= '\\' - unless ( $tok eq $end_tok || $tok eq $beginning_tok ); - } - $quoted_string .= $tok; - } - } - if ( $i > $max_token_index ) { $i = $max_token_index } - return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, - $quoted_string ); -} - -sub indicate_error { - my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; - interrupt_logfile(); - warning($msg); - write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); - resume_logfile(); - return; -} - -sub write_error_indicator_pair { - my ( $line_number, $input_line, $pos, $carrat ) = @_; - my ( $offset, $numbered_line, $underline ) = - make_numbered_line( $line_number, $input_line, $pos ); - $underline = write_on_underline( $underline, $pos - $offset, $carrat ); - warning( $numbered_line . "\n" ); - $underline =~ s/\s*$//; - warning( $underline . "\n" ); - return; -} - -sub make_numbered_line { - - # Given an input line, its line number, and a character position of - # interest, create a string not longer than 80 characters of the form - # $lineno: sub_string - # such that the sub_string of $str contains the position of interest - # - # Here is an example of what we want, in this case we add trailing - # '...' because the line is long. - # - # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... - # - # Here is another example, this time in which we used leading '...' - # because of excessive length: - # - # 2: ... er of the World Wide Web Consortium's - # - # input parameters are: - # $lineno = line number - # $str = the text of the line - # $pos = position of interest (the error) : 0 = first character - # - # We return : - # - $offset = an offset which corrects the position in case we only - # display part of a line, such that $pos-$offset is the effective - # position from the start of the displayed line. - # - $numbered_line = the numbered line as above, - # - $underline = a blank 'underline' which is all spaces with the same - # number of characters as the numbered line. - - my ( $lineno, $str, $pos ) = @_; - my $offset = ( $pos < 60 ) ? 0 : $pos - 40; - my $excess = length($str) - $offset - 68; - my $numc = ( $excess > 0 ) ? 68 : undef; - - if ( defined($numc) ) { - if ( $offset == 0 ) { - $str = substr( $str, $offset, $numc - 4 ) . " ..."; - } - else { - $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; - } - } - else { - - if ( $offset == 0 ) { - } - else { - $str = "... " . substr( $str, $offset + 4 ); - } - } - - my $numbered_line = sprintf( "%d: ", $lineno ); - $offset -= length($numbered_line); - $numbered_line .= $str; - my $underline = " " x length($numbered_line); - return ( $offset, $numbered_line, $underline ); -} - -sub write_on_underline { - - # The "underline" is a string that shows where an error is; it starts - # out as a string of blanks with the same length as the numbered line of - # code above it, and we have to add marking to show where an error is. - # In the example below, we want to write the string '--^' just below - # the line of bad code: - # - # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... - # ---^ - # We are given the current underline string, plus a position and a - # string to write on it. - # - # In the above example, there will be 2 calls to do this: - # First call: $pos=19, pos_chr=^ - # Second call: $pos=16, pos_chr=--- - # - # This is a trivial thing to do with substr, but there is some - # checking to do. - - my ( $underline, $pos, $pos_chr ) = @_; - - # check for error..shouldn't happen - unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { - return $underline; - } - my $excess = length($pos_chr) + $pos - length($underline); - if ( $excess > 0 ) { - $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); - } - substr( $underline, $pos, length($pos_chr) ) = $pos_chr; - return ($underline); -} - -sub pre_tokenize { - - # Break a string, $str, into a sequence of preliminary tokens. We - # are interested in these types of tokens: - # words (type='w'), example: 'max_tokens_wanted' - # digits (type = 'd'), example: '0755' - # whitespace (type = 'b'), example: ' ' - # any other single character (i.e. punct; type = the character itself). - # We cannot do better than this yet because we might be in a quoted - # string or pattern. Caller sets $max_tokens_wanted to 0 to get all - # tokens. - my ( $str, $max_tokens_wanted ) = @_; - - # we return references to these 3 arrays: - my @tokens = (); # array of the tokens themselves - my @token_map = (0); # string position of start of each token - my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct - - do { - - # whitespace - if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } - - # numbers - # note that this must come before words! - elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } - - # words - elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } - - # single-character punctuation - elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } - - # that's all.. - else { - return ( \@tokens, \@token_map, \@type ); - } - - push @tokens, $1; - push @token_map, pos($str); - - } while ( --$max_tokens_wanted != 0 ); - - return ( \@tokens, \@token_map, \@type ); -} - -sub show_tokens { - - # this is an old debug routine - # not called, but saved for reference - my ( $rtokens, $rtoken_map ) = @_; - my $num = scalar( @{$rtokens} ); - - foreach my $i ( 0 .. $num - 1 ) { - my $len = length( $rtokens->[$i] ); - print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n"; - } - return; -} - -{ - my %matching_end_token; - - BEGIN { - %matching_end_token = ( - '{' => '}', - '(' => ')', - '[' => ']', - '<' => '>', - ); - } - - sub matching_end_token { - - # return closing character for a pattern - my $beginning_token = shift; - if ( $matching_end_token{$beginning_token} ) { - return $matching_end_token{$beginning_token}; - } - return ($beginning_token); - } -} - -sub dump_token_types { - my ( $class, $fh ) = @_; - - # This should be the latest list of token types in use - # adding NEW_TOKENS: add a comment here - print $fh <<'END_OF_LIST'; - -Here is a list of the token types currently used for lines of type 'CODE'. -For the following tokens, the "type" of a token is just the token itself. - -.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> -( ) <= >= == =~ !~ != ++ -- /= x= -... **= <<= >>= &&= ||= //= <=> -, + - / * | % ! x ~ = \ ? : . < > ^ & - -The following additional token types are defined: - - type meaning - b blank (white space) - { indent: opening structural curly brace or square bracket or paren - (code block, anonymous hash reference, or anonymous array reference) - } outdent: right structural curly brace or square bracket or paren - [ left non-structural square bracket (enclosing an array index) - ] right non-structural square bracket - ( left non-structural paren (all but a list right of an =) - ) right non-structural paren - L left non-structural curly brace (enclosing a key) - R right non-structural curly brace - ; terminal semicolon - f indicates a semicolon in a "for" statement - h here_doc operator << - # a comment - Q indicates a quote or pattern - q indicates a qw quote block - k a perl keyword - C user-defined constant or constant function (with void prototype = ()) - U user-defined function taking parameters - G user-defined function taking block parameter (like grep/map/eval) - M (unused, but reserved for subroutine definition name) - P (unused, but -html uses it to label pod text) - t type indicater such as %,$,@,*,&,sub - w bare word (perhaps a subroutine call) - i identifier of some type (with leading %, $, @, *, &, sub, -> ) - n a number - v a v-string - F a file test operator (like -e) - Y File handle - Z identifier in indirect object slot: may be file handle, object - J LABEL: code block label - j LABEL after next, last, redo, goto - p unary + - m unary - - pp pre-increment operator ++ - mm pre-decrement operator -- - A : used as attribute separator - - Here are the '_line_type' codes used internally: - SYSTEM - system-specific code before hash-bang line - CODE - line of perl code (including comments) - POD_START - line starting pod, such as '=head' - POD - pod documentation text - POD_END - last line of pod section, '=cut' - HERE - text of here-document - HERE_END - last line of here-doc (target word) - FORMAT - format section - FORMAT_END - last line of format section, '.' - DATA_START - __DATA__ line - DATA - unidentified text following __DATA__ - END_START - __END__ line - END - unidentified text following __END__ - ERROR - we are in big trouble, probably not a perl script -END_OF_LIST - - return; -} - -BEGIN { - - # These names are used in error messages - @opening_brace_names = qw# '{' '[' '(' '?' #; - @closing_brace_names = qw# '}' ']' ')' ':' #; - - my @q; - - my @digraphs = qw( - .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> - <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. - ); - @is_digraph{@digraphs} = (1) x scalar(@digraphs); - - my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~); - @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); - - my @tetragraphs = qw( <<>> ); - @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs); - - # make a hash of all valid token types for self-checking the tokenizer - # (adding NEW_TOKENS : select a new character and add to this list) - my @valid_token_types = qw# - A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v - { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & - #; - push( @valid_token_types, @digraphs ); - push( @valid_token_types, @trigraphs ); - push( @valid_token_types, @tetragraphs ); - push( @valid_token_types, ( '#', ',', 'CORE::' ) ); - @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); - - # a list of file test letters, as in -e (Table 3-4 of 'camel 3') - my @file_test_operators = - qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); - @is_file_test_operator{@file_test_operators} = - (1) x scalar(@file_test_operators); - - # these functions have prototypes of the form (&), so when they are - # followed by a block, that block MAY BE followed by an operator. - # Smartmatch operator ~~ may be followed by anonymous hash or array ref - @q = qw( do eval ); - @is_block_operator{@q} = (1) x scalar(@q); - - # these functions allow an identifier in the indirect object slot - @q = qw( print printf sort exec system say); - @is_indirect_object_taker{@q} = (1) x scalar(@q); - - # These tokens may precede a code block - # patched for SWITCH/CASE/CATCH. Actually these could be removed - # now and we could let the extended-syntax coding handle them - @q = - qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else - unless do while until eval for foreach map grep sort - switch case given when catch try finally); - @is_code_block_token{@q} = (1) x scalar(@q); - - # I'll build the list of keywords incrementally - my @Keywords = (); - - # keywords and tokens after which a value or pattern is expected, - # but not an operator. In other words, these should consume terms - # to their right, or at least they are not expected to be followed - # immediately by operators. - my @value_requestor = qw( - AUTOLOAD - BEGIN - CHECK - DESTROY - END - EQ - GE - GT - INIT - LE - LT - NE - UNITCHECK - abs - accept - alarm - and - atan2 - bind - binmode - bless - break - caller - chdir - chmod - chomp - chop - chown - chr - chroot - close - closedir - cmp - connect - continue - cos - crypt - dbmclose - dbmopen - defined - delete - die - dump - each - else - elsif - eof - eq - exec - exists - exit - exp - fcntl - fileno - flock - for - foreach - formline - ge - getc - getgrgid - getgrnam - gethostbyaddr - gethostbyname - getnetbyaddr - getnetbyname - getpeername - getpgrp - getpriority - getprotobyname - getprotobynumber - getpwnam - getpwuid - getservbyname - getservbyport - getsockname - getsockopt - glob - gmtime - goto - grep - gt - hex - if - index - int - ioctl - join - keys - kill - last - lc - lcfirst - le - length - link - listen - local - localtime - lock - log - lstat - lt - map - mkdir - msgctl - msgget - msgrcv - msgsnd - my - ne - next - no - not - oct - open - opendir - or - ord - our - pack - pipe - pop - pos - print - printf - prototype - push - quotemeta - rand - read - readdir - readlink - readline - readpipe - recv - redo - ref - rename - require - reset - return - reverse - rewinddir - rindex - rmdir - scalar - seek - seekdir - select - semctl - semget - semop - send - sethostent - setnetent - setpgrp - setpriority - setprotoent - setservent - setsockopt - shift - shmctl - shmget - shmread - shmwrite - shutdown - sin - sleep - socket - socketpair - sort - splice - split - sprintf - sqrt - srand - stat - study - substr - symlink - syscall - sysopen - sysread - sysseek - system - syswrite - tell - telldir - tie - tied - truncate - uc - ucfirst - umask - undef - unless - unlink - unpack - unshift - untie - until - use - utime - values - vec - waitpid - warn - while - write - xor - - switch - case - given - when - err - say - - catch - ); - - # patched above for SWITCH/CASE given/when err say - # 'err' is a fairly safe addition. - # TODO: 'default' still needed if appropriate - # 'use feature' seen, but perltidy works ok without it. - # Concerned that 'default' could break code. - push( @Keywords, @value_requestor ); - - # These are treated the same but are not keywords: - my @extra_vr = qw( - constant - vars - ); - push( @value_requestor, @extra_vr ); - - @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); - - # this list contains keywords which do not look for arguments, - # so that they might be followed by an operator, or at least - # not a term. - my @operator_requestor = qw( - endgrent - endhostent - endnetent - endprotoent - endpwent - endservent - fork - getgrent - gethostent - getlogin - getnetent - getppid - getprotoent - getpwent - getservent - setgrent - setpwent - time - times - wait - wantarray - ); - - push( @Keywords, @operator_requestor ); - - # These are treated the same but are not considered keywords: - my @extra_or = qw( - STDERR - STDIN - STDOUT - ); - - push( @operator_requestor, @extra_or ); - - @expecting_operator_token{@operator_requestor} = - (1) x scalar(@operator_requestor); - - # these token TYPES expect trailing operator but not a term - # note: ++ and -- are post-increment and decrement, 'C' = constant - my @operator_requestor_types = qw( ++ -- C <> q ); - @expecting_operator_types{@operator_requestor_types} = - (1) x scalar(@operator_requestor_types); - - # these token TYPES consume values (terms) - # note: pp and mm are pre-increment and decrement - # f=semicolon in for, F=file test operator - my @value_requestor_type = qw# - L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x - **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= - <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ - f F pp mm Y p m U J G j >> << ^ t - ~. ^. |. &. ^.= |.= &.= - #; - push( @value_requestor_type, ',' ) - ; # (perl doesn't like a ',' in a qw block) - @expecting_term_types{@value_requestor_type} = - (1) x scalar(@value_requestor_type); - - # Note: the following valid token types are not assigned here to - # hashes requesting to be followed by values or terms, but are - # instead currently hard-coded into sub operator_expected: - # ) -> :: Q R Z ] b h i k n v w } # - - # For simple syntax checking, it is nice to have a list of operators which - # will really be unhappy if not followed by a term. This includes most - # of the above... - %really_want_term = %expecting_term_types; - - # with these exceptions... - delete $really_want_term{'U'}; # user sub, depends on prototype - delete $really_want_term{'F'}; # file test works on $_ if no following term - delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; - # let perl do it - - @q = qw(q qq qw qx qr s y tr m); - @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); - - # These keywords are handled specially in the tokenizer code: - my @special_keywords = qw( - do - eval - format - m - package - q - qq - qr - qw - qx - s - sub - tr - y - ); - push( @Keywords, @special_keywords ); - - # Keywords after which list formatting may be used - # WARNING: do not include |map|grep|eval or perl may die on - # syntax errors (map1.t). - my @keyword_taking_list = qw( - and - chmod - chomp - chop - chown - dbmopen - die - elsif - exec - fcntl - for - foreach - formline - getsockopt - if - index - ioctl - join - kill - local - msgctl - msgrcv - msgsnd - my - open - or - our - pack - print - printf - push - read - readpipe - recv - return - reverse - rindex - seek - select - semctl - semget - send - setpriority - setsockopt - shmctl - shmget - shmread - shmwrite - socket - socketpair - sort - splice - split - sprintf - substr - syscall - sysopen - sysread - sysseek - system - syswrite - tie - unless - unlink - unpack - unshift - until - vec - warn - while - given - when - ); - @is_keyword_taking_list{@keyword_taking_list} = - (1) x scalar(@keyword_taking_list); - - # These are not used in any way yet - # my @unused_keywords = qw( - # __FILE__ - # __LINE__ - # __PACKAGE__ - # ); - - # The list of keywords was originally extracted from function 'keyword' in - # perl file toke.c version 5.005.03, using this utility, plus a - # little editing: (file getkwd.pl): - # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } - # Add 'get' prefix where necessary, then split into the above lists. - # This list should be updated as necessary. - # The list should not contain these special variables: - # ARGV DATA ENV SIG STDERR STDIN STDOUT - # __DATA__ __END__ - - @is_keyword{@Keywords} = (1) x scalar(@Keywords); -} -1; diff --git a/lib/Perl/Tidy.pod b/lib/Perl/Tidy.pod index d874001..23c8b02 100644 --- a/lib/Perl/Tidy.pod +++ b/lib/Perl/Tidy.pod @@ -129,7 +129,7 @@ command line string. If the B parameter is given, it must be the reference to a hash. In this case, the parameters contained in any perltidyrc configuration file will be placed in this hash and perltidy will return immediately. This is -equivalent to running perltidy with --dump-options, except that the perameters +equivalent to running perltidy with --dump-options, except that the parameters are returned in a hash rather than dumped to standard output. Also, by default only the parameters in the perltidyrc file are returned, but this can be changed (see the next parameter). This parameter provides a convenient method @@ -214,7 +214,7 @@ B parameter. The B<-syn> (B<--check-syntax>) flag may be used with all source and destination streams except for standard input and output. However data streams which are not associated with a filename will -be copied to a temporary file before being be passed to Perl. This +be copied to a temporary file before being passed to Perl. This use of temporary files can cause somewhat confusing output from Perl. If the B<-pbp> style is used it will typically be necessary to also @@ -410,7 +410,7 @@ C in Tidy.pm. =head1 VERSION -This man page documents Perl::Tidy version 20180107. +This man page documents Perl::Tidy version 20190601 =head1 LICENSE @@ -421,11 +421,11 @@ Please refer to the file "COPYING" for details. =head1 BUG REPORTS -A list of current bugs and issues can be found at the CPAN site +A list of current bugs and issues can be found at the CPAN site L - https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy +To report a new bug or problem, use the link on this page. -To report a new bug or problem, use the link on this page . +The source code repository is at L. =head1 SEE ALSO diff --git a/lib/Perl/Tidy/Debugger.pm b/lib/Perl/Tidy/Debugger.pm new file mode 100644 index 0000000..2fe1bbb --- /dev/null +++ b/lib/Perl/Tidy/Debugger.pm @@ -0,0 +1,125 @@ +##################################################################### +# +# The Perl::Tidy::Debugger class shows line tokenization +# +##################################################################### + +package Perl::Tidy::Debugger; +use strict; +use warnings; +our $VERSION = '20190601'; + +sub new { + + my ( $class, $filename ) = @_; + + return bless { + _debug_file => $filename, + _debug_file_opened => 0, + _fh => undef, + }, $class; +} + +sub really_open_debug_file { + + my $self = shift; + my $debug_file = $self->{_debug_file}; + my $fh; + unless ( $fh = IO::File->new("> $debug_file") ) { + Perl::Tidy::Warn("can't open $debug_file: $!\n"); + } + $self->{_debug_file_opened} = 1; + $self->{_fh} = $fh; + print $fh + "Use -dump-token-types (-dtt) to get a list of token type codes\n"; + return; +} + +sub close_debug_file { + + my $self = shift; + my $fh = $self->{_fh}; + if ( $self->{_debug_file_opened} ) { + if ( !eval { $self->{_fh}->close(); 1 } ) { + + # ok, maybe no close function + } + } + return; +} + +sub write_debug_entry { + + # This is a debug dump routine which may be modified as necessary + # to dump tokens on a line-by-line basis. The output will be written + # to the .DEBUG file when the -D flag is entered. + my ( $self, $line_of_tokens ) = @_; + + my $input_line = $line_of_tokens->{_line_text}; + + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + my $rlevels = $line_of_tokens->{_rlevels}; + my $rslevels = $line_of_tokens->{_rslevels}; + my $rblock_type = $line_of_tokens->{_rblock_type}; + + my $input_line_number = $line_of_tokens->{_line_number}; + my $line_type = $line_of_tokens->{_line_type}; + ##my $rtoken_array = $line_of_tokens->{_token_array}; + + my ( $j, $num ); + + my $token_str = "$input_line_number: "; + my $reconstructed_original = "$input_line_number: "; + my $block_str = "$input_line_number: "; + + #$token_str .= "$line_type: "; + #$reconstructed_original .= "$line_type: "; + + my $pattern = ""; + my @next_char = ( '"', '"' ); + my $i_next = 0; + unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } + my $fh = $self->{_fh}; + + # FIXME: could convert to use of token_array instead + foreach my $j ( 0 .. @{$rtoken_type} - 1 ) { + + # testing patterns + if ( $rtoken_type->[$j] eq 'k' ) { + $pattern .= $rtokens->[$j]; + } + else { + $pattern .= $rtoken_type->[$j]; + } + $reconstructed_original .= $rtokens->[$j]; + $block_str .= "($rblock_type->[$j])"; + $num = length( $rtokens->[$j] ); + my $type_str = $rtoken_type->[$j]; + + # be sure there are no blank tokens (shouldn't happen) + # This can only happen if a programming error has been made + # because all valid tokens are non-blank + if ( $type_str eq ' ' ) { + print $fh "BLANK TOKEN on the next line\n"; + $type_str = $next_char[$i_next]; + $i_next = 1 - $i_next; + } + + if ( length($type_str) == 1 ) { + $type_str = $type_str x $num; + } + $token_str .= $type_str; + } + + # Write what you want here ... + # print $fh "$input_line\n"; + # print $fh "$pattern\n"; + print $fh "$reconstructed_original\n"; + print $fh "$token_str\n"; + + #print $fh "$block_str\n"; + return; +} +1; + diff --git a/lib/Perl/Tidy/DevNull.pm b/lib/Perl/Tidy/DevNull.pm new file mode 100644 index 0000000..e755e0c --- /dev/null +++ b/lib/Perl/Tidy/DevNull.pm @@ -0,0 +1,16 @@ +##################################################################### +# +# The Perl::Tidy::DevNull class supplies a dummy print method +# +##################################################################### + +package Perl::Tidy::DevNull; +use strict; +use warnings; +our $VERSION = '20190601'; +sub new { my $self = shift; return bless {}, $self } +sub print { return } +sub close { return } + +1; + diff --git a/lib/Perl/Tidy/Diagnostics.pm b/lib/Perl/Tidy/Diagnostics.pm new file mode 100644 index 0000000..f2d6ec9 --- /dev/null +++ b/lib/Perl/Tidy/Diagnostics.pm @@ -0,0 +1,64 @@ +##################################################################### +# +# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is +# useful for program development. +# +# Only one such file is created regardless of the number of input +# files processed. This allows the results of processing many files +# to be summarized in a single file. + +# Output messages go to a file named DIAGNOSTICS, where +# they are labeled by file and line. This allows many files to be +# scanned at once for some particular condition of interest. It was +# particularly useful for developing guessing strategies. +# +# NOTE: This feature is deactivated in final releases but can be +# reactivated for debugging by un-commenting the 'I' options flag +# +##################################################################### + +package Perl::Tidy::Diagnostics; +use strict; +use warnings; +our $VERSION = '20190601'; + +sub new { + + my $class = shift; + return bless { + _write_diagnostics_count => 0, + _last_diagnostic_file => "", + _input_file => "", + _fh => undef, + }, $class; +} + +sub set_input_file { + my ( $self, $input_file ) = @_; + $self->{_input_file} = $input_file; + return; +} + +sub write_diagnostics { + my ( $self, $msg ) = @_; + + unless ( $self->{_write_diagnostics_count} ) { + open( $self->{_fh}, ">", "DIAGNOSTICS" ) + or Perl::Tidy::Die("couldn't open DIAGNOSTICS: $!\n"); + } + + my $fh = $self->{_fh}; + my $last_diagnostic_file = $self->{_last_diagnostic_file}; + my $input_file = $self->{_input_file}; + if ( $last_diagnostic_file ne $input_file ) { + $fh->print("\nFILE:$input_file\n"); + } + $self->{_last_diagnostic_file} = $input_file; + my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); + $fh->print("$input_line_number:\t$msg"); + $self->{_write_diagnostics_count}++; + return; +} + +1; + diff --git a/lib/Perl/Tidy/FileWriter.pm b/lib/Perl/Tidy/FileWriter.pm new file mode 100644 index 0000000..c7b3192 --- /dev/null +++ b/lib/Perl/Tidy/FileWriter.pm @@ -0,0 +1,228 @@ +##################################################################### +# +# the Perl::Tidy::FileWriter class writes the output file +# +##################################################################### + +package Perl::Tidy::FileWriter; +use strict; +use warnings; +our $VERSION = '20190601'; + +# Maximum number of little messages; probably need not be changed. +my $MAX_NAG_MESSAGES = 6; + +sub write_logfile_entry { + my ( $self, $msg ) = @_; + my $logger_object = $self->{_logger_object}; + if ($logger_object) { + $logger_object->write_logfile_entry($msg); + } + return; +} + +sub new { + my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_; + + return bless { + _line_sink_object => $line_sink_object, + _logger_object => $logger_object, + _rOpts => $rOpts, + _output_line_number => 1, + _consecutive_blank_lines => 0, + _consecutive_nonblank_lines => 0, + _first_line_length_error => 0, + _max_line_length_error => 0, + _last_line_length_error => 0, + _first_line_length_error_at => 0, + _max_line_length_error_at => 0, + _last_line_length_error_at => 0, + _line_length_error_count => 0, + _max_output_line_length => 0, + _max_output_line_length_at => 0, + }, $class; +} + +sub tee_on { + my $self = shift; + $self->{_line_sink_object}->tee_on(); + return; +} + +sub tee_off { + my $self = shift; + $self->{_line_sink_object}->tee_off(); + return; +} + +sub get_output_line_number { + my $self = shift; + return $self->{_output_line_number}; +} + +sub decrement_output_line_number { + my $self = shift; + $self->{_output_line_number}--; + return; +} + +sub get_consecutive_nonblank_lines { + my $self = shift; + return $self->{_consecutive_nonblank_lines}; +} + +sub reset_consecutive_blank_lines { + my $self = shift; + $self->{_consecutive_blank_lines} = 0; + return; +} + +sub want_blank_line { + my $self = shift; + unless ( $self->{_consecutive_blank_lines} ) { + $self->write_blank_code_line(); + } + return; +} + +sub require_blank_code_lines { + + # write out the requested number of blanks regardless of the value of -mbl + # unless -mbl=0. This allows extra blank lines to be written for subs and + # packages even with the default -mbl=1 + my ( $self, $count ) = @_; + my $need = $count - $self->{_consecutive_blank_lines}; + my $rOpts = $self->{_rOpts}; + my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0; + foreach my $i ( 0 .. $need - 1 ) { + $self->write_blank_code_line($forced); + } + return; +} + +sub write_blank_code_line { + my $self = shift; + my $forced = shift; + my $rOpts = $self->{_rOpts}; + return + if (!$forced + && $self->{_consecutive_blank_lines} >= + $rOpts->{'maximum-consecutive-blank-lines'} ); + $self->{_consecutive_blank_lines}++; + $self->{_consecutive_nonblank_lines} = 0; + $self->write_line("\n"); + return; +} + +sub write_code_line { + my $self = shift; + my $a = shift; + + if ( $a =~ /^\s*$/ ) { + my $rOpts = $self->{_rOpts}; + return + if ( $self->{_consecutive_blank_lines} >= + $rOpts->{'maximum-consecutive-blank-lines'} ); + $self->{_consecutive_blank_lines}++; + $self->{_consecutive_nonblank_lines} = 0; + } + else { + $self->{_consecutive_blank_lines} = 0; + $self->{_consecutive_nonblank_lines}++; + } + $self->write_line($a); + return; +} + +sub write_line { + my ( $self, $a ) = @_; + + # TODO: go through and see if the test is necessary here + if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } + + $self->{_line_sink_object}->write_line($a); + + # This calculation of excess line length ignores any internal tabs + my $rOpts = $self->{_rOpts}; + my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; + if ( $a =~ /^\t+/g ) { + $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); + } + + # Note that we just incremented output line number to future value + # so we must subtract 1 for current line number + if ( length($a) > 1 + $self->{_max_output_line_length} ) { + $self->{_max_output_line_length} = length($a) - 1; + $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; + } + + if ( $exceed > 0 ) { + my $output_line_number = $self->{_output_line_number}; + $self->{_last_line_length_error} = $exceed; + $self->{_last_line_length_error_at} = $output_line_number - 1; + if ( $self->{_line_length_error_count} == 0 ) { + $self->{_first_line_length_error} = $exceed; + $self->{_first_line_length_error_at} = $output_line_number - 1; + } + + if ( + $self->{_last_line_length_error} > $self->{_max_line_length_error} ) + { + $self->{_max_line_length_error} = $exceed; + $self->{_max_line_length_error_at} = $output_line_number - 1; + } + + if ( $self->{_line_length_error_count} < $MAX_NAG_MESSAGES ) { + $self->write_logfile_entry( + "Line length exceeded by $exceed characters\n"); + } + $self->{_line_length_error_count}++; + } + return; +} + +sub report_line_length_errors { + my $self = shift; + my $rOpts = $self->{_rOpts}; + my $line_length_error_count = $self->{_line_length_error_count}; + if ( $line_length_error_count == 0 ) { + $self->write_logfile_entry( + "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); + my $max_output_line_length = $self->{_max_output_line_length}; + my $max_output_line_length_at = $self->{_max_output_line_length_at}; + $self->write_logfile_entry( +" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" + ); + + } + else { + + my $word = ( $line_length_error_count > 1 ) ? "s" : ""; + $self->write_logfile_entry( +"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" + ); + + $word = ( $line_length_error_count > 1 ) ? "First" : ""; + my $first_line_length_error = $self->{_first_line_length_error}; + my $first_line_length_error_at = $self->{_first_line_length_error_at}; + $self->write_logfile_entry( +" $word at line $first_line_length_error_at by $first_line_length_error characters\n" + ); + + if ( $line_length_error_count > 1 ) { + my $max_line_length_error = $self->{_max_line_length_error}; + my $max_line_length_error_at = $self->{_max_line_length_error_at}; + my $last_line_length_error = $self->{_last_line_length_error}; + my $last_line_length_error_at = $self->{_last_line_length_error_at}; + $self->write_logfile_entry( +" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" + ); + $self->write_logfile_entry( +" Last at line $last_line_length_error_at by $last_line_length_error characters\n" + ); + } + } + return; +} +1; + diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm new file mode 100644 index 0000000..9766e25 --- /dev/null +++ b/lib/Perl/Tidy/Formatter.pm @@ -0,0 +1,17039 @@ +##################################################################### +# +# The Perl::Tidy::Formatter package adds indentation, whitespace, and +# line breaks to the token stream +# +# WARNING: This is not a real class for speed reasons. Only one +# Formatter may be used. +# +##################################################################### + +package Perl::Tidy::Formatter; +use strict; +use warnings; +use Carp; +our $VERSION = '20190601'; + +# The Tokenizer will be loaded with the Formatter +##use Perl::Tidy::Tokenizer; # for is_keyword() + +sub Die { + my ($msg) = @_; + Perl::Tidy::Die($msg); + croak "unexpected return from Perl::Tidy::Die"; +} + +sub Warn { + my ($msg) = @_; + Perl::Tidy::Warn($msg); + return; +} + +sub Exit { + my ($msg) = @_; + Perl::Tidy::Exit($msg); + croak "unexpected return from Perl::Tidy::Exit"; +} + +BEGIN { + + # Codes for insertion and deletion of blanks + use constant DELETE => 0; + use constant STABLE => 1; + use constant INSERT => 2; + + # Caution: these debug flags produce a lot of output + # They should all be 0 except when debugging small scripts + use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0; + use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0; + use constant FORMATTER_DEBUG_FLAG_BOND => 0; + use constant FORMATTER_DEBUG_FLAG_BREAK => 0; + use constant FORMATTER_DEBUG_FLAG_CI => 0; + use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; + use constant FORMATTER_DEBUG_FLAG_FORCE => 0; + use constant FORMATTER_DEBUG_FLAG_LIST => 0; + use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; + use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; + use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; + use constant FORMATTER_DEBUG_FLAG_STORE => 0; + use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; + use constant FORMATTER_DEBUG_FLAG_WHITE => 0; + + my $debug_warning = sub { + print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n"; + }; + + FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE'); + FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES'); + FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); + FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); + FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); + FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); + FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); + FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); + FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); + FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); + FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); + FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); + FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); + FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); +} + +use vars qw{ + + @gnu_stack + $max_gnu_stack_index + $gnu_position_predictor + $line_start_index_to_go + $last_indentation_written + $last_unadjusted_indentation + $last_leading_token + $last_output_short_opening_token + $peak_batch_size + + $saw_VERSION_in_this_file + $saw_END_or_DATA_ + + @gnu_item_list + $max_gnu_item_index + $gnu_sequence_number + $last_output_indentation + %last_gnu_equals + %gnu_comma_count + %gnu_arrow_count + + @block_type_to_go + @type_sequence_to_go + @container_environment_to_go + @bond_strength_to_go + @forced_breakpoint_to_go + @token_lengths_to_go + @summed_lengths_to_go + @levels_to_go + @leading_spaces_to_go + @reduced_spaces_to_go + @matching_token_to_go + @mate_index_to_go + @ci_levels_to_go + @nesting_depth_to_go + @nobreak_to_go + @old_breakpoint_to_go + @tokens_to_go + @K_to_go + @types_to_go + @inext_to_go + @iprev_to_go + + %saved_opening_indentation + + $max_index_to_go + $comma_count_in_batch + $last_nonblank_index_to_go + $last_nonblank_type_to_go + $last_nonblank_token_to_go + $last_last_nonblank_index_to_go + $last_last_nonblank_type_to_go + $last_last_nonblank_token_to_go + @nonblank_lines_at_depth + $starting_in_quote + $ending_in_quote + @whitespace_level_stack + $whitespace_last_level + + $format_skipping_pattern_begin + $format_skipping_pattern_end + + $forced_breakpoint_count + $forced_breakpoint_undo_count + @forced_breakpoint_undo_stack + %postponed_breakpoint + + $tabbing + $embedded_tab_count + $first_embedded_tab_at + $last_embedded_tab_at + $deleted_semicolon_count + $first_deleted_semicolon_at + $last_deleted_semicolon_at + $added_semicolon_count + $first_added_semicolon_at + $last_added_semicolon_at + $first_tabbing_disagreement + $last_tabbing_disagreement + $in_tabbing_disagreement + $tabbing_disagreement_count + $input_line_tabbing + + $last_line_leading_type + $last_line_leading_level + $last_last_line_leading_level + + %block_leading_text + %block_opening_line_number + $csc_new_statement_ok + $csc_last_label + %csc_block_label + $accumulating_text_for_block + $leading_block_text + $rleading_block_if_elsif_text + $leading_block_text_level + $leading_block_text_length_exceeded + $leading_block_text_line_length + $leading_block_text_line_number + $closing_side_comment_prefix_pattern + $closing_side_comment_list_pattern + + $blank_lines_after_opening_block_pattern + $blank_lines_before_closing_block_pattern + + $last_nonblank_token + $last_nonblank_type + $last_last_nonblank_token + $last_last_nonblank_type + $last_nonblank_block_type + $last_output_level + %is_do_follower + %is_if_brace_follower + %space_after_keyword + $rbrace_follower + $looking_for_else + %is_last_next_redo_return + %is_other_brace_follower + %is_else_brace_follower + %is_anon_sub_brace_follower + %is_anon_sub_1_brace_follower + %is_sort_map_grep + %is_sort_map_grep_eval + %is_sort_map_grep_eval_do + %is_block_without_semicolon + %is_if_unless + %is_and_or + %is_assignment + %is_chain_operator + %is_if_unless_and_or_last_next_redo_return + %ok_to_add_semicolon_for_block_type + + @has_broken_sublist + @dont_align + @want_comma_break + + $is_static_block_comment + $index_start_one_line_block + $semicolons_before_block_self_destruct + $index_max_forced_break + $input_line_number + $diagnostics_object + $vertical_aligner_object + $logger_object + $file_writer_object + $formatter_self + @ci_stack + %want_break_before + %outdent_keyword + $static_block_comment_pattern + $static_side_comment_pattern + %opening_vertical_tightness + %closing_vertical_tightness + %closing_token_indentation + $some_closing_token_indentation + + %opening_token_right + %stack_opening_token + %stack_closing_token + + $block_brace_vertical_tightness_pattern + $keyword_group_list_pattern + $keyword_group_list_comment_pattern + + $rOpts_add_newlines + $rOpts_add_whitespace + $rOpts_block_brace_tightness + $rOpts_block_brace_vertical_tightness + $rOpts_brace_left_and_indent + $rOpts_comma_arrow_breakpoints + $rOpts_break_at_old_keyword_breakpoints + $rOpts_break_at_old_comma_breakpoints + $rOpts_break_at_old_logical_breakpoints + $rOpts_break_at_old_method_breakpoints + $rOpts_break_at_old_ternary_breakpoints + $rOpts_break_at_old_attribute_breakpoints + $rOpts_closing_side_comment_else_flag + $rOpts_closing_side_comment_maximum_text + $rOpts_continuation_indentation + $rOpts_delete_old_whitespace + $rOpts_fuzzy_line_length + $rOpts_indent_columns + $rOpts_line_up_parentheses + $rOpts_maximum_fields_per_table + $rOpts_maximum_line_length + $rOpts_variable_maximum_line_length + $rOpts_short_concatenation_item_length + $rOpts_keep_old_blank_lines + $rOpts_ignore_old_breakpoints + $rOpts_format_skipping + $rOpts_space_function_paren + $rOpts_space_keyword_paren + $rOpts_keep_interior_semicolons + $rOpts_ignore_side_comment_lengths + $rOpts_stack_closing_block_brace + $rOpts_space_backslash_quote + $rOpts_whitespace_cycle + $rOpts_one_line_block_semicolons + + %is_opening_type + %is_closing_type + %is_keyword_returning_list + %tightness + %matching_token + $rOpts + %right_bond_strength + %left_bond_strength + %binary_ws_rules + %want_left_space + %want_right_space + %is_digraph + %is_trigraph + $bli_pattern + $bli_list_string + %is_closing_type + %is_opening_type + %is_closing_token + %is_opening_token + + %weld_len_left_closing + %weld_len_right_closing + %weld_len_left_opening + %weld_len_right_opening + + $rcuddled_block_types + + $SUB_PATTERN + $ASUB_PATTERN + + $NVARS + +}; + +BEGIN { + + # Array index names for token variables + my $i = 0; + use constant { + _BLOCK_TYPE_ => $i++, + _CI_LEVEL_ => $i++, + _CONTAINER_ENVIRONMENT_ => $i++, + _CONTAINER_TYPE_ => $i++, + _CUMULATIVE_LENGTH_ => $i++, + _LINE_INDEX_ => $i++, + _KNEXT_SEQ_ITEM_ => $i++, + _LEVEL_ => $i++, + _LEVEL_TRUE_ => $i++, + _SLEVEL_ => $i++, + _TOKEN_ => $i++, + _TYPE_ => $i++, + _TYPE_SEQUENCE_ => $i++, + }; + $NVARS = 1 + _TYPE_SEQUENCE_; + + # default list of block types for which -bli would apply + $bli_list_string = 'if else elsif unless while for foreach do : sub'; + + my @q; + + @q = qw( + .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> + <= >= == =~ !~ != ++ -- /= x= + ); + @is_digraph{@q} = (1) x scalar(@q); + + @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ ); + @is_trigraph{@q} = (1) x scalar(@q); + + @q = qw( + = **= += *= &= <<= &&= + -= /= |= >>= ||= //= + .= %= ^= + x= + ); + @is_assignment{@q} = (1) x scalar(@q); + + @q = qw( + grep + keys + map + reverse + sort + split + ); + @is_keyword_returning_list{@q} = (1) x scalar(@q); + + @q = qw(is if unless and or err last next redo return); + @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q); + + @q = qw(last next redo return); + @is_last_next_redo_return{@q} = (1) x scalar(@q); + + @q = qw(sort map grep); + @is_sort_map_grep{@q} = (1) x scalar(@q); + + @q = qw(sort map grep eval); + @is_sort_map_grep_eval{@q} = (1) x scalar(@q); + + @q = qw(sort map grep eval do); + @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q); + + @q = qw(if unless); + @is_if_unless{@q} = (1) x scalar(@q); + + @q = qw(and or err); + @is_and_or{@q} = (1) x scalar(@q); + + # Identify certain operators which often occur in chains. + # Note: the minus (-) causes a side effect of padding of the first line in + # something like this (by sub set_logical_padding): + # Checkbutton => 'Transmission checked', + # -variable => \$TRANS + # This usually improves appearance so it seems ok. + @q = qw(&& || and or : ? . + - * /); + @is_chain_operator{@q} = (1) x scalar(@q); + + # We can remove semicolons after blocks preceded by these keywords + @q = + qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless while until for foreach given when default); + @is_block_without_semicolon{@q} = (1) x scalar(@q); + + # We will allow semicolons to be added within these block types + # as well as sub and package blocks. + # NOTES: + # 1. Note that these keywords are omitted: + # switch case given when default sort map grep + # 2. It is also ok to add for sub and package blocks and a labeled block + # 3. But not okay for other perltidy types including: + # { } ; G t + # 4. Test files: blktype.t, blktype1.t, semicolon.t + @q = + qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless do while until eval for foreach ); + @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q); + + # 'L' is token for opening { at hash key + @q = qw< L { ( [ >; + @is_opening_type{@q} = (1) x scalar(@q); + + # 'R' is token for closing } at hash key + @q = qw< R } ) ] >; + @is_closing_type{@q} = (1) x scalar(@q); + + @q = qw< { ( [ >; + @is_opening_token{@q} = (1) x scalar(@q); + + @q = qw< } ) ] >; + @is_closing_token{@q} = (1) x scalar(@q); + + # Patterns for standardizing matches to block types for regular subs and + # anonymous subs. Examples + # 'sub process' is a named sub + # 'sub ::m' is a named sub + # 'sub' is an anonymous sub + # 'sub:' is a label, not a sub + # 'substr' is a keyword + $SUB_PATTERN = '^sub\s+(::|\w)'; + $ASUB_PATTERN = '^sub$'; +} + +# whitespace codes +use constant WS_YES => 1; +use constant WS_OPTIONAL => 0; +use constant WS_NO => -1; + +# Token bond strengths. +use constant NO_BREAK => 10000; +use constant VERY_STRONG => 100; +use constant STRONG => 2.1; +use constant NOMINAL => 1.1; +use constant WEAK => 0.8; +use constant VERY_WEAK => 0.55; + +# values for testing indexes in output array +use constant UNDEFINED_INDEX => -1; + +# Maximum number of little messages; probably need not be changed. +use constant MAX_NAG_MESSAGES => 6; + +# increment between sequence numbers for each type +# For example, ?: pairs might have numbers 7,11,15,... +use constant TYPE_SEQUENCE_INCREMENT => 4; + +{ + + # methods to count instances + my $_count = 0; + sub get_count { return $_count; } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } +} + +sub trim { + + # trim leading and trailing whitespace from a string + my $str = shift; + $str =~ s/\s+$//; + $str =~ s/^\s+//; + return $str; +} + +sub max { + my @vals = @_; + my $max = shift @vals; + foreach my $val (@vals) { + $max = ( $max < $val ) ? $val : $max; + } + return $max; +} + +sub min { + my @vals = @_; + my $min = shift @vals; + foreach my $val (@vals) { + $min = ( $min > $val ) ? $val : $min; + } + return $min; +} + +sub split_words { + + # given a string containing words separated by whitespace, + # return the list of words + my ($str) = @_; + return unless $str; + $str =~ s/\s+$//; + $str =~ s/^\s+//; + return split( /\s+/, $str ); +} + +sub check_keys { + my ( $rtest, $rvalid, $msg, $exact_match ) = @_; + + # Check the keys of a hash: + # $rtest = ref to hash to test + # $rvalid = ref to hash with valid keys + + # $msg = a message to write in case of error + # $exact_match defines the type of check: + # = false: test hash must not have unknown key + # = true: test hash must have exactly same keys as known hash + my @unknown_keys = + grep { !exists $rvalid->{$_} } keys %{$rtest}; + my @missing_keys = + grep { !exists $rtest->{$_} } keys %{$rvalid}; + my $error = @unknown_keys; + if ($exact_match) { $error ||= @missing_keys } + if ($error) { + local $" = ')('; + my @expected_keys = sort keys %{$rvalid}; + @unknown_keys = sort @unknown_keys; + Die(<warning($msg); } + return; +} + +sub complain { + my ($msg) = @_; + if ($logger_object) { + $logger_object->complain($msg); + } + return; +} + +sub write_logfile_entry { + my @msg = @_; + if ($logger_object) { + $logger_object->write_logfile_entry(@msg); + } + return; +} + +sub black_box { + my @msg = @_; + if ($logger_object) { $logger_object->black_box(@msg); } + return; +} + +sub report_definite_bug { + if ($logger_object) { + $logger_object->report_definite_bug(); + } + return; +} + +sub get_saw_brace_error { + if ($logger_object) { + return $logger_object->get_saw_brace_error(); + } + return; +} + +sub we_are_at_the_last_line { + if ($logger_object) { + $logger_object->we_are_at_the_last_line(); + } + return; +} + +# interface to Perl::Tidy::Diagnostics routine +sub write_diagnostics { + my $msg = shift; + if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); } + return; +} + +sub get_added_semicolon_count { + my $self = shift; + return $added_semicolon_count; +} + +sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; +} + +sub get_output_line_number { + return $vertical_aligner_object->get_output_line_number(); +} + +sub new { + + my ( $class, @args ) = @_; + + # we are given an object with a write_line() method to take lines + my %defaults = ( + sink_object => undef, + diagnostics_object => undef, + logger_object => undef, + ); + my %args = ( %defaults, @args ); + + $logger_object = $args{logger_object}; + $diagnostics_object = $args{diagnostics_object}; + + # we create another object with a get_line() and peek_ahead() method + my $sink_object = $args{sink_object}; + $file_writer_object = + Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); + + # initialize the leading whitespace stack to negative levels + # so that we can never run off the end of the stack + $peak_batch_size = 0; # flag to determine if we have output code + $gnu_position_predictor = 0; # where the current token is predicted to be + $max_gnu_stack_index = 0; + $max_gnu_item_index = -1; + $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); + @gnu_item_list = (); + $last_output_indentation = 0; + $last_indentation_written = 0; + $last_unadjusted_indentation = 0; + $last_leading_token = ""; + $last_output_short_opening_token = 0; + + $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; + $saw_END_or_DATA_ = 0; + + @block_type_to_go = (); + @type_sequence_to_go = (); + @container_environment_to_go = (); + @bond_strength_to_go = (); + @forced_breakpoint_to_go = (); + @summed_lengths_to_go = (); # line length to start of ith token + @token_lengths_to_go = (); + @levels_to_go = (); + @matching_token_to_go = (); + @mate_index_to_go = (); + @ci_levels_to_go = (); + @nesting_depth_to_go = (0); + @nobreak_to_go = (); + @old_breakpoint_to_go = (); + @tokens_to_go = (); + @K_to_go = (); + @types_to_go = (); + @leading_spaces_to_go = (); + @reduced_spaces_to_go = (); + @inext_to_go = (); + @iprev_to_go = (); + + @whitespace_level_stack = (); + $whitespace_last_level = -1; + + @dont_align = (); + @has_broken_sublist = (); + @want_comma_break = (); + + @ci_stack = (""); + $first_tabbing_disagreement = 0; + $last_tabbing_disagreement = 0; + $tabbing_disagreement_count = 0; + $in_tabbing_disagreement = 0; + $input_line_tabbing = undef; + + $last_last_line_leading_level = 0; + $last_line_leading_level = 0; + $last_line_leading_type = '#'; + + $last_nonblank_token = ';'; + $last_nonblank_type = ';'; + $last_last_nonblank_token = ';'; + $last_last_nonblank_type = ';'; + $last_nonblank_block_type = ""; + $last_output_level = 0; + $looking_for_else = 0; + $embedded_tab_count = 0; + $first_embedded_tab_at = 0; + $last_embedded_tab_at = 0; + $deleted_semicolon_count = 0; + $first_deleted_semicolon_at = 0; + $last_deleted_semicolon_at = 0; + $added_semicolon_count = 0; + $first_added_semicolon_at = 0; + $last_added_semicolon_at = 0; + $is_static_block_comment = 0; + %postponed_breakpoint = (); + + # variables for adding side comments + %block_leading_text = (); + %block_opening_line_number = (); + $csc_new_statement_ok = 1; + %csc_block_label = (); + + %saved_opening_indentation = (); + + reset_block_text_accumulator(); + + prepare_for_new_input_lines(); + + $vertical_aligner_object = + Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, + $logger_object, $diagnostics_object ); + + if ( $rOpts->{'entab-leading-whitespace'} ) { + write_logfile_entry( +"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" + ); + } + elsif ( $rOpts->{'tabs'} ) { + write_logfile_entry("Indentation will be with a tab character\n"); + } + else { + write_logfile_entry( + "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); + } + + # This hash holds the main data structures for formatting + # All hash keys must be defined here. + $formatter_self = { + rlines => [], # = ref to array of lines of the file + rlines_new => [], # = ref to array of output lines + # (FOR FUTURE DEVELOPMENT) + rLL => [], # = ref to array with all tokens + # in the file. LL originally meant + # 'Linked List'. Linked lists were a + # bad idea but LL is easy to type. + Klimit => undef, # = maximum K index for rLL. This is + # needed to catch any autovivification + # problems. + rnested_pairs => [], # for welding decisions + K_opening_container => {}, # for quickly traversing structure + K_closing_container => {}, # for quickly traversing structure + K_opening_ternary => {}, # for quickly traversing structure + K_closing_ternary => {}, # for quickly traversing structure + rK_phantom_semicolons => + undef, # for undoing phantom semicolons if iterating + rpaired_to_inner_container => {}, + rbreak_container => {}, # prevent one-line blocks + rvalid_self_keys => [], # for checking + valign_batch_count => 0, + }; + my @valid_keys = keys %{$formatter_self}; + $formatter_self->{rvalid_self_keys} = \@valid_keys; + + bless $formatter_self, $class; + + # Safety check..this is not a class yet + if ( _increment_count() > 1 ) { + confess +"Attempt to create more than 1 object in $class, which is not a true class yet\n"; + } + return $formatter_self; +} + +# Future routines for storing new lines +sub push_line { + my ( $self, $rline ) = @_; + + # my $rline = $rlines->[$index_old]; + # push @{$rlines_new}, $rline; + return; +} + +sub push_old_line { + my ( $self, $index_old ) = @_; + + # TODO: This will copy line with index $index_old to the new line array + # my $rlines = $self->{rlines}; + # my $rline = $rlines->[$index_old]; + # $self->push_line($rline); + return; +} + +sub push_blank_line { + my ($self) = @_; + + # my $rline = ... + # $self->push_line($rline); + return; +} + +sub push_CODE_line { + my ( $self, $Kmin, $Kmax ) = @_; + + # TODO: This will store the values for one new line of CODE + # CHECK TOKEN RANGE HERE + # $self->push_line($rline); + return; +} + +sub increment_valign_batch_count { + my ($self) = shift; + return ++$self->{valign_batch_count}; +} + +sub get_valign_batch_count { + my ($self) = shift; + return $self->{valign_batch_count}; +} + +sub Fault { + my ($msg) = @_; + + # This routine is called for errors that really should not occur + # except if there has been a bug introduced by a recent program change + my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); + my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); + my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); + + Die(<{rvalid_self_keys} }; + my %valid_self_hash; + @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys); + check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 ); + return; +} + +sub check_token_array { + my $self = shift; + + # Check for errors in the array of tokens + # Uses package variable $NVARS + $self->check_self_hash(); + my $rLL = $self->{rLL}; + for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { + my $nvars = @{ $rLL->[$KK] }; + if ( $nvars != $NVARS ) { + my $type = $rLL->[$KK]->[_TYPE_]; + $type = '*' unless defined($type); + Fault( +"number of vars for node $KK, type '$type', is $nvars but should be $NVARS" + ); + } + foreach my $var ( _TOKEN_, _TYPE_ ) { + if ( !defined( $rLL->[$KK]->[$var] ) ) { + my $iline = $rLL->[$KK]->[_LINE_INDEX_]; + Fault("Undefined variable $var for K=$KK, line=$iline\n"); + } + } + } + return; +} + +sub set_rLL_max_index { + my $self = shift; + + # Set the limit of the rLL array, assuming that it is correct. + # This should only be called by routines after they make changes + # to tokenization + my $rLL = $self->{rLL}; + if ( !defined($rLL) ) { + + # Shouldn't happen because rLL was initialized to be an array ref + Fault("Undefined Memory rLL"); + } + my $Klimit_old = $self->{Klimit}; + my $num = @{$rLL}; + my $Klimit; + if ( $num > 0 ) { $Klimit = $num - 1 } + $self->{Klimit} = $Klimit; + return ($Klimit); +} + +sub get_rLL_max_index { + my $self = shift; + + # the memory location $rLL and number of tokens should be obtained + # from this routine so that any autovivication can be immediately caught. + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + if ( !defined($rLL) ) { + + # Shouldn't happen because rLL was initialized to be an array ref + Fault("Undefined Memory rLL"); + } + my $num = @{$rLL}; + if ( $num == 0 && defined($Klimit) + || $num > 0 && !defined($Klimit) + || $num > 0 && $Klimit != $num - 1 ) + { + + # Possible autovivification problem... + if ( !defined($Klimit) ) { $Klimit = '*' } + Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit"); + } + return ($Klimit); +} + +sub prepare_for_new_input_lines { + + # Remember the largest batch size processed. This is needed + # by the pad routine to avoid padding the first nonblank token + if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) { + $peak_batch_size = $max_index_to_go; + } + + $gnu_sequence_number++; # increment output batch counter + %last_gnu_equals = (); + %gnu_comma_count = (); + %gnu_arrow_count = (); + $line_start_index_to_go = 0; + $max_gnu_item_index = UNDEFINED_INDEX; + $index_max_forced_break = UNDEFINED_INDEX; + $max_index_to_go = UNDEFINED_INDEX; + $last_nonblank_index_to_go = UNDEFINED_INDEX; + $last_nonblank_type_to_go = ''; + $last_nonblank_token_to_go = ''; + $last_last_nonblank_index_to_go = UNDEFINED_INDEX; + $last_last_nonblank_type_to_go = ''; + $last_last_nonblank_token_to_go = ''; + $forced_breakpoint_count = 0; + $forced_breakpoint_undo_count = 0; + $rbrace_follower = undef; + $summed_lengths_to_go[0] = 0; + $comma_count_in_batch = 0; + $starting_in_quote = 0; + + destroy_one_line_block(); + return; +} + +sub keyword_group_scan { + my $self = shift; + + # Manipulate blank lines around keyword groups (kgb* flags) + # Scan all lines looking for runs of consecutive lines beginning with + # selected keywords. Example keywords are 'my', 'our', 'local', ... but + # they may be anything. We will set flags requesting that blanks be + # inserted around and withing them according to input parameters. Note + # that we are scanning the lines as they came in in the input stream, so + # they are not necessarily well formatted. + + # The output of this sub is a return hash ref whose keys are the indexes of + # lines after which we desire a blank line. For line index i: + # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i + # $rhash_of_desires->{$i} = 2 means we want blank line $i removed + my $rhash_of_desires = {}; + + my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb' + my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba' + my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi' + my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd' + my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs' + + # A range of sizes can be input with decimal notation like 'min.max' with + # any number of dots between the two numbers. Examples: + # string => min max matches + # 1.1 1 1 exactly 1 + # 1.3 1 3 1,2, or 3 + # 1..3 1 3 1,2, or 3 + # 5 5 - 5 or more + # 6. 6 - 6 or more + # .2 - 2 up to 2 + # 1.0 1 0 nothing + my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size; + if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/ + || $Opt_size_max && $Opt_size_max !~ /^\d+$/ ) + { + Warn(< 0 + && ( $Opt_blanks_before != 1 + || $Opt_blanks_after != 1 + || $Opt_blanks_inside + || $Opt_blanks_delete ); + + my $Opt_pattern = $keyword_group_list_pattern; + my $Opt_comment_pattern = $keyword_group_list_comment_pattern; + my $Opt_repeat_count = + $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr' + + my $rlines = $self->{rlines}; + my $rLL = $self->{rLL}; + my $K_closing_container = $self->{K_closing_container}; + + # variables for the current group and subgroups: + my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group, + @subgroup ); + + # Definitions: + # ($ibeg, $iend) = starting and ending line indexes of this entire group + # $count = total number of keywords seen in this entire group + # $level_beg = indententation level of this group + # @group = [ $i, $token, $count ] =list of all keywords & blanks + # @subgroup = $j, index of group where token changes + # @iblanks = line indexes of blank lines in input stream in this group + # where i=starting line index + # token (the keyword) + # count = number of this token in this subgroup + # j = index in group where token changes + # + # These vars will contain values for the most recently seen line: + my ( $line_type, $CODE_type, $K_first, $K_last ); + + my $number_of_groups_seen = 0; + + #################### + # helper subroutines + #################### + + my $insert_blank_after = sub { + my ($i) = @_; + $rhash_of_desires->{$i} = 1; + my $ip = $i + 1; + if ( defined( $rhash_of_desires->{$ip} ) + && $rhash_of_desires->{$ip} == 2 ) + { + $rhash_of_desires->{$ip} = 0; + } + return; + }; + + my $split_into_sub_groups = sub { + + # place blanks around long sub-groups of keywords + # ...if requested + return unless ($Opt_blanks_inside); + + # loop over sub-groups, index k + push @subgroup, scalar @group; + my $kbeg = 1; + my $kend = @subgroup - 1; + for ( my $k = $kbeg ; $k <= $kend ; $k++ ) { + + # index j runs through all keywords found + my $j_b = $subgroup[ $k - 1 ]; + my $j_e = $subgroup[$k] - 1; + + # index i is the actual line number of a keyword + my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] }; + my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] }; + my $num = $count_e - $count_b + 1; + + # This subgroup runs from line $ib to line $ie-1, but may contain + # blank lines + if ( $num >= $Opt_size_min ) { + + # if there are blank lines, we require that at least $num lines + # be non-blank up to the boundary with the next subgroup. + my $nog_b = my $nog_e = 1; + if ( @iblanks && !$Opt_blanks_delete ) { + my $j_bb = $j_b + $num - 1; + my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] }; + $nog_b = $count_bb - $count_b + 1 == $num; + + my $j_ee = $j_e - ( $num - 1 ); + my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] }; + $nog_e = $count_e - $count_ee + 1 == $num; + } + if ( $nog_b && $k > $kbeg ) { + $insert_blank_after->( $i_b - 1 ); + } + if ( $nog_e && $k < $kend ) { + my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] }; + $insert_blank_after->( $i_ep - 1 ); + } + } + } + }; + + my $delete_if_blank = sub { + my ($i) = @_; + + # delete line $i if it is blank + return unless ( $i >= 0 && $i < @{$rlines} ); + my $line_type = $rlines->[$i]->{_line_type}; + return if ( $line_type ne 'CODE' ); + my $code_type = $rlines->[$i]->{_code_type}; + if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; } + return; + }; + + my $delete_inner_blank_lines = sub { + + # always remove unwanted trailing blank lines from our list + return unless (@iblanks); + while ( my $ibl = pop(@iblanks) ) { + if ( $ibl < $iend ) { push @iblanks, $ibl; last } + $iend = $ibl; + } + + # now mark mark interior blank lines for deletion if requested + return unless ($Opt_blanks_delete); + + while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 } + + }; + + my $end_group = sub { + + # end a group of keywords + my ($bad_ending) = @_; + if ( defined($ibeg) && $ibeg >= 0 ) { + + # then handle sufficiently large groups + if ( $count >= $Opt_size_min ) { + + $number_of_groups_seen++; + + # do any blank deletions regardless of the count + $delete_inner_blank_lines->(); + + if ( $ibeg > 0 ) { + my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type}; + + # patch for hash bang line which is not currently marked as + # a comment; mark it as a comment + if ( $ibeg == 1 && !$code_type ) { + my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text}; + $code_type = 'BC' + if ( $line_text && $line_text =~ /^#/ ); + } + + # Do not insert a blank after a comment + # (this could be subject to a flag in the future) + if ( $code_type !~ /(BC|SBC|SBCX)/ ) { + if ( $Opt_blanks_before == INSERT ) { + $insert_blank_after->( $ibeg - 1 ); + + } + elsif ( $Opt_blanks_before == DELETE ) { + $delete_if_blank->( $ibeg - 1 ); + } + } + } + + # We will only put blanks before code lines. We could loosen + # this rule a little, but we have to be very careful because + # for example we certainly don't want to drop a blank line + # after a line like this: + # my $var = <[$K_first]->[_LEVEL_]; + my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; + + if ( $level == $level_beg + && $ci_level == 0 + && !$bad_ending + && $iend < @{$rlines} + && $CODE_type ne 'HSC' ) + { + if ( $Opt_blanks_after == INSERT ) { + $insert_blank_after->($iend); + } + elsif ( $Opt_blanks_after == DELETE ) { + $delete_if_blank->( $iend + 1 ); + } + } + } + } + $split_into_sub_groups->(); + } + + # reset for another group + $ibeg = -1; + $iend = undef; + $level_beg = -1; + $K_closing = undef; + @group = (); + @subgroup = (); + @iblanks = (); + }; + + my $find_container_end = sub { + + # If the keyword lines ends with an open token, find the closing token + # '$K_closing' so that we can easily skip past the contents of the + # container. + return if ( $K_last <= $K_first ); + my $KK = $K_last; + my $type_last = $rLL->[$KK]->[_TYPE_]; + my $tok_last = $rLL->[$KK]->[_TOKEN_]; + if ( $type_last eq '#' ) { + $KK = $self->K_previous_nonblank($KK); + $tok_last = $rLL->[$KK]->[_TOKEN_]; + } + if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) { + + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + my $lev = $rLL->[$KK]->[_LEVEL_]; + if ( $lev == $level_beg ) { + $K_closing = $K_closing_container->{$type_sequence}; + } + } + }; + + my $add_to_group = sub { + my ( $i, $token, $level ) = @_; + + # End the previous group if we have reached the maximum + # group size + if ( $Opt_size_max && @group >= $Opt_size_max ) { + $end_group->(); + } + + if ( @group == 0 ) { + $ibeg = $i; + $level_beg = $level; + $count = 0; + } + + $count++; + $iend = $i; + + # New sub-group? + if ( !@group || $token ne $group[-1]->[1] ) { + push @subgroup, scalar(@group); + } + push @group, [ $i, $token, $count ]; + + # remember if this line ends in an open container + $find_container_end->(); + + return; + }; + + ################################### + # loop over all lines of the source + ################################### + $end_group->(); + my $i = -1; + foreach my $line_of_tokens ( @{$rlines} ) { + + $i++; + last + if ( $Opt_repeat_count > 0 + && $number_of_groups_seen >= $Opt_repeat_count ); + + $CODE_type = ""; + $K_first = undef; + $K_last = undef; + $line_type = $line_of_tokens->{_line_type}; + + # always end a group at non-CODE + if ( $line_type ne 'CODE' ) { $end_group->(); next } + + $CODE_type = $line_of_tokens->{_code_type}; + + # end any group at a format skipping line + if ( $CODE_type && $CODE_type eq 'FS' ) { + $end_group->(); + next; + } + + # continue in a verbatim (VB) type; it may be quoted text + if ( $CODE_type eq 'VB' ) { + if ( $ibeg >= 0 ) { $iend = $i; } + next; + } + + # and continue in blank (BL) types + if ( $CODE_type eq 'BL' ) { + if ( $ibeg >= 0 ) { + $iend = $i; + push @{iblanks}, $i; + + # propagate current subgroup token + my $tok = $group[-1]->[1]; + push @group, [ $i, $tok, $count ]; + } + next; + } + + # examine the first token of this line + my $rK_range = $line_of_tokens->{_rK_range}; + ( $K_first, $K_last ) = @{$rK_range}; + if ( !defined($K_first) ) { + + # Unexpected blank line..shouldn't happen + # $rK_range should be defined for line type CODE + Warn( +"Programming Error: Unexpected Blank Line in sub 'keyword_group_scan'. Ignoring" + ); + return $rhash_of_desires; + } + + my $level = $rLL->[$K_first]->[_LEVEL_]; + my $type = $rLL->[$K_first]->[_TYPE_]; + my $token = $rLL->[$K_first]->[_TOKEN_]; + my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; + + # see if this is a code type we seek (i.e. comment) + if ( $CODE_type + && $Opt_comment_pattern + && $CODE_type =~ /$Opt_comment_pattern/o ) + { + + my $tok = $CODE_type; + + # Continuing a group + if ( $ibeg >= 0 && $level == $level_beg ) { + $add_to_group->( $i, $tok, $level ); + } + + # Start new group + else { + + # first end old group if any; we might be starting new + # keywords at different level + if ( $ibeg > 0 ) { $end_group->(); } + $add_to_group->( $i, $tok, $level ); + } + next; + } + + # See if it is a keyword we seek, but never start a group in a + # continuation line; the code may be badly formatted. + if ( $ci_level == 0 + && $type eq 'k' + && $token =~ /$Opt_pattern/o ) + { + + # Continuing a keyword group + if ( $ibeg >= 0 && $level == $level_beg ) { + $add_to_group->( $i, $token, $level ); + } + + # Start new keyword group + else { + + # first end old group if any; we might be starting new + # keywords at different level + if ( $ibeg > 0 ) { $end_group->(); } + $add_to_group->( $i, $token, $level ); + } + next; + } + + # This is not one of our keywords, but we are in a keyword group + # so see if we should continue or quit + elsif ( $ibeg >= 0 ) { + + # - bail out on a large level change; we may have walked into a + # data structure or anoymous sub code. + if ( $level > $level_beg + 1 || $level < $level_beg ) { + $end_group->(); + next; + } + + # - keep going on a continuation line of the same level, since + # it is probably a continuation of our previous keyword, + # - and keep going past hanging side comments because we never + # want to interrupt them. + if ( ( ( $level == $level_beg ) && $ci_level > 0 ) + || $CODE_type eq 'HSC' ) + { + $iend = $i; + next; + } + + # - continue if if we are within in a container which started with + # the line of the previous keyword. + if ( defined($K_closing) && $K_first <= $K_closing ) { + + # continue if entire line is within container + if ( $K_last <= $K_closing ) { $iend = $i; next } + + # continue at ); or }; or ]; + my $KK = $K_closing + 1; + if ( $rLL->[$KK]->[_TYPE_] eq ';' ) { + if ( $KK < $K_last ) { + if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK } + if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) { + $end_group->(1); + next; + } + } + $iend = $i; + next; + } + + $end_group->(1); + next; + } + + # - end the group if none of the above + $end_group->(); + next; + } + + # not in a keyword group; continue + else { next } + } + + # end of loop over all lines + $end_group->(); + return $rhash_of_desires; +} + +sub break_lines { + + # Loop over old lines to set new line break points + + my $self = shift; + my $rlines = $self->{rlines}; + + # Note for RT#118553, leave only one newline at the end of a file. + # Example code to do this is in comments below: + # my $Opt_trim_ending_blank_lines = 0; + # if ($Opt_trim_ending_blank_lines) { + # while ( my $line_of_tokens = pop @{$rlines} ) { + # my $line_type = $line_of_tokens->{_line_type}; + # if ( $line_type eq 'CODE' ) { + # my $CODE_type = $line_of_tokens->{_code_type}; + # next if ( $CODE_type eq 'BL' ); + # } + # push @{$rlines}, $line_of_tokens; + # last; + # } + # } + + # But while this would be a trivial update, it would have very undesirable + # side effects when perltidy is run from within an editor on a small snippet. + # So this is best done with a separate filter, such + # as 'delete_ending_blank_lines.pl' in the examples folder. + + # Flag to prevent blank lines when POD occurs in a format skipping sect. + my $in_format_skipping_section; + + # set locations for blanks around long runs of keywords + my $rwant_blank_line_after = $self->keyword_group_scan(); + + my $line_type = ""; + my $i = -1; + foreach my $line_of_tokens ( @{$rlines} ) { + $i++; + + # insert blank lines requested for keyword sequences + if ( $i > 0 + && defined( $rwant_blank_line_after->{ $i - 1 } ) + && $rwant_blank_line_after->{ $i - 1 } == 1 ) + { + $self->want_blank_line(); + } + + my $last_line_type = $line_type; + $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; + + # _line_type codes are: + # SYSTEM - system-specific code before hash-bang line + # CODE - line of perl code (including comments) + # POD_START - line starting pod, such as '=head' + # POD - pod documentation text + # POD_END - last line of pod section, '=cut' + # HERE - text of here-document + # HERE_END - last line of here-doc (target word) + # FORMAT - format section + # FORMAT_END - last line of format section, '.' + # DATA_START - __DATA__ line + # DATA - unidentified text following __DATA__ + # END_START - __END__ line + # END - unidentified text following __END__ + # ERROR - we are in big trouble, probably not a perl script + + # put a blank line after an =cut which comes before __END__ and __DATA__ + # (required by podchecker) + if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { + $file_writer_object->reset_consecutive_blank_lines(); + if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) { + $self->want_blank_line(); + } + } + + # handle line of code.. + if ( $line_type eq 'CODE' ) { + + my $CODE_type = $line_of_tokens->{_code_type}; + $in_format_skipping_section = $CODE_type eq 'FS'; + + # Handle blank lines + if ( $CODE_type eq 'BL' ) { + + # If keep-old-blank-lines is zero, we delete all + # old blank lines and let the blank line rules generate any + # needed blanks. + + # We also delete lines requested by the keyword-group logic + my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} ) + && $rwant_blank_line_after->{$i} == 2 ); + + # But the keep-old-blank-lines flag has priority over kgb flags + $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 ); + + if ( $rOpts_keep_old_blank_lines && $kgb_keep ) { + $self->flush(); + $file_writer_object->write_blank_code_line( + $rOpts_keep_old_blank_lines == 2 ); + $last_line_leading_type = 'b'; + } + next; + } + else { + + # let logger see all non-blank lines of code + my $output_line_number = get_output_line_number(); + black_box( $line_of_tokens, $output_line_number ); + } + + # Handle Format Skipping (FS) and Verbatim (VB) Lines + if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) { + $self->write_unindented_line("$input_line"); + $file_writer_object->reset_consecutive_blank_lines(); + next; + } + + # Handle all other lines of code + $self->print_line_of_tokens($line_of_tokens); + } + + # handle line of non-code.. + else { + + # set special flags + my $skip_line = 0; + my $tee_line = 0; + if ( $line_type =~ /^POD/ ) { + + # Pod docs should have a preceding blank line. But stay + # out of __END__ and __DATA__ sections, because + # the user may be using this section for any purpose whatsoever + if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } + if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } + if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } + if ( !$skip_line + && !$in_format_skipping_section + && $line_type eq 'POD_START' + && !$saw_END_or_DATA_ ) + { + $self->want_blank_line(); + } + } + + # leave the blank counters in a predictable state + # after __END__ or __DATA__ + elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { + $file_writer_object->reset_consecutive_blank_lines(); + $saw_END_or_DATA_ = 1; + } + + # write unindented non-code line + if ( !$skip_line ) { + if ($tee_line) { $file_writer_object->tee_on() } + $self->write_unindented_line($input_line); + if ($tee_line) { $file_writer_object->tee_off() } + } + } + } + return; +} + +{ ## Beginning of routine to check line hashes + + my %valid_line_hash; + + BEGIN { + + # These keys are defined for each line in the formatter + # Each line must have exactly these quantities + my @valid_line_keys = qw( + _curly_brace_depth + _ending_in_quote + _guessed_indentation_level + _line_number + _line_text + _line_type + _paren_depth + _quote_character + _rK_range + _square_bracket_depth + _starting_in_quote + _ended_in_blank_token + _code_type + + _ci_level_0 + _level_0 + _nesting_blocks_0 + _nesting_tokens_0 + ); + + @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys); + } + + sub check_line_hashes { + my $self = shift; + $self->check_self_hash(); + my $rlines = $self->{rlines}; + foreach my $rline ( @{$rlines} ) { + my $iline = $rline->{_line_number}; + my $line_type = $rline->{_line_type}; + check_keys( $rline, \%valid_line_hash, + "Checkpoint: line number =$iline, line_type=$line_type", 1 ); + } + return; + } + +} ## End check line hashes + +sub write_line { + + # We are caching tokenized lines as they arrive and converting them to the + # format needed for the final formatting. + my ( $self, $line_of_tokens_old ) = @_; + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + my $rlines_new = $self->{rlines}; + + my $Kfirst; + my $line_of_tokens = {}; + foreach my $key ( + qw( + _curly_brace_depth + _ending_in_quote + _guessed_indentation_level + _line_number + _line_text + _line_type + _paren_depth + _quote_character + _square_bracket_depth + _starting_in_quote + ) + ) + { + $line_of_tokens->{$key} = $line_of_tokens_old->{$key}; + } + + # Data needed by Logger + $line_of_tokens->{_level_0} = 0; + $line_of_tokens->{_ci_level_0} = 0; + $line_of_tokens->{_nesting_blocks_0} = ""; + $line_of_tokens->{_nesting_tokens_0} = ""; + + # Needed to avoid trimming quotes + $line_of_tokens->{_ended_in_blank_token} = undef; + + my $line_type = $line_of_tokens_old->{_line_type}; + my $input_line_no = $line_of_tokens_old->{_line_number} - 1; + if ( $line_type eq 'CODE' ) { + + my $rtokens = $line_of_tokens_old->{_rtokens}; + my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; + my $rblock_type = $line_of_tokens_old->{_rblock_type}; + my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type}; + my $rcontainer_environment = + $line_of_tokens_old->{_rcontainer_environment}; + my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; + my $rlevels = $line_of_tokens_old->{_rlevels}; + my $rslevels = $line_of_tokens_old->{_rslevels}; + my $rci_levels = $line_of_tokens_old->{_rci_levels}; + my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks}; + my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens}; + + my $jmax = @{$rtokens} - 1; + if ( $jmax >= 0 ) { + $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; + foreach my $j ( 0 .. $jmax ) { + my @tokary; + @tokary[ + _TOKEN_, _TYPE_, + _BLOCK_TYPE_, _CONTAINER_TYPE_, + _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_, + _LEVEL_, _LEVEL_TRUE_, + _SLEVEL_, _CI_LEVEL_, + _LINE_INDEX_, + ] + = ( + $rtokens->[$j], $rtoken_type->[$j], + $rblock_type->[$j], $rcontainer_type->[$j], + $rcontainer_environment->[$j], $rtype_sequence->[$j], + $rlevels->[$j], $rlevels->[$j], + $rslevels->[$j], $rci_levels->[$j], + $input_line_no, + ); + push @{$rLL}, \@tokary; + } + + $Klimit = @{$rLL} - 1; + + # Need to remember if we can trim the input line + $line_of_tokens->{_ended_in_blank_token} = + $rtoken_type->[$jmax] eq 'b'; + + $line_of_tokens->{_level_0} = $rlevels->[0]; + $line_of_tokens->{_ci_level_0} = $rci_levels->[0]; + $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0]; + $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0]; + } + } + + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; + $line_of_tokens->{_code_type} = ""; + $self->{Klimit} = $Klimit; + + push @{$rlines_new}, $line_of_tokens; + return; +} + +sub initialize_whitespace_hashes { + + # initialize these global hashes, which control the use of + # whitespace around tokens: + # + # %binary_ws_rules + # %want_left_space + # %want_right_space + # %space_after_keyword + # + # Many token types are identical to the tokens themselves. + # See the tokenizer for a complete list. Here are some special types: + # k = perl keyword + # f = semicolon in for statement + # m = unary minus + # p = unary plus + # Note that :: is excluded since it should be contained in an identifier + # Note that '->' is excluded because it never gets space + # parentheses and brackets are excluded since they are handled specially + # curly braces are included but may be overridden by logic, such as + # newline logic. + + # NEW_TOKENS: create a whitespace rule here. This can be as + # simple as adding your new letter to @spaces_both_sides, for + # example. + + my @opening_type = qw< L { ( [ >; + @is_opening_type{@opening_type} = (1) x scalar(@opening_type); + + my @closing_type = qw< R } ) ] >; + @is_closing_type{@closing_type} = (1) x scalar(@closing_type); + + my @spaces_both_sides = qw# + + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= + .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ + &&= ||= //= <=> A k f w F n C Y U G v + #; + + my @spaces_left_side = qw< + t ! ~ m p { \ h pp mm Z j + >; + push( @spaces_left_side, '#' ); # avoids warning message + + my @spaces_right_side = qw< + ; } ) ] R J ++ -- **= + >; + push( @spaces_right_side, ',' ); # avoids warning message + + # Note that we are in a BEGIN block here. Later in processing + # the values of %want_left_space and %want_right_space + # may be overridden by any user settings specified by the + # -wls and -wrs parameters. However the binary_whitespace_rules + # are hardwired and have priority. + @want_left_space{@spaces_both_sides} = + (1) x scalar(@spaces_both_sides); + @want_right_space{@spaces_both_sides} = + (1) x scalar(@spaces_both_sides); + @want_left_space{@spaces_left_side} = + (1) x scalar(@spaces_left_side); + @want_right_space{@spaces_left_side} = + (-1) x scalar(@spaces_left_side); + @want_left_space{@spaces_right_side} = + (-1) x scalar(@spaces_right_side); + @want_right_space{@spaces_right_side} = + (1) x scalar(@spaces_right_side); + $want_left_space{'->'} = WS_NO; + $want_right_space{'->'} = WS_NO; + $want_left_space{'**'} = WS_NO; + $want_right_space{'**'} = WS_NO; + $want_right_space{'CORE::'} = WS_NO; + + # These binary_ws_rules are hardwired and have priority over the above + # settings. It would be nice to allow adjustment by the user, + # but it would be complicated to specify. + # + # hash type information must stay tightly bound + # as in : ${xxxx} + $binary_ws_rules{'i'}{'L'} = WS_NO; + $binary_ws_rules{'i'}{'{'} = WS_YES; + $binary_ws_rules{'k'}{'{'} = WS_YES; + $binary_ws_rules{'U'}{'{'} = WS_YES; + $binary_ws_rules{'i'}{'['} = WS_NO; + $binary_ws_rules{'R'}{'L'} = WS_NO; + $binary_ws_rules{'R'}{'{'} = WS_NO; + $binary_ws_rules{'t'}{'L'} = WS_NO; + $binary_ws_rules{'t'}{'{'} = WS_NO; + $binary_ws_rules{'}'}{'L'} = WS_NO; + $binary_ws_rules{'}'}{'{'} = WS_NO; + $binary_ws_rules{'$'}{'L'} = WS_NO; + $binary_ws_rules{'$'}{'{'} = WS_NO; + $binary_ws_rules{'@'}{'L'} = WS_NO; + $binary_ws_rules{'@'}{'{'} = WS_NO; + $binary_ws_rules{'='}{'L'} = WS_YES; + $binary_ws_rules{'J'}{'J'} = WS_YES; + + # the following includes ') {' + # as in : if ( xxx ) { yyy } + $binary_ws_rules{']'}{'L'} = WS_NO; + $binary_ws_rules{']'}{'{'} = WS_NO; + $binary_ws_rules{')'}{'{'} = WS_YES; + $binary_ws_rules{')'}{'['} = WS_NO; + $binary_ws_rules{']'}{'['} = WS_NO; + $binary_ws_rules{']'}{'{'} = WS_NO; + $binary_ws_rules{'}'}{'['} = WS_NO; + $binary_ws_rules{'R'}{'['} = WS_NO; + + $binary_ws_rules{']'}{'++'} = WS_NO; + $binary_ws_rules{']'}{'--'} = WS_NO; + $binary_ws_rules{')'}{'++'} = WS_NO; + $binary_ws_rules{')'}{'--'} = WS_NO; + + $binary_ws_rules{'R'}{'++'} = WS_NO; + $binary_ws_rules{'R'}{'--'} = WS_NO; + + $binary_ws_rules{'i'}{'Q'} = WS_YES; + $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' + + # FIXME: we could to split 'i' into variables and functions + # and have no space for functions but space for variables. For now, + # I have a special patch in the special rules below + $binary_ws_rules{'i'}{'('} = WS_NO; + + $binary_ws_rules{'w'}{'('} = WS_NO; + $binary_ws_rules{'w'}{'{'} = WS_YES; + return; + +} ## end initialize_whitespace_hashes + +sub set_whitespace_flags { + + # This routine examines each pair of nonblank tokens and + # sets a flag indicating if white space is needed. + # + # $rwhitespace_flags->[$j] is a flag indicating whether a white space + # BEFORE token $j is needed, with the following values: + # + # WS_NO = -1 do not want a space before token $j + # WS_OPTIONAL= 0 optional space or $j is a whitespace + # WS_YES = 1 want a space before token $j + # + + my $self = shift; + my $rLL = $self->{rLL}; + + my $rwhitespace_flags = []; + + my ( $last_token, $last_type, $last_block_type, $last_input_line_no, + $token, $type, $block_type, $input_line_no ); + my $j_tight_closing_paren = -1; + + $token = ' '; + $type = 'b'; + $block_type = ''; + $input_line_no = 0; + $last_token = ' '; + $last_type = 'b'; + $last_block_type = ''; + $last_input_line_no = 0; + + my $jmax = @{$rLL} - 1; + + my ($ws); + + # This is some logic moved to a sub to avoid deep nesting of if stmts + my $ws_in_container = sub { + + my ($j) = @_; + my $ws = WS_YES; + if ( $j + 1 > $jmax ) { return (WS_NO) } + + # Patch to count '-foo' as single token so that + # each of $a{-foo} and $a{foo} and $a{'foo'} do + # not get spaces with default formatting. + my $j_here = $j; + ++$j_here + if ( $token eq '-' + && $last_token eq '{' + && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); + + # $j_next is where a closing token should be if + # the container has a single token + if ( $j_here + 1 > $jmax ) { return (WS_NO) } + my $j_next = + ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) + ? $j_here + 2 + : $j_here + 1; + + if ( $j_next > $jmax ) { return WS_NO } + my $tok_next = $rLL->[$j_next]->[_TOKEN_]; + my $type_next = $rLL->[$j_next]->[_TYPE_]; + + # for tightness = 1, if there is just one token + # within the matching pair, we will keep it tight + if ( + $tok_next eq $matching_token{$last_token} + + # but watch out for this: [ [ ] (misc.t) + && $last_token ne $token + + # double diamond is usually spaced + && $token ne '<<>>' + + ) + { + + # remember where to put the space for the closing paren + $j_tight_closing_paren = $j_next; + return (WS_NO); + } + return (WS_YES); + }; + + # main loop over all tokens to define the whitespace flags + for ( my $j = 0 ; $j <= $jmax ; $j++ ) { + + my $rtokh = $rLL->[$j]; + + # Set a default + $rwhitespace_flags->[$j] = WS_OPTIONAL; + + if ( $rtokh->[_TYPE_] eq 'b' ) { + next; + } + + # set a default value, to be changed as needed + $ws = undef; + $last_token = $token; + $last_type = $type; + $last_block_type = $block_type; + $last_input_line_no = $input_line_no; + $token = $rtokh->[_TOKEN_]; + $type = $rtokh->[_TYPE_]; + $block_type = $rtokh->[_BLOCK_TYPE_]; + $input_line_no = $rtokh->[_LINE_INDEX_]; + + #--------------------------------------------------------------- + # Whitespace Rules Section 1: + # Handle space on the inside of opening braces. + #--------------------------------------------------------------- + + # /^[L\{\(\[]$/ + if ( $is_opening_type{$last_type} ) { + + $j_tight_closing_paren = -1; + + # let us keep empty matched braces together: () {} [] + # except for BLOCKS + if ( $token eq $matching_token{$last_token} ) { + if ($block_type) { + $ws = WS_YES; + } + else { + $ws = WS_NO; + } + } + else { + + # we're considering the right of an opening brace + # tightness = 0 means always pad inside with space + # tightness = 1 means pad inside if "complex" + # tightness = 2 means never pad inside with space + + my $tightness; + if ( $last_type eq '{' + && $last_token eq '{' + && $last_block_type ) + { + $tightness = $rOpts_block_brace_tightness; + } + else { $tightness = $tightness{$last_token} } + + #============================================================= + # Patch for test problem <> + # We must always avoid spaces around a bare word beginning + # with ^ as in: + # my $before = ${^PREMATCH}; + # Because all of the following cause an error in perl: + # my $before = ${ ^PREMATCH }; + # my $before = ${ ^PREMATCH}; + # my $before = ${^PREMATCH }; + # So if brace tightness flag is -bt=0 we must temporarily reset + # to bt=1. Note that here we must set tightness=1 and not 2 so + # that the closing space + # is also avoided (via the $j_tight_closing_paren flag in coding) + if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } + + #============================================================= + + if ( $tightness <= 0 ) { + $ws = WS_YES; + } + elsif ( $tightness > 1 ) { + $ws = WS_NO; + } + else { + $ws = $ws_in_container->($j); + } + } + } # end setting space flag inside opening tokens + my $ws_1; + $ws_1 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 2: + # Handle space on inside of closing brace pairs. + #--------------------------------------------------------------- + + # /[\}\)\]R]/ + if ( $is_closing_type{$type} ) { + + if ( $j == $j_tight_closing_paren ) { + + $j_tight_closing_paren = -1; + $ws = WS_NO; + } + else { + + if ( !defined($ws) ) { + + my $tightness; + if ( $type eq '}' && $token eq '}' && $block_type ) { + $tightness = $rOpts_block_brace_tightness; + } + else { $tightness = $tightness{$token} } + + $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; + } + } + } # end setting space flag inside closing tokens + + my $ws_2; + $ws_2 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 3: + # Use the binary rule table. + #--------------------------------------------------------------- + if ( !defined($ws) ) { + $ws = $binary_ws_rules{$last_type}{$type}; + } + my $ws_3; + $ws_3 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 4: + # Handle some special cases. + #--------------------------------------------------------------- + if ( $token eq '(' ) { + + # This will have to be tweaked as tokenization changes. + # We usually want a space at '} (', for example: + # <> + # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); + # + # But not others: + # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); + # At present, the above & block is marked as type L/R so this case + # won't go through here. + if ( $last_type eq '}' ) { $ws = WS_YES } + + # NOTE: some older versions of Perl had occasional problems if + # spaces are introduced between keywords or functions and opening + # parens. So the default is not to do this except is certain + # cases. The current Perl seems to tolerate spaces. + + # Space between keyword and '(' + elsif ( $last_type eq 'k' ) { + $ws = WS_NO + unless ( $rOpts_space_keyword_paren + || $space_after_keyword{$last_token} ); + } + + # Space between function and '(' + # ----------------------------------------------------- + # 'w' and 'i' checks for something like: + # myfun( &myfun( ->myfun( + # ----------------------------------------------------- + elsif (( $last_type =~ /^[wUG]$/ ) + || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) + { + $ws = WS_NO unless ($rOpts_space_function_paren); + } + + # space between something like $i and ( in <> + # for $i ( 0 .. 20 ) { + # FIXME: eventually, type 'i' needs to be split into multiple + # token types so this can be a hardwired rule. + elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { + $ws = WS_YES; + } + + # allow constant function followed by '()' to retain no space + elsif ($last_type eq 'C' + && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' ) + { + $ws = WS_NO; + } + } + + # patch for SWITCH/CASE: make space at ']{' optional + # since the '{' might begin a case or when block + elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { + $ws = WS_OPTIONAL; + } + + # keep space between 'sub' and '{' for anonymous sub definition + if ( $type eq '{' ) { + if ( $last_token eq 'sub' ) { + $ws = WS_YES; + } + + # this is needed to avoid no space in '){' + if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } + + # avoid any space before the brace or bracket in something like + # @opts{'a','b',...} + if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { + $ws = WS_NO; + } + } + + elsif ( $type eq 'i' ) { + + # never a space before -> + if ( $token =~ /^\-\>/ ) { + $ws = WS_NO; + } + } + + # retain any space between '-' and bare word + elsif ( $type eq 'w' || $type eq 'C' ) { + $ws = WS_OPTIONAL if $last_type eq '-'; + + # never a space before -> + if ( $token =~ /^\-\>/ ) { + $ws = WS_NO; + } + } + + # retain any space between '-' and bare word; for example + # avoid space between 'USER' and '-' here: <> + # $myhash{USER-NAME}='steve'; + elsif ( $type eq 'm' || $type eq '-' ) { + $ws = WS_OPTIONAL if ( $last_type eq 'w' ); + } + + # always space before side comment + elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } + + # always preserver whatever space was used after a possible + # filehandle (except _) or here doc operator + if ( + $type ne '#' + && ( ( $last_type eq 'Z' && $last_token ne '_' ) + || $last_type eq 'h' ) + ) + { + $ws = WS_OPTIONAL; + } + + # space_backslash_quote; RT #123774 <> + # allow a space between a backslash and single or double quote + # to avoid fooling html formatters + elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) { + if ($rOpts_space_backslash_quote) { + if ( $rOpts_space_backslash_quote == 1 ) { + $ws = WS_OPTIONAL; + } + elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES } + else { } # shouldnt happen + } + else { + $ws = WS_NO; + } + } + + my $ws_4; + $ws_4 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 5: + # Apply default rules not covered above. + #--------------------------------------------------------------- + + # If we fall through to here, look at the pre-defined hash tables for + # the two tokens, and: + # if (they are equal) use the common value + # if (either is zero or undef) use the other + # if (either is -1) use it + # That is, + # left vs right + # 1 vs 1 --> 1 + # 0 vs 0 --> 0 + # -1 vs -1 --> -1 + # + # 0 vs -1 --> -1 + # 0 vs 1 --> 1 + # 1 vs 0 --> 1 + # -1 vs 0 --> -1 + # + # -1 vs 1 --> -1 + # 1 vs -1 --> -1 + if ( !defined($ws) ) { + my $wl = $want_left_space{$type}; + my $wr = $want_right_space{$last_type}; + if ( !defined($wl) ) { $wl = 0 } + if ( !defined($wr) ) { $wr = 0 } + $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; + } + + if ( !defined($ws) ) { + $ws = 0; + write_diagnostics( + "WS flag is undefined for tokens $last_token $token\n"); + } + + # Treat newline as a whitespace. Otherwise, we might combine + # 'Send' and '-recipients' here according to the above rules: + # <> + # my $msg = new Fax::Send + # -recipients => $to, + # -data => $data; + if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 } + + if ( ( $ws == 0 ) + && $j > 0 + && $j < $jmax + && ( $last_type !~ /^[Zh]$/ ) ) + { + + # If this happens, we have a non-fatal but undesirable + # hole in the above rules which should be patched. + write_diagnostics( + "WS flag is zero for tokens $last_token $token\n"); + } + + $rwhitespace_flags->[$j] = $ws; + + FORMATTER_DEBUG_FLAG_WHITE && do { + my $str = substr( $last_token, 0, 15 ); + $str .= ' ' x ( 16 - length($str) ); + if ( !defined($ws_1) ) { $ws_1 = "*" } + if ( !defined($ws_2) ) { $ws_2 = "*" } + if ( !defined($ws_3) ) { $ws_3 = "*" } + if ( !defined($ws_4) ) { $ws_4 = "*" } + print STDOUT +"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; + }; + } ## end main loop + + if ( $rOpts->{'tight-secret-operators'} ) { + new_secret_operator_whitespace( $rLL, $rwhitespace_flags ); + } + return $rwhitespace_flags; +} ## end sub set_whitespace_flags + +sub respace_tokens { + + my $self = shift; + return if $rOpts->{'indent-only'}; + + # This routine makes all necessary changes to the tokenization after the + # file has been read. This consists mostly of inserting and deleting spaces + # according to the selected parameters. In a few cases non-space characters + # are added, deleted or modified. + + # The old tokens are copied one-by-one, with changes, from the old + # linear storage array to a new array. + + my $rLL = $self->{rLL}; + my $Klimit_old = $self->{Klimit}; + my $rlines = $self->{rlines}; + my $rpaired_to_inner_container = $self->{rpaired_to_inner_container}; + + my $rLL_new = []; # This is the new array + my $KK = 0; + my $rtoken_vars; + my $Kmax = @{$rLL} - 1; + + # Set the whitespace flags, which indicate the token spacing preference. + my $rwhitespace_flags = $self->set_whitespace_flags(); + + # we will be setting token lengths as we go + my $cumulative_length = 0; + + # We also define these hash indexes giving container token array indexes + # as a function of the container sequence numbers. For example, + my $K_opening_container = {}; # opening [ { or ( + my $K_closing_container = {}; # closing ] } or ) + my $K_opening_ternary = {}; # opening ? of ternary + my $K_closing_ternary = {}; # closing : of ternary + + # List of new K indexes of phantom semicolons + # This will be needed if we want to undo them for iterations + my $rK_phantom_semicolons = []; + + # Temporary hashes for adding semicolons + ##my $rKfirst_new = {}; + + # a sub to link preceding nodes forward to a new node type + my $link_back = sub { + my ( $Ktop, $key ) = @_; + + my $Kprev = $Ktop - 1; + while ( $Kprev >= 0 + && !defined( $rLL_new->[$Kprev]->[$key] ) ) + { + $rLL_new->[$Kprev]->[$key] = $Ktop; + $Kprev -= 1; + } + }; + + # A sub to store one token in the new array + # All new tokens must be stored by this sub so that it can update + # all data structures on the fly. + my $last_nonblank_type = ';'; + my $store_token = sub { + my ($item) = @_; + + # This will be the index of this item in the new array + my $KK_new = @{$rLL_new}; + + # check for a sequenced item (i.e., container or ?/:) + my $type_sequence = $item->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + + $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ ); + + my $token = $item->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + + $K_opening_container->{$type_sequence} = $KK_new; + } + elsif ( $is_closing_token{$token} ) { + + $K_closing_container->{$type_sequence} = $KK_new; + } + + # These are not yet used but could be useful + else { + if ( $token eq '?' ) { + $K_opening_ternary->{$type_sequence} = $KK_new; + } + elsif ( $token eq ':' ) { + $K_closing_ternary->{$type_sequence} = $KK_new; + } + else { + # shouldn't happen + Fault("Ugh: shouldn't happen"); + } + } + } + + # find the length of this token + my $token_length = length( $item->[_TOKEN_] ); + + # and update the cumulative length + $cumulative_length += $token_length; + + # Save the length sum to just AFTER this token + $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; + + my $type = $item->[_TYPE_]; + if ( $type ne 'b' ) { $last_nonblank_type = $type } + + # and finally, add this item to the new array + push @{$rLL_new}, $item; + }; + + my $store_token_and_space = sub { + my ( $item, $want_space ) = @_; + + # store a token with preceding space if requested and needed + + # First store the space + if ( $want_space + && @{$rLL_new} + && $rLL_new->[-1]->[_TYPE_] ne 'b' + && $rOpts_add_whitespace ) + { + my $rcopy = copy_token_as_type( $item, 'b', ' ' ); + $rcopy->[_LINE_INDEX_] = + $rLL_new->[-1]->[_LINE_INDEX_]; + $store_token->($rcopy); + } + + # then the token + $store_token->($item); + }; + + my $K_end_q = sub { + my ($KK) = @_; + my $K_end = $KK; + my $Kn = $self->K_next_nonblank($KK); + while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) { + $K_end = $Kn; + $Kn = $self->K_next_nonblank($Kn); + } + return $K_end; + }; + + my $add_phantom_semicolon = sub { + + my ($KK) = @_; + + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + + # we are only adding semicolons for certain block types + my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_]; + return + unless ( $ok_to_add_semicolon_for_block_type{$block_type} + || $block_type =~ /^(sub|package)/ + || $block_type =~ /^\w+\:$/ ); + + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + + my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; + my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; + + # Do not add a semicolon if... + return + if ( + + # it would follow a comment (and be isolated) + $previous_nonblank_type eq '#' + + # it follows a code block ( because they are not always wanted + # there and may add clutter) + || $rLL_new->[$Kp]->[_BLOCK_TYPE_] + + # it would follow a label + || $previous_nonblank_type eq 'J' + + # it would be inside a 'format' statement (and cause syntax error) + || ( $previous_nonblank_type eq 'k' + && $previous_nonblank_token =~ /format/ ) + + # if it would prevent welding two containers + || $rpaired_to_inner_container->{$type_sequence} + + ); + + # We will insert an empty semicolon here as a placeholder. Later, if + # it becomes the last token on a line, we will bring it to life. The + # advantage of doing this is that (1) we just have to check line + # endings, and (2) the phantom semicolon has zero width and therefore + # won't cause needless breaks of one-line blocks. + my $Ktop = -1; + if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' + && $want_left_space{';'} == WS_NO ) + { + + # convert the blank into a semicolon.. + # be careful: we are working on the new stack top + # on a token which has been stored. + my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' ); + + # Convert the existing blank to: + # a phantom semicolon for one_line_block option = 0 or 1 + # a real semicolon for one_line_block option = 2 + my $tok = $rOpts_one_line_block_semicolons == 2 ? ';' : ''; + + $rLL_new->[$Ktop]->[_TOKEN_] = $tok; # zero length if phantom + $rLL_new->[$Ktop]->[_TYPE_] = ';'; + $rLL_new->[$Ktop]->[_SLEVEL_] = + $rLL->[$KK]->[_SLEVEL_]; + + push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; + + # Then store a new blank + $store_token->($rcopy); + } + else { + + # insert a new token + my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' ); + $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_]; + $store_token->($rcopy); + push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; + } + }; + + my $check_Q = sub { + + # Check that a quote looks okay + # This sub works but needs to by sync'd with the log file output + # before it can be used. + my ( $KK, $Kfirst ) = @_; + my $token = $rLL->[$KK]->[_TOKEN_]; + note_embedded_tab() if ( $token =~ "\t" ); + + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; + my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; + + my $previous_nonblank_type_2 = 'b'; + my $previous_nonblank_token_2 = ""; + my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); + if ( defined($Kpp) ) { + $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; + $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; + } + + my $Kn = $self->K_next_nonblank($KK); + my $next_nonblank_token = ""; + if ( defined($Kn) ) { + $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; + } + + my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; + my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; + + # make note of something like '$var = s/xxx/yyy/;' + # in case it should have been '$var =~ s/xxx/yyy/;' + if ( + $token =~ /^(s|tr|y|m|\/)/ + && $previous_nonblank_token =~ /^(=|==|!=)$/ + + # preceded by simple scalar + && $previous_nonblank_type_2 eq 'i' + && $previous_nonblank_token_2 =~ /^\$/ + + # followed by some kind of termination + # (but give complaint if we can not see far enough ahead) + && $next_nonblank_token =~ /^[; \)\}]$/ + + # scalar is not declared + && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ ) + ) + { + my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; + complain( +"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" + ); + } + }; + + # Main loop over all lines of the file + my $last_K_out; + my $CODE_type = ""; + my $line_type = ""; + + # Testing option to break qw. Do not use; it can make a mess. + my $ALLOW_BREAK_MULTILINE_QW = 0; + my $in_multiline_qw; + foreach my $line_of_tokens ( @{$rlines} ) { + + $input_line_number = $line_of_tokens->{_line_number}; + my $last_line_type = $line_type; + $line_type = $line_of_tokens->{_line_type}; + next unless ( $line_type eq 'CODE' ); + my $last_CODE_type = $CODE_type; + $CODE_type = $line_of_tokens->{_code_type}; + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless defined($Kfirst); + + # Check for correct sequence of token indexes... + # An error here means that sub write_line() did not correctly + # package the tokenized lines as it received them. + if ( defined($last_K_out) ) { + if ( $Kfirst != $last_K_out + 1 ) { + Fault( + "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" + ); + } + } + else { + if ( $Kfirst != 0 ) { + Fault("Program Bug: first K is $Kfirst but should be 0"); + } + } + $last_K_out = $Klast; + + # Handle special lines of code + if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { + + # CODE_types are as follows. + # 'BL' = Blank Line + # 'VB' = Verbatim - line goes out verbatim + # 'FS' = Format Skipping - line goes out verbatim, no blanks + # 'IO' = Indent Only - only indentation may be changed + # 'NIN' = No Internal Newlines - line does not get broken + # 'HSC'=Hanging Side Comment - fix this hanging side comment + # 'BC'=Block Comment - an ordinary full line comment + # 'SBC'=Static Block Comment - a block comment which does not get + # indented + # 'SBCX'=Static Block Comment Without Leading Space + # 'DEL'=Delete this line + # 'VER'=VERSION statement + # '' or (undefined) - no restructions + + # For a hanging side comment we insert an empty quote before + # the comment so that it becomes a normal side comment and + # will be aligned by the vertical aligner + if ( $CODE_type eq 'HSC' ) { + + # Safety Check: This must be a line with one token (a comment) + my $rtoken_vars = $rLL->[$Kfirst]; + if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) { + + # Note that even if the flag 'noadd-whitespace' is set, we + # will make an exception here and allow a blank to be + # inserted to push the comment to the right. We can think + # of this as an adjustment of indentation rather than + # whitespace between tokens. This will also prevent the + # hanging side comment from getting converted to a block + # comment if whitespace gets deleted, as for example with + # the -extrude and -mangle options. + my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' ); + $store_token->($rcopy); + $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' ); + $store_token->($rcopy); + $store_token->($rtoken_vars); + next; + } + else { + + # This line was mis-marked by sub scan_comment + Fault( + "Program bug. A hanging side comment has been mismarked" + ); + } + } + + # Copy tokens unchanged + foreach my $KK ( $Kfirst .. $Klast ) { + $store_token->( $rLL->[$KK] ); + } + next; + } + + # Handle normal line.. + + # Insert any essential whitespace between lines + # if last line was normal CODE. + # Patch for rt #125012: use K_previous_code rather than '_nonblank' + # because comments may disappear. + my $type_next = $rLL->[$Kfirst]->[_TYPE_]; + my $token_next = $rLL->[$Kfirst]->[_TOKEN_]; + my $Kp = $self->K_previous_code( undef, $rLL_new ); + if ( $last_line_type eq 'CODE' + && $type_next ne 'b' + && defined($Kp) ) + { + my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + + my ( $token_pp, $type_pp ); + my $Kpp = $self->K_previous_code( $Kp, $rLL_new ); + if ( defined($Kpp) ) { + $token_pp = $rLL_new->[$Kpp]->[_TOKEN_]; + $type_pp = $rLL_new->[$Kpp]->[_TYPE_]; + } + else { + $token_pp = ";"; + $type_pp = ';'; + } + + if ( + is_essential_whitespace( + $token_pp, $type_pp, $token_p, + $type_p, $token_next, $type_next, + ) + ) + { + + # Copy this first token as blank, but use previous line number + my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' ); + $rcopy->[_LINE_INDEX_] = + $rLL_new->[-1]->[_LINE_INDEX_]; + $store_token->($rcopy); + } + } + + # loop to copy all tokens on this line, with any changes + my $type_sequence; + for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) { + $rtoken_vars = $rLL->[$KK]; + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + my $last_type_sequence = $type_sequence; + $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + + # Handle a blank space ... + if ( $type eq 'b' ) { + + # Delete it if not wanted by whitespace rules + # or we are deleting all whitespace + # Note that whitespace flag is a flag indicating whether a + # white space BEFORE the token is needed + next if ( $KK >= $Kmax ); # skip terminal blank + my $Knext = $KK + 1; + my $ws = $rwhitespace_flags->[$Knext]; + if ( $ws == -1 + || $rOpts_delete_old_whitespace ) + { + + # FIXME: maybe switch to using _new + my $Kp = $self->K_previous_nonblank($KK); + next unless defined($Kp); + my $token_p = $rLL->[$Kp]->[_TOKEN_]; + my $type_p = $rLL->[$Kp]->[_TYPE_]; + + my ( $token_pp, $type_pp ); + + #my $Kpp = $K_previous_nonblank->($Kp); + my $Kpp = $self->K_previous_nonblank($Kp); + if ( defined($Kpp) ) { + $token_pp = $rLL->[$Kpp]->[_TOKEN_]; + $type_pp = $rLL->[$Kpp]->[_TYPE_]; + } + else { + $token_pp = ";"; + $type_pp = ';'; + } + my $token_next = $rLL->[$Knext]->[_TOKEN_]; + my $type_next = $rLL->[$Knext]->[_TYPE_]; + + my $do_not_delete = is_essential_whitespace( + $token_pp, $type_pp, $token_p, + $type_p, $token_next, $type_next, + ); + + next unless ($do_not_delete); + } + + # make it just one character if allowed + if ($rOpts_add_whitespace) { + $rtoken_vars->[_TOKEN_] = ' '; + } + $store_token->($rtoken_vars); + next; + } + + # Handle a nonblank token... + + # check for a qw quote + if ( $type eq 'q' ) { + + # trim blanks from right of qw quotes + # (To avoid trimming qw quotes use -ntqw; the tokenizer handles + # this) + $token =~ s/\s*$//; + $rtoken_vars->[_TOKEN_] = $token; + note_embedded_tab() if ( $token =~ "\t" ); + + if ($in_multiline_qw) { + + # If we are at the end of a multiline qw .. + if ( $in_multiline_qw == $KK ) { + + # Split off the closing delimiter character + # so that the formatter can put a line break there if necessary + my $part1 = $token; + my $part2 = substr( $part1, -1, 1, "" ); + + if ($part1) { + my $rcopy = + copy_token_as_type( $rtoken_vars, 'q', $part1 ); + $store_token->($rcopy); + $token = $part2; + $rtoken_vars->[_TOKEN_] = $token; + + } + $in_multiline_qw = undef; + + # store without preceding blank + $store_token->($rtoken_vars); + next; + } + else { + # continuing a multiline qw + $store_token->($rtoken_vars); + next; + } + } + + else { + + # we are encountered new qw token...see if multiline + my $K_end = $K_end_q->($KK); + if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) { + + # Starting multiline qw... + # set flag equal to the ending K + $in_multiline_qw = $K_end; + + # Split off the leading part + # so that the formatter can put a line break there if necessary + if ( $token =~ /^(qw\s*.)(.*)$/ ) { + my $part1 = $1; + my $part2 = $2; + if ($part2) { + my $rcopy = + copy_token_as_type( $rtoken_vars, 'q', + $part1 ); + $store_token_and_space->( + $rcopy, $rwhitespace_flags->[$KK] == WS_YES + ); + $token = $part2; + $rtoken_vars->[_TOKEN_] = $token; + + # Second part goes without intermediate blank + $store_token->($rtoken_vars); + next; + } + } + } + else { + + # this is a new single token qw - + # store with possible preceding blank + $store_token_and_space->( + $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES + ); + next; + } + } + } ## end if ( $type eq 'q' ) + + # Modify certain tokens here for whitespace + # The following is not yet done, but could be: + # sub (x x x) + elsif ( $type =~ /^[wit]$/ ) { + + # Examples: <> + # change '$ var' to '$var' etc + # '-> new' to '->new' + if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { + $token =~ s/\s*//g; + $rtoken_vars->[_TOKEN_] = $token; + } + + # Split identifiers with leading arrows, inserting blanks if + # necessary. It is easier and safer here than in the + # tokenizer. For example '->new' becomes two tokens, '->' and + # 'new' with a possible blank between. + # + # Note: there is a related patch in sub set_whitespace_flags + if ( $token =~ /^\-\>(.*)$/ && $1 ) { + my $token_save = $1; + my $type_save = $type; + + # store a blank to left of arrow if necessary + my $Kprev = $self->K_previous_nonblank($KK); + if ( defined($Kprev) + && $rLL->[$Kprev]->[_TYPE_] ne 'b' + && $rOpts_add_whitespace + && $want_left_space{'->'} == WS_YES ) + { + my $rcopy = + copy_token_as_type( $rtoken_vars, 'b', ' ' ); + $store_token->($rcopy); + } + + # then store the arrow + my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' ); + $store_token->($rcopy); + + # then reset the current token to be the remainder, + # and reset the whitespace flag according to the arrow + $token = $rtoken_vars->[_TOKEN_] = $token_save; + $type = $rtoken_vars->[_TYPE_] = $type_save; + $store_token->($rtoken_vars); + next; + } + + if ( $token =~ /$SUB_PATTERN/ ) { + $token =~ s/\s+/ /g; + $rtoken_vars->[_TOKEN_] = $token; + } + + # trim identifiers of trailing blanks which can occur + # under some unusual circumstances, such as if the + # identifier 'witch' has trailing blanks on input here: + # + # sub + # witch + # () # prototype may be on new line ... + # ... + if ( $type eq 'i' ) { + $token =~ s/\s+$//g; + $rtoken_vars->[_TOKEN_] = $token; + } + } + + # change 'LABEL :' to 'LABEL:' + elsif ( $type eq 'J' ) { + $token =~ s/\s+//g; + $rtoken_vars->[_TOKEN_] = $token; + } + + # patch to add space to something like "x10" + # This avoids having to split this token in the pre-tokenizer + elsif ( $type eq 'n' ) { + if ( $token =~ /^x\d+/ ) { + $token =~ s/x/x /; + $rtoken_vars->[_TOKEN_] = $token; + } + } + + # check a quote for problems + elsif ( $type eq 'Q' ) { + + # This is ready to go but is commented out because there is + # still identical logic in sub break_lines. + # $check_Q->($KK, $Kfirst); + } + + elsif ($type_sequence) { + + # if ( $is_opening_token{$token} ) { + # } + + if ( $is_closing_token{$token} ) { + + # Insert a tentative missing semicolon if the next token is + # a closing block brace + if ( + $type eq '}' + && $token eq '}' + + # not preceded by a ';' + && $last_nonblank_type ne ';' + + # and this is not a VERSION stmt (is all one line, we are not + # inserting semicolons on one-line blocks) + && $CODE_type ne 'VER' + + # and we are allowed to add semicolons + && $rOpts->{'add-semicolons'} + ) + { + $add_phantom_semicolon->($KK); + } + } + } + + # Store this token with possible previous blank + $store_token_and_space->( + $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES + ); + + } # End token loop + } # End line loop + + # Reset memory to be the new array + $self->{rLL} = $rLL_new; + $self->set_rLL_max_index(); + $self->{K_opening_container} = $K_opening_container; + $self->{K_closing_container} = $K_closing_container; + $self->{K_opening_ternary} = $K_opening_ternary; + $self->{K_closing_ternary} = $K_closing_ternary; + $self->{rK_phantom_semicolons} = $rK_phantom_semicolons; + + # make sure the new array looks okay + $self->check_token_array(); + + # reset the token limits of each line + $self->resync_lines_and_tokens(); + + return; +} + +{ # scan_comments + + my $Last_line_had_side_comment; + my $In_format_skipping_section; + my $Saw_VERSION_in_this_file; + + sub scan_comments { + my $self = shift; + my $rlines = $self->{rlines}; + + $Last_line_had_side_comment = undef; + $In_format_skipping_section = undef; + $Saw_VERSION_in_this_file = undef; + + # Loop over all lines + foreach my $line_of_tokens ( @{$rlines} ) { + my $line_type = $line_of_tokens->{_line_type}; + next unless ( $line_type eq 'CODE' ); + my $CODE_type = $self->get_CODE_type($line_of_tokens); + $line_of_tokens->{_code_type} = $CODE_type; + } + return; + } + + sub get_CODE_type { + my ( $self, $line_of_tokens ) = @_; + + # We are looking at a line of code and setting a flag to + # describe any special processing that it requires + + # Possible CODE_types are as follows. + # 'BL' = Blank Line + # 'VB' = Verbatim - line goes out verbatim + # 'IO' = Indent Only - line goes out unchanged except for indentation + # 'NIN' = No Internal Newlines - line does not get broken + # 'HSC'=Hanging Side Comment - fix this hanging side comment + # 'BC'=Block Comment - an ordinary full line comment + # 'SBC'=Static Block Comment - a block comment which does not get + # indented + # 'SBCX'=Static Block Comment Without Leading Space + # 'DEL'=Delete this line + # 'VER'=VERSION statement + # '' or (undefined) - no restructions + + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + + my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : ""; + my $no_internal_newlines = 1 - $rOpts_add_newlines; + if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' } + + # extract what we need for this line.. + + # Global value for error messages: + $input_line_number = $line_of_tokens->{_line_number}; + + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + my $jmax = -1; + if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst } + my $input_line = $line_of_tokens->{_line_text}; + my $in_continued_quote = my $starting_in_quote = + $line_of_tokens->{_starting_in_quote}; + my $in_quote = $line_of_tokens->{_ending_in_quote}; + my $ending_in_quote = $in_quote; + my $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + + my $is_static_block_comment = 0; + + # Handle a continued quote.. + if ($in_continued_quote) { + + # A line which is entirely a quote or pattern must go out + # verbatim. Note: the \n is contained in $input_line. + if ( $jmax <= 0 ) { + if ( ( $input_line =~ "\t" ) ) { + note_embedded_tab(); + } + $Last_line_had_side_comment = 0; + return 'VB'; + } + } + + my $is_block_comment = + ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' ); + + # Write line verbatim if we are in a formatting skip section + if ($In_format_skipping_section) { + $Last_line_had_side_comment = 0; + + # Note: extra space appended to comment simplifies pattern matching + if ( $is_block_comment + && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~ + /$format_skipping_pattern_end/o ) + { + $In_format_skipping_section = 0; + write_logfile_entry("Exiting formatting skip section\n"); + } + return 'FS'; + } + + # See if we are entering a formatting skip section + if ( $rOpts_format_skipping + && $is_block_comment + && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~ + /$format_skipping_pattern_begin/o ) + { + $In_format_skipping_section = 1; + write_logfile_entry("Entering formatting skip section\n"); + $Last_line_had_side_comment = 0; + return 'FS'; + } + + # ignore trailing blank tokens (they will get deleted later) + if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) { + $jmax--; + } + + # Handle a blank line.. + if ( $jmax < 0 ) { + $Last_line_had_side_comment = 0; + return 'BL'; + } + + # see if this is a static block comment (starts with ## by default) + my $is_static_block_comment_without_leading_space = 0; + if ( $is_block_comment + && $rOpts->{'static-block-comments'} + && $input_line =~ /$static_block_comment_pattern/o ) + { + $is_static_block_comment = 1; + $is_static_block_comment_without_leading_space = + substr( $input_line, 0, 1 ) eq '#'; + } + + # Check for comments which are line directives + # Treat exactly as static block comments without leading space + # reference: perlsyn, near end, section Plain Old Comments (Not!) + # example: '# line 42 "new_filename.plx"' + if ( + $is_block_comment + && $input_line =~ /^\# \s* + line \s+ (\d+) \s* + (?:\s("?)([^"]+)\2)? \s* + $/x + ) + { + $is_static_block_comment = 1; + $is_static_block_comment_without_leading_space = 1; + } + + # look for hanging side comment + if ( + $is_block_comment + && $Last_line_had_side_comment # last line had side comment + && $input_line =~ /^\s/ # there is some leading space + && !$is_static_block_comment # do not make static comment hanging + && $rOpts->{'hanging-side-comments'} # user is allowing + # hanging side comments + # like this + ) + { + $Last_line_had_side_comment = 1; + return 'HSC'; + } + + # remember if this line has a side comment + $Last_line_had_side_comment = + ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ); + + # Handle a block (full-line) comment.. + if ($is_block_comment) { + + if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' } + + # TRIM COMMENTS -- This could be turned off as a option + $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end + + if ($is_static_block_comment_without_leading_space) { + return 'SBCX'; + } + elsif ($is_static_block_comment) { + return 'SBC'; + } + else { + return 'BC'; + } + } + +=pod + # NOTE: This does not work yet. Version in print-line-of-tokens + # is Still used until fixed + + # compare input/output indentation except for continuation lines + # (because they have an unknown amount of initial blank space) + # and lines which are quotes (because they may have been outdented) + # Note: this test is placed here because we know the continuation flag + # at this point, which allows us to avoid non-meaningful checks. + my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_]; + compare_indentation_levels( $guessed_indentation_level, + $structural_indentation_level ) + unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0 + || $guessed_indentation_level == 0 + && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' ); +=cut + + # Patch needed for MakeMaker. Do not break a statement + # in which $VERSION may be calculated. See MakeMaker.pm; + # this is based on the coding in it. + # The first line of a file that matches this will be eval'd: + # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ + # Examples: + # *VERSION = \'1.01'; + # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; + # We will pass such a line straight through without breaking + # it unless -npvl is used. + + # Patch for problem reported in RT #81866, where files + # had been flattened into a single line and couldn't be + # tidied without -npvl. There are two parts to this patch: + # First, it is not done for a really long line (80 tokens for now). + # Second, we will only allow up to one semicolon + # before the VERSION. We need to allow at least one semicolon + # for statements like this: + # require Exporter; our $VERSION = $Exporter::VERSION; + # where both statements must be on a single line for MakeMaker + + my $is_VERSION_statement = 0; + if ( !$Saw_VERSION_in_this_file + && $jmax < 80 + && $input_line =~ + /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) + { + $Saw_VERSION_in_this_file = 1; + write_logfile_entry("passing VERSION line; -npvl deactivates\n"); + $CODE_type = 'VER'; + } + return $CODE_type; + } +} + +sub find_nested_pairs { + my $self = shift; + + my $rLL = $self->{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + + # We define an array of pairs of nested containers + my @nested_pairs; + + # We also set the following hash values to identify container pairs for + # which the opening and closing tokens are adjacent in the token stream: + # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and + # $seqno_in are the seqence numbers of the outer and inner containers of + # the pair We need these later to decide if we can insert a missing + # semicolon + my $rpaired_to_inner_container = {}; + + # This local hash remembers if an outer container has a close following + # inner container; + # The key is the outer sequence number + # The value is the token_hash of the inner container + + my %has_close_following_opening; + + # Names of calling routines can either be marked as 'i' or 'w', + # and they may invoke a sub call with an '->'. We will consider + # any consecutive string of such types as a single unit when making + # weld decisions. We also allow a leading ! + my $is_name_type = { + 'i' => 1, + 'w' => 1, + 'U' => 1, + '->' => 1, + '!' => 1, + }; + + my $is_name = sub { + my $type = shift; + return $type && $is_name_type->{$type}; + }; + + my $last_container; + my $last_last_container; + my $last_nonblank_token_vars; + my $last_count; + + my $nonblank_token_count = 0; + + # loop over all tokens + foreach my $rtoken_vars ( @{$rLL} ) { + + my $type = $rtoken_vars->[_TYPE_]; + + next if ( $type eq 'b' ); + + # long identifier-like items are counted as a single item + $nonblank_token_count++ + unless ( $is_name->($type) + && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) ); + + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + + my $token = $rtoken_vars->[_TOKEN_]; + + if ( $is_opening_token{$token} ) { + + # following previous opening token ... + if ( $last_container + && $is_opening_token{ $last_container->[_TOKEN_] } ) + { + + # adjacent to this one + my $tok_diff = $nonblank_token_count - $last_count; + + my $last_tok = $last_nonblank_token_vars->[_TOKEN_]; + + if ( $tok_diff == 1 + || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' ) + { + + # remember this pair... + my $outer_seqno = $last_container->[_TYPE_SEQUENCE_]; + my $inner_seqno = $type_sequence; + $has_close_following_opening{$outer_seqno} = + $rtoken_vars; + } + } + } + + elsif ( $is_closing_token{$token} ) { + + # if the corresponding opening token had an adjacent opening + if ( $has_close_following_opening{$type_sequence} + && $is_closing_token{ $last_container->[_TOKEN_] } + && $has_close_following_opening{$type_sequence} + ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] ) + { + + # The closing weld tokens must be adjacent + # NOTE: so intermediate commas and semicolons + # can currently block a weld. This is something + # that could be fixed in the future by including + # a flag to delete un-necessary commas and semicolons. + my $tok_diff = $nonblank_token_count - $last_count; + + if ( $tok_diff == 1 ) { + + # This is a closely nested pair .. + my $inner_seqno = $last_container->[_TYPE_SEQUENCE_]; + my $outer_seqno = $type_sequence; + $rpaired_to_inner_container->{$outer_seqno} = + $inner_seqno; + + push @nested_pairs, [ $inner_seqno, $outer_seqno ]; + } + } + } + + $last_last_container = $last_container; + $last_container = $rtoken_vars; + $last_count = $nonblank_token_count; + } + $last_nonblank_token_vars = $rtoken_vars; + } + $self->{rnested_pairs} = \@nested_pairs; + $self->{rpaired_to_inner_container} = $rpaired_to_inner_container; + return; +} + +sub dump_tokens { + + # a debug routine, not normally used + my ( $self, $msg ) = @_; + my $rLL = $self->{rLL}; + my $nvars = @{$rLL}; + print STDERR "$msg\n"; + print STDERR "ntokens=$nvars\n"; + print STDERR "K\t_TOKEN_\t_TYPE_\n"; + my $K = 0; + + foreach my $item ( @{$rLL} ) { + print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n"; + $K++; + } + return; +} + +sub get_old_line_index { + my ( $self, $K ) = @_; + my $rLL = $self->{rLL}; + return 0 unless defined($K); + return $rLL->[$K]->[_LINE_INDEX_]; +} + +sub get_old_line_count { + my ( $self, $Kbeg, $Kend ) = @_; + my $rLL = $self->{rLL}; + return 0 unless defined($Kbeg); + return 0 unless defined($Kend); + return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1; +} + +sub K_next_code { + my ( $self, $KK, $rLL ) = @_; + + # return the index K of the next nonblank, non-comment token + return unless ( defined($KK) && $KK >= 0 ); + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + while ( $Knnb < $Num ) { + if ( !defined( $rLL->[$Knnb] ) ) { + Fault("Undefined entry for k=$Knnb"); + } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' + && $rLL->[$Knnb]->[_TYPE_] ne '#' ) + { + return $Knnb; + } + $Knnb++; + } + return; +} + +sub K_next_nonblank { + my ( $self, $KK, $rLL ) = @_; + + # return the index K of the next nonblank token + return unless ( defined($KK) && $KK >= 0 ); + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + while ( $Knnb < $Num ) { + if ( !defined( $rLL->[$Knnb] ) ) { + Fault("Undefined entry for k=$Knnb"); + } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } + $Knnb++; + } + return; +} + +sub K_previous_code { + + # return the index K of the previous nonblank, non-comment token + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { + + # The caller should make the first call with KK_new=undef to + # avoid this error + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ); + } + my $Kpnb = $KK - 1; + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' + && $rLL->[$Kpnb]->[_TYPE_] ne '#' ) + { + return $Kpnb; + } + $Kpnb--; + } + return; +} + +sub K_previous_nonblank { + + # return index of previous nonblank token before item K; + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { + + # The caller should make the first call with KK_new=undef to + # avoid this error + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ); + } + my $Kpnb = $KK - 1; + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } + $Kpnb--; + } + return; +} + +sub weld_containers { + + # do any welding operations + my $self = shift; + + # initialize weld length hashes needed later for checking line lengths + # TODO: These should eventually be stored in $self rather than be package vars + %weld_len_left_closing = (); + %weld_len_right_closing = (); + %weld_len_left_opening = (); + %weld_len_right_opening = (); + + return if ( $rOpts->{'indent-only'} ); + return unless ($rOpts_add_newlines); + + if ( $rOpts->{'weld-nested-containers'} ) { + + # if called, weld_nested_containers must be called before other weld + # operations. # This is because weld_nested_containers could overwrite + # hash values written by weld_cuddled_blocks and weld_nested_quotes. + $self->weld_nested_containers(); + + $self->weld_nested_quotes(); + } + + # Note that weld_nested_containers() changes the _LEVEL_ values, so + # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead. + + # Here is a good test case to Be sure that both cuddling and welding + # are working and not interfering with each other: <> + + # perltidy -wn -ce + + # if ($BOLD_MATH) { ( + # $labels, $comment, + # join( '', '', &make_math( $mode, '', '', $_ ), '' ) + # ) } else { ( + # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), + # $after + # ) } + + $self->weld_cuddled_blocks(); + + return; +} + +sub cumulative_length_before_K { + my ( $self, $KK ) = @_; + my $rLL = $self->{rLL}; + return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; +} + +sub cumulative_length_after_K { + my ( $self, $KK ) = @_; + my $rLL = $self->{rLL}; + return $rLL->[$KK]->[_CUMULATIVE_LENGTH_]; +} + +sub weld_cuddled_blocks { + my $self = shift; + + # This routine implements the -cb flag by finding the appropriate + # closing and opening block braces and welding them together. + return unless ( %{$rcuddled_block_types} ); + + my $rLL = $self->{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + my $rbreak_container = $self->{rbreak_container}; + + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + + my $length_to_opening_seqno = sub { + my ($seqno) = @_; + my $KK = $K_opening_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + my $length_to_closing_seqno = sub { + my ($seqno) = @_; + my $KK = $K_closing_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + + my $is_broken_block = sub { + + # a block is broken if the input line numbers of the braces differ + # we can only cuddle between broken blocks + my ($seqno) = @_; + my $K_opening = $K_opening_container->{$seqno}; + return unless ( defined($K_opening) ); + my $K_closing = $K_closing_container->{$seqno}; + return unless ( defined($K_closing) ); + return $rbreak_container->{$seqno} + || $rLL->[$K_closing]->[_LINE_INDEX_] != + $rLL->[$K_opening]->[_LINE_INDEX_]; + }; + + # A stack to remember open chains at all levels: + # $in_chain[$level] = [$chain_type, $type_sequence]; + my @in_chain; + my $CBO = $rOpts->{'cuddled-break-option'}; + + # loop over structure items to find cuddled pairs + my $level = 0; + my $KK = 0; + while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) { + my $rtoken_vars = $rLL->[$KK]; + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$type_sequence ) { + Fault("sequence = $type_sequence not defined"); + } + + # We use the original levels because they get changed by sub + # 'weld_nested_containers'. So if this were to be called before that + # routine, the levels would be wrong and things would go bad. + my $last_level = $level; + $level = $rtoken_vars->[_LEVEL_TRUE_]; + + if ( $level < $last_level ) { $in_chain[$last_level] = undef } + elsif ( $level > $last_level ) { $in_chain[$level] = undef } + + # We are only looking at code blocks + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + next unless ( $type eq $token ); + + if ( $token eq '{' ) { + + my $block_type = $rtoken_vars->[_BLOCK_TYPE_]; + if ( !$block_type ) { + + # patch for unrecognized block types which may not be labeled + my $Kp = $self->K_previous_nonblank($KK); + while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { + $Kp = $self->K_previous_nonblank($Kp); + } + next unless $Kp; + $block_type = $rLL->[$Kp]->[_TOKEN_]; + } + if ( $in_chain[$level] ) { + + # we are in a chain and are at an opening block brace. + # See if we are welding this opening brace with the previous + # block brace. Get their identification numbers: + my $closing_seqno = $in_chain[$level]->[1]; + my $opening_seqno = $type_sequence; + + # The preceding block must be on multiple lines so that its + # closing brace will start a new line. + if ( !$is_broken_block->($closing_seqno) ) { + next unless ( $CBO == 2 ); + $rbreak_container->{$closing_seqno} = 1; + } + + # we will let the trailing block be either broken or intact + ## && $is_broken_block->($opening_seqno); + + # We can weld the closing brace to its following word .. + my $Ko = $K_closing_container->{$closing_seqno}; + my $Kon = $self->K_next_nonblank($Ko); + + # ..unless it is a comment + if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) { + my $dlen = + $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] - + $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_]; + $weld_len_right_closing{$closing_seqno} = $dlen; + + # Set flag that we want to break the next container + # so that the cuddled line is balanced. + $rbreak_container->{$opening_seqno} = 1 + if ($CBO); + } + + } + else { + + # We are not in a chain. Start a new chain if we see the + # starting block type. + if ( $rcuddled_block_types->{$block_type} ) { + $in_chain[$level] = [ $block_type, $type_sequence ]; + } + else { + $block_type = '*'; + $in_chain[$level] = [ $block_type, $type_sequence ]; + } + } + } + elsif ( $token eq '}' ) { + if ( $in_chain[$level] ) { + + # We are in a chain at a closing brace. See if this chain + # continues.. + my $Knn = $self->K_next_code($KK); + next unless $Knn; + + my $chain_type = $in_chain[$level]->[0]; + my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; + if ( + $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} + ) + { + + # Note that we do not weld yet because we must wait until + # we we are sure that an opening brace for this follows. + $in_chain[$level]->[1] = $type_sequence; + } + else { $in_chain[$level] = undef } + } + } + } + + return; +} + +sub weld_nested_containers { + my $self = shift; + + # This routine implements the -wn flag by "welding together" + # the nested closing and opening tokens which were previously + # identified by sub 'find_nested_pairs'. "welding" simply + # involves setting certain hash values which will be checked + # later during formatting. + + my $rLL = $self->{rLL}; + my $Klimit = $self->get_rLL_max_index(); + my $rnested_pairs = $self->{rnested_pairs}; + my $rlines = $self->{rlines}; + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + + # Return unless there are nested pairs to weld + return unless defined($rnested_pairs) && @{$rnested_pairs}; + + # This array will hold the sequence numbers of the tokens to be welded. + my @welds; + + # Variables needed for estimating line lengths + my $starting_indent; + my $starting_lentot; + + # A tolerance to the length for length estimates. In some rare cases + # this can avoid problems where a final weld slightly exceeds the + # line length and gets broken in a bad spot. + my $length_tol = 1; + + my $excess_length_to_K = sub { + my ($K) = @_; + + # Estimate the length from the line start to a given token + my $length = $self->cumulative_length_before_K($K) - $starting_lentot; + my $excess_length = + $starting_indent + $length + $length_tol - $rOpts_maximum_line_length; + return ($excess_length); + }; + + my $length_to_opening_seqno = sub { + my ($seqno) = @_; + my $KK = $K_opening_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + + my $length_to_closing_seqno = sub { + my ($seqno) = @_; + my $KK = $K_closing_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + + # Abbreviations: + # _oo=outer opening, i.e. first of { { + # _io=inner opening, i.e. second of { { + # _oc=outer closing, i.e. second of } { + # _ic=inner closing, i.e. first of } } + + my $previous_pair; + + # We are working from outermost to innermost pairs so that + # level changes will be complete when we arrive at the inner pairs. + + while ( my $item = pop( @{$rnested_pairs} ) ) { + my ( $inner_seqno, $outer_seqno ) = @{$item}; + + my $Kouter_opening = $K_opening_container->{$outer_seqno}; + my $Kinner_opening = $K_opening_container->{$inner_seqno}; + my $Kouter_closing = $K_closing_container->{$outer_seqno}; + my $Kinner_closing = $K_closing_container->{$inner_seqno}; + + my $outer_opening = $rLL->[$Kouter_opening]; + my $inner_opening = $rLL->[$Kinner_opening]; + my $outer_closing = $rLL->[$Kouter_closing]; + my $inner_closing = $rLL->[$Kinner_closing]; + + my $iline_oo = $outer_opening->[_LINE_INDEX_]; + my $iline_io = $inner_opening->[_LINE_INDEX_]; + + # Set flag saying if this pair starts a new weld + my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] ); + + # Set flag saying if this pair is adjacent to the previous nesting pair + # (even if previous pair was rejected as a weld) + my $touch_previous_pair = + defined($previous_pair) && $outer_seqno == $previous_pair->[0]; + $previous_pair = $item; + + # Set a flag if we should not weld. It sometimes looks best not to weld + # when the opening and closing tokens are very close. However, there + # is a danger that we will create a "blinker", which oscillates between + # two semi-stable states, if we do not weld. So the rules for + # not welding have to be carefully defined and tested. + my $do_not_weld; + if ( !$touch_previous_pair ) { + + # If this pair is not adjacent to the previous pair (skipped or + # not), then measure lengths from the start of line of oo + + my $rK_range = $rlines->[$iline_oo]->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + $starting_lentot = + $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_]; + $starting_indent = 0; + if ( !$rOpts_variable_maximum_line_length ) { + my $level = $rLL->[$Kfirst]->[_LEVEL_]; + $starting_indent = $rOpts_indent_columns * $level; + } + + # DO-NOT-WELD RULE 1: + # Do not weld something that looks like the start of a two-line + # function call, like this: <> + # $trans->add_transformation( + # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) ); + # We will look for a semicolon after the closing paren. + + # We want to weld something complex, like this though + # my $compass = uc( opposite_direction( line_to_canvas_direction( + # @{ $coords[0] }, @{ $coords[1] } ) ) ); + # Otherwise we will get a 'blinker' + + my $iline_oc = $outer_closing->[_LINE_INDEX_]; + if ( $iline_oc <= $iline_oo + 1 ) { + + # Look for following semicolon... + my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing); + my $next_nonblank_type = + defined($Knext_nonblank) + ? $rLL->[$Knext_nonblank]->[_TYPE_] + : 'b'; + if ( $next_nonblank_type eq ';' ) { + + # Then do not weld if no other containers between inner + # opening and closing. + my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_]; + if ( $Knext_seq_item == $Kinner_closing ) { + $do_not_weld ||= 1; + } + } + } + } + + my $iline_ic = $inner_closing->[_LINE_INDEX_]; + + # DO-NOT-WELD RULE 2: + # Do not weld an opening paren to an inner one line brace block + # We will just use old line numbers for this test and require + # iterations if necessary for convergence + + # For example, otherwise we could cause the opening paren + # in the following example to separate from the caller name + # as here: + + # $_[0]->code_handler + # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } ); + + # Here is another example where we do not want to weld: + # $wrapped->add_around_modifier( + # sub { push @tracelog => 'around 1'; $_[0]->(); } ); + + # If the one line sub block gets broken due to length or by the + # user, then we can weld. The result will then be: + # $wrapped->add_around_modifier( sub { + # push @tracelog => 'around 1'; + # $_[0]->(); + # } ); + + if ( $iline_ic == $iline_io ) { + + my $token_oo = $outer_opening->[_TOKEN_]; + my $block_type_io = $inner_opening->[_BLOCK_TYPE_]; + my $token_io = $inner_opening->[_TOKEN_]; + $do_not_weld ||= $token_oo eq '(' && $token_io eq '{'; + } + + # DO-NOT-WELD RULE 3: + # Do not weld if this makes our line too long + $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0; + + if ($do_not_weld) { + + # After neglecting a pair, we start measuring from start of point io + $starting_lentot = + $self->cumulative_length_before_K($Kinner_opening); + $starting_indent = 0; + if ( !$rOpts_variable_maximum_line_length ) { + my $level = $inner_opening->[_LEVEL_]; + $starting_indent = $rOpts_indent_columns * $level; + } + + # Normally, a broken pair should not decrease indentation of + # intermediate tokens: + ## if ( $last_pair_broken ) { next } + # However, for long strings of welded tokens, such as '{{{{{{...' + # we will allow broken pairs to also remove indentation. + # This will keep very long strings of opening and closing + # braces from marching off to the right. We will do this if the + # number of tokens in a weld before the broken weld is 4 or more. + # This rule will mainly be needed for test scripts, since typical + # welds have fewer than about 4 welded tokens. + if ( !@welds || @{ $welds[-1] } < 4 ) { next } + } + + # otherwise start new weld ... + elsif ($starting_new_weld) { + push @welds, $item; + } + + # ... or extend current weld + else { + unshift @{ $welds[-1] }, $inner_seqno; + } + + # After welding, reduce the indentation level if all intermediate tokens + my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_]; + if ( $dlevel != 0 ) { + my $Kstart = $Kinner_opening; + my $Kstop = $Kinner_closing; + for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) { + $rLL->[$KK]->[_LEVEL_] += $dlevel; + } + } + } + + # Define weld lengths needed later to set line breaks + foreach my $item (@welds) { + + # sweep from inner to outer + + my $inner_seqno; + my $len_close = 0; + my $len_open = 0; + foreach my $outer_seqno ( @{$item} ) { + if ($inner_seqno) { + + my $dlen_opening = + $length_to_opening_seqno->($inner_seqno) - + $length_to_opening_seqno->($outer_seqno); + + my $dlen_closing = + $length_to_closing_seqno->($outer_seqno) - + $length_to_closing_seqno->($inner_seqno); + + $len_open += $dlen_opening; + $len_close += $dlen_closing; + + } + + $weld_len_left_closing{$outer_seqno} = $len_close; + $weld_len_right_opening{$outer_seqno} = $len_open; + + $inner_seqno = $outer_seqno; + } + + # sweep from outer to inner + foreach my $seqno ( reverse @{$item} ) { + $weld_len_right_closing{$seqno} = + $len_close - $weld_len_left_closing{$seqno}; + $weld_len_left_opening{$seqno} = + $len_open - $weld_len_right_opening{$seqno}; + } + } + + ##################################### + # DEBUG + ##################################### + if (0) { + my $count = 0; + local $" = ')('; + foreach my $weld (@welds) { + print "\nWeld number $count has seq: (@{$weld})\n"; + foreach my $seq ( @{$weld} ) { + print <{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + my $rlines = $self->{rlines}; + + my $is_single_quote = sub { + my ( $Kbeg, $Kend, $quote_type ) = @_; + foreach my $K ( $Kbeg .. $Kend ) { + my $test_type = $rLL->[$K]->[_TYPE_]; + next if ( $test_type eq 'b' ); + return if ( $test_type ne $quote_type ); + } + return 1; + }; + + my $excess_line_length = sub { + my ( $KK, $Ktest ) = @_; + + # what is the excess length if we add token $Ktest to the line with $KK? + my $iline = $rLL->[$KK]->[_LINE_INDEX_]; + my $rK_range = $rlines->[$iline]->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + my $starting_lentot = + $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_]; + my $starting_indent = 0; + my $length_tol = 1; + if ( !$rOpts_variable_maximum_line_length ) { + my $level = $rLL->[$Kfirst]->[_LEVEL_]; + $starting_indent = $rOpts_indent_columns * $level; + } + + my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot; + my $excess_length = + $starting_indent + $length + $length_tol - $rOpts_maximum_line_length; + return $excess_length; + }; + + # look for single qw quotes nested in containers + my $KK = 0; + while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) { + my $rtoken_vars = $rLL->[$KK]; + my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$outer_seqno ) { + Fault("sequence = $outer_seqno not defined"); + } + + my $token = $rtoken_vars->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + + # see if the next token is a quote of some type + my $Kn = $self->K_next_nonblank($KK); + next unless $Kn; + my $next_token = $rLL->[$Kn]->[_TOKEN_]; + my $next_type = $rLL->[$Kn]->[_TYPE_]; + next + unless ( ( $next_type eq 'q' || $next_type eq 'Q' ) + && $next_token =~ /^q/ ); + + # The token before the closing container must also be a quote + my $K_closing = $K_closing_container->{$outer_seqno}; + my $Kt_end = $self->K_previous_nonblank($K_closing); + next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type; + + # Do not weld to single-line quotes. Nothing is gained, and it may + # look bad. + next if ( $Kt_end == $Kn ); + + # Only weld to quotes delimited with container tokens. This is + # because welding to arbitrary quote delimiters can produce code + # which is less readable than without welding. + my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 ); + next + unless ( $is_closing_token{$closing_delimiter} + || $closing_delimiter eq '>' ); + + # Now make sure that there is just a single quote in the container + next + unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) ); + + # If welded, the line must not exceed allowed line length + # Assume old line breaks for this estimate. + next if ( $excess_line_length->( $KK, $Kn ) > 0 ); + + # OK to weld + # FIXME: Are these always correct? + $weld_len_left_closing{$outer_seqno} = 1; + $weld_len_right_opening{$outer_seqno} = 2; + + # QW PATCH 1 (Testing) + # undo CI for welded quotes + foreach my $K ( $Kn .. $Kt_end ) { + $rLL->[$K]->[_CI_LEVEL_] = 0; + } + + # Change the level of a closing qw token to be that of the outer + # containing token. This will allow -lp indentation to function + # correctly in the vertical aligner. + $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_]; + } + } + return; +} + +sub weld_len_left { + + my ( $seqno, $type_or_tok ) = @_; + + # Given the sequence number of a token, and the token or its type, + # return the length of any weld to its left + + my $weld_len; + if ($seqno) { + if ( $is_closing_type{$type_or_tok} ) { + $weld_len = $weld_len_left_closing{$seqno}; + } + elsif ( $is_opening_type{$type_or_tok} ) { + $weld_len = $weld_len_left_opening{$seqno}; + } + } + if ( !defined($weld_len) ) { $weld_len = 0 } + return $weld_len; +} + +sub weld_len_right { + + my ( $seqno, $type_or_tok ) = @_; + + # Given the sequence number of a token, and the token or its type, + # return the length of any weld to its right + + my $weld_len; + if ($seqno) { + if ( $is_closing_type{$type_or_tok} ) { + $weld_len = $weld_len_right_closing{$seqno}; + } + elsif ( $is_opening_type{$type_or_tok} ) { + $weld_len = $weld_len_right_opening{$seqno}; + } + } + if ( !defined($weld_len) ) { $weld_len = 0 } + return $weld_len; +} + +sub weld_len_left_to_go { + my ($i) = @_; + + # Given the index of a token in the 'to_go' array + # return the length of any weld to its left + return if ( $i < 0 ); + my $weld_len = + weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] ); + return $weld_len; +} + +sub weld_len_right_to_go { + my ($i) = @_; + + # Given the index of a token in the 'to_go' array + # return the length of any weld to its right + return if ( $i < 0 ); + if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- } + my $weld_len = + weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] ); + return $weld_len; +} + +sub link_sequence_items { + + # This has been merged into 'respace_tokens' but retained for reference + my $self = shift; + my $rlines = $self->{rlines}; + my $rLL = $self->{rLL}; + + # We walk the token list and make links to the next sequence item. + # We also define these hashes to container tokens using sequence number as + # the key: + my $K_opening_container = {}; # opening [ { or ( + my $K_closing_container = {}; # closing ] } or ) + my $K_opening_ternary = {}; # opening ? of ternary + my $K_closing_ternary = {}; # closing : of ternary + + # sub to link preceding nodes forward to a new node type + my $link_back = sub { + my ( $Ktop, $key ) = @_; + + my $Kprev = $Ktop - 1; + while ( $Kprev >= 0 + && !defined( $rLL->[$Kprev]->[$key] ) ) + { + $rLL->[$Kprev]->[$key] = $Ktop; + $Kprev -= 1; + } + }; + + for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { + + $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef; + + my $type = $rLL->[$KK]->[_TYPE_]; + + next if ( $type eq 'b' ); + + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + + $link_back->( $KK, _KNEXT_SEQ_ITEM_ ); + + my $token = $rLL->[$KK]->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + + $K_opening_container->{$type_sequence} = $KK; + } + elsif ( $is_closing_token{$token} ) { + + $K_closing_container->{$type_sequence} = $KK; + } + + # These are not yet used but could be useful + else { + if ( $token eq '?' ) { + $K_opening_ternary->{$type_sequence} = $KK; + } + elsif ( $token eq ':' ) { + $K_closing_ternary->{$type_sequence} = $KK; + } + else { + Fault(<{K_opening_container} = $K_opening_container; + $self->{K_closing_container} = $K_closing_container; + $self->{K_opening_ternary} = $K_opening_ternary; + $self->{K_closing_ternary} = $K_closing_ternary; + return; +} + +sub sum_token_lengths { + my $self = shift; + + # This has been merged into 'respace_tokens' but retained for reference + my $rLL = $self->{rLL}; + my $cumulative_length = 0; + for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { + + # now set the length of this token + my $token_length = length( $rLL->[$KK]->[_TOKEN_] ); + + $cumulative_length += $token_length; + + # Save the length sum to just AFTER this token + $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length; + + } + return; +} + +sub resync_lines_and_tokens { + + my $self = shift; + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + my $rlines = $self->{rlines}; + + # Re-construct the arrays of tokens associated with the original input lines + # since they have probably changed due to inserting and deleting blanks + # and a few other tokens. + + my $Kmax = -1; + + # This is the next token and its line index: + my $Knext = 0; + my $inext; + if ( defined($rLL) && @{$rLL} ) { + $Kmax = @{$rLL} - 1; + $inext = $rLL->[$Knext]->[_LINE_INDEX_]; + } + + my $get_inext = sub { + if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef } + else { + $inext = $rLL->[$Knext]->[_LINE_INDEX_]; + } + return $inext; + }; + + # Remember the most recently output token index + my $Klast_out; + + my $iline = -1; + foreach my $line_of_tokens ( @{$rlines} ) { + $iline++; + my $line_type = $line_of_tokens->{_line_type}; + if ( $line_type eq 'CODE' ) { + + my @K_array; + my $rK_range; + $inext = $get_inext->(); + while ( defined($inext) && $inext <= $iline ) { + push @{K_array}, $Knext; + $Knext += 1; + $inext = $get_inext->(); + } + + # Delete any terminal blank token + if (@K_array) { + if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) { + pop @K_array; + } + } + + # Define the range of K indexes for the line: + # $Kfirst = index of first token on line + # $Klast_out = index of last token on line + my ( $Kfirst, $Klast ); + if (@K_array) { + $Kfirst = $K_array[0]; + $Klast = $K_array[-1]; + $Klast_out = $Klast; + } + + # It is only safe to trim the actual line text if the input + # line had a terminal blank token. Otherwise, we may be + # in a quote. + if ( $line_of_tokens->{_ended_in_blank_token} ) { + $line_of_tokens->{_line_text} =~ s/\s+$//; + } + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; + } + } + + # There shouldn't be any nodes beyond the last one unless we start + # allowing 'link_after' calls + if ( defined($inext) ) { + + Fault("unexpected tokens at end of file when reconstructing lines"); + } + + return; +} + +sub dump_verbatim { + my $self = shift; + my $rlines = $self->{rlines}; + foreach my $line ( @{$rlines} ) { + my $input_line = $line->{_line_text}; + $self->write_unindented_line($input_line); + } + return; +} + +sub finish_formatting { + + my ( $self, $severe_error ) = @_; + + # The file has been tokenized and is ready to be formatted. + # All of the relevant data is stored in $self, ready to go. + + # output file verbatim if severe error or no formatting requested + if ( $severe_error || $rOpts->{notidy} ) { + $self->dump_verbatim(); + $self->wrapup(); + return; + } + + # Make a pass through the lines, looking at lines of CODE and identifying + # special processing needs, such format skipping sections marked by + # special comments + $self->scan_comments(); + + # Find nested pairs of container tokens for any welding. This information + # is also needed for adding semicolons, so it is split apart from the + # welding step. + $self->find_nested_pairs(); + + # Make sure everything looks good + $self->check_line_hashes(); + + # Future: Place to Begin future Iteration Loop + # foreach my $it_count(1..$maxit) { + + # Future: We must reset some things after the first iteration. + # This includes: + # - resetting levels if there was any welding + # - resetting any phantom semicolons + # - dealing with any line numbering issues so we can relate final lines + # line numbers with input line numbers. + # + # If ($it_count>1) { + # Copy {level_raw} to [_LEVEL_] if ($it_count>1) + # Renumber lines + # } + + # Make a pass through all tokens, adding or deleting any whitespace as + # required. Also make any other changes, such as adding semicolons. + # All token changes must be made here so that the token data structure + # remains fixed for the rest of this iteration. + $self->respace_tokens(); + + # Implement any welding needed for the -wn or -cb options + $self->weld_containers(); + + # Finishes formatting and write the result to the line sink. + # Eventually this call should just change the 'rlines' data according to the + # new line breaks and then return so that we can do an internal iteration + # before continuing with the next stages of formatting. + $self->break_lines(); + + ############################################################ + # A possible future decomposition of 'break_lines()' follows. + # Benefits: + # - allow perltidy to do an internal iteration which eliminates + # many unnecessary steps, such as re-parsing and vertical alignment. + # This will allow iterations to be automatic. + # - consolidate all length calculations to allow utf8 alignment + ############################################################ + + # Future: Check for convergence of beginning tokens on CODE lines + + # Future: End of Iteration Loop + + # Future: add_padding($rargs); + + # Future: add_closing_side_comments($rargs); + + # Future: vertical_alignment($rargs); + + # Future: output results + + # A final routine to tie up any loose ends + $self->wrapup(); + return; +} + +sub create_one_line_block { + ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) = + @_; + return; +} + +sub destroy_one_line_block { + $index_start_one_line_block = UNDEFINED_INDEX; + $semicolons_before_block_self_destruct = 0; + return; +} + +sub leading_spaces_to_go { + + # return the number of indentation spaces for a token in the output stream; + # these were previously stored by 'set_leading_whitespace'. + + my $ii = shift; + if ( $ii < 0 ) { $ii = 0 } + return get_spaces( $leading_spaces_to_go[$ii] ); + +} + +sub get_spaces { + + # return the number of leading spaces associated with an indentation + # variable $indentation is either a constant number of spaces or an object + # with a get_spaces method. + my $indentation = shift; + return ref($indentation) ? $indentation->get_spaces() : $indentation; +} + +sub get_recoverable_spaces { + + # return the number of spaces (+ means shift right, - means shift left) + # that we would like to shift a group of lines with the same indentation + # to get them to line up with their opening parens + my $indentation = shift; + return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; +} + +sub get_available_spaces_to_go { + + my $ii = shift; + my $item = $leading_spaces_to_go[$ii]; + + # return the number of available leading spaces associated with an + # indentation variable. $indentation is either a constant number of + # spaces or an object with a get_available_spaces method. + return ref($item) ? $item->get_available_spaces() : 0; +} + +sub new_lp_indentation_item { + + # this is an interface to the IndentationItem class + my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; + + # A negative level implies not to store the item in the item_list + my $index = 0; + if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } + + my $item = Perl::Tidy::IndentationItem->new( + $spaces, $level, + $ci_level, $available_spaces, + $index, $gnu_sequence_number, + $align_paren, $max_gnu_stack_index, + $line_start_index_to_go, + ); + + if ( $level >= 0 ) { + $gnu_item_list[$max_gnu_item_index] = $item; + } + + return $item; +} + +sub set_leading_whitespace { + + # This routine defines leading whitespace + # given: the level and continuation_level of a token, + # define: space count of leading string which would apply if it + # were the first token of a new line. + + my ( $level_abs, $ci_level, $in_continued_quote ) = @_; + + # Adjust levels if necessary to recycle whitespace: + # given $level_abs, the absolute level + # define $level, a possibly reduced level for whitespace + my $level = $level_abs; + if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) { + if ( $level_abs < $whitespace_last_level ) { + pop(@whitespace_level_stack); + } + if ( !@whitespace_level_stack ) { + push @whitespace_level_stack, $level_abs; + } + elsif ( $level_abs > $whitespace_last_level ) { + $level = $whitespace_level_stack[-1] + + ( $level_abs - $whitespace_last_level ); + + if ( + # 1 Try to break at a block brace + ( + $level > $rOpts_whitespace_cycle + && $last_nonblank_type eq '{' + && $last_nonblank_token eq '{' + ) + + # 2 Then either a brace or bracket + || ( $level > $rOpts_whitespace_cycle + 1 + && $last_nonblank_token =~ /^[\{\[]$/ ) + + # 3 Then a paren too + || $level > $rOpts_whitespace_cycle + 2 + ) + { + $level = 1; + } + push @whitespace_level_stack, $level; + } + $level = $whitespace_level_stack[-1]; + } + $whitespace_last_level = $level_abs; + + # modify for -bli, which adds one continuation indentation for + # opening braces + if ( $rOpts_brace_left_and_indent + && $max_index_to_go == 0 + && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) + { + $ci_level++; + } + + # patch to avoid trouble when input file has negative indentation. + # other logic should catch this error. + if ( $level < 0 ) { $level = 0 } + + #------------------------------------------- + # handle the standard indentation scheme + #------------------------------------------- + unless ($rOpts_line_up_parentheses) { + my $space_count = + $ci_level * $rOpts_continuation_indentation + + $level * $rOpts_indent_columns; + my $ci_spaces = + ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; + + if ($in_continued_quote) { + $space_count = 0; + $ci_spaces = 0; + } + $leading_spaces_to_go[$max_index_to_go] = $space_count; + $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; + return; + } + + #------------------------------------------------------------- + # handle case of -lp indentation.. + #------------------------------------------------------------- + + # The continued_quote flag means that this is the first token of a + # line, and it is the continuation of some kind of multi-line quote + # or pattern. It requires special treatment because it must have no + # added leading whitespace. So we create a special indentation item + # which is not in the stack. + if ($in_continued_quote) { + my $space_count = 0; + my $available_space = 0; + $level = -1; # flag to prevent storing in item_list + $leading_spaces_to_go[$max_index_to_go] = + $reduced_spaces_to_go[$max_index_to_go] = + new_lp_indentation_item( $space_count, $level, $ci_level, + $available_space, 0 ); + return; + } + + # get the top state from the stack + my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces(); + my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level(); + my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level(); + + my $type = $types_to_go[$max_index_to_go]; + my $token = $tokens_to_go[$max_index_to_go]; + my $total_depth = $nesting_depth_to_go[$max_index_to_go]; + + if ( $type eq '{' || $type eq '(' ) { + + $gnu_comma_count{ $total_depth + 1 } = 0; + $gnu_arrow_count{ $total_depth + 1 } = 0; + + # If we come to an opening token after an '=' token of some type, + # see if it would be helpful to 'break' after the '=' to save space + my $last_equals = $last_gnu_equals{$total_depth}; + if ( $last_equals && $last_equals > $line_start_index_to_go ) { + + # find the position if we break at the '=' + my $i_test = $last_equals; + if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } + + # TESTING + ##my $too_close = ($i_test==$max_index_to_go-1); + + my $test_position = total_line_length( $i_test, $max_index_to_go ); + my $mll = maximum_line_length($i_test); + + if ( + + # the equals is not just before an open paren (testing) + ##!$too_close && + + # if we are beyond the midpoint + $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2 + + # or we are beyond the 1/4 point and there was an old + # break at the equals + || ( + $gnu_position_predictor > + $mll - $rOpts_maximum_line_length * 3 / 4 + && ( + $old_breakpoint_to_go[$last_equals] + || ( $last_equals > 0 + && $old_breakpoint_to_go[ $last_equals - 1 ] ) + || ( $last_equals > 1 + && $types_to_go[ $last_equals - 1 ] eq 'b' + && $old_breakpoint_to_go[ $last_equals - 2 ] ) + ) + ) + ) + { + + # then make the switch -- note that we do not set a real + # breakpoint here because we may not really need one; sub + # scan_list will do that if necessary + $line_start_index_to_go = $i_test + 1; + $gnu_position_predictor = $test_position; + } + } + } + + my $halfway = + maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2; + + # Check for decreasing depth .. + # Note that one token may have both decreasing and then increasing + # depth. For example, (level, ci) can go from (1,1) to (2,0). So, + # in this example we would first go back to (1,0) then up to (2,0) + # in a single call. + if ( $level < $current_level || $ci_level < $current_ci_level ) { + + # loop to find the first entry at or completely below this level + my ( $lev, $ci_lev ); + while (1) { + if ($max_gnu_stack_index) { + + # save index of token which closes this level + $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go); + + # Undo any extra indentation if we saw no commas + my $available_spaces = + $gnu_stack[$max_gnu_stack_index]->get_available_spaces(); + + my $comma_count = 0; + my $arrow_count = 0; + if ( $type eq '}' || $type eq ')' ) { + $comma_count = $gnu_comma_count{$total_depth}; + $arrow_count = $gnu_arrow_count{$total_depth}; + $comma_count = 0 unless $comma_count; + $arrow_count = 0 unless $arrow_count; + } + $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count); + $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count); + + if ( $available_spaces > 0 ) { + + if ( $comma_count <= 0 || $arrow_count > 0 ) { + + my $i = $gnu_stack[$max_gnu_stack_index]->get_index(); + my $seqno = + $gnu_stack[$max_gnu_stack_index] + ->get_sequence_number(); + + # Be sure this item was created in this batch. This + # should be true because we delete any available + # space from open items at the end of each batch. + if ( $gnu_sequence_number != $seqno + || $i > $max_gnu_item_index ) + { + warning( +"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" + ); + report_definite_bug(); + } + + else { + if ( $arrow_count == 0 ) { + $gnu_item_list[$i] + ->permanently_decrease_available_spaces( + $available_spaces); + } + else { + $gnu_item_list[$i] + ->tentatively_decrease_available_spaces( + $available_spaces); + } + foreach my $j ( $i + 1 .. $max_gnu_item_index ) { + $gnu_item_list[$j] + ->decrease_SPACES($available_spaces); + } + } + } + } + + # go down one level + --$max_gnu_stack_index; + $lev = $gnu_stack[$max_gnu_stack_index]->get_level(); + $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level(); + + # stop when we reach a level at or below the current level + if ( $lev <= $level && $ci_lev <= $ci_level ) { + $space_count = + $gnu_stack[$max_gnu_stack_index]->get_spaces(); + $current_level = $lev; + $current_ci_level = $ci_lev; + last; + } + } + + # reached bottom of stack .. should never happen because + # only negative levels can get here, and $level was forced + # to be positive above. + else { + warning( +"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" + ); + report_definite_bug(); + last; + } + } + } + + # handle increasing depth + if ( $level > $current_level || $ci_level > $current_ci_level ) { + + # Compute the standard incremental whitespace. This will be + # the minimum incremental whitespace that will be used. This + # choice results in a smooth transition between the gnu-style + # and the standard style. + my $standard_increment = + ( $level - $current_level ) * $rOpts_indent_columns + + ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; + + # Now we have to define how much extra incremental space + # ("$available_space") we want. This extra space will be + # reduced as necessary when long lines are encountered or when + # it becomes clear that we do not have a good list. + my $available_space = 0; + my $align_paren = 0; + my $excess = 0; + + # initialization on empty stack.. + if ( $max_gnu_stack_index == 0 ) { + $space_count = $level * $rOpts_indent_columns; + } + + # if this is a BLOCK, add the standard increment + elsif ($last_nonblank_block_type) { + $space_count += $standard_increment; + } + + # if last nonblank token was not structural indentation, + # just use standard increment + elsif ( $last_nonblank_type ne '{' ) { + $space_count += $standard_increment; + } + + # otherwise use the space to the first non-blank level change token + else { + + $space_count = $gnu_position_predictor; + + my $min_gnu_indentation = + $gnu_stack[$max_gnu_stack_index]->get_spaces(); + + $available_space = $space_count - $min_gnu_indentation; + if ( $available_space >= $standard_increment ) { + $min_gnu_indentation += $standard_increment; + } + elsif ( $available_space > 1 ) { + $min_gnu_indentation += $available_space + 1; + } + elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { + if ( ( $tightness{$last_nonblank_token} < 2 ) ) { + $min_gnu_indentation += 2; + } + else { + $min_gnu_indentation += 1; + } + } + else { + $min_gnu_indentation += $standard_increment; + } + $available_space = $space_count - $min_gnu_indentation; + + if ( $available_space < 0 ) { + $space_count = $min_gnu_indentation; + $available_space = 0; + } + $align_paren = 1; + } + + # update state, but not on a blank token + if ( $types_to_go[$max_index_to_go] ne 'b' ) { + + $gnu_stack[$max_gnu_stack_index]->set_have_child(1); + + ++$max_gnu_stack_index; + $gnu_stack[$max_gnu_stack_index] = + new_lp_indentation_item( $space_count, $level, $ci_level, + $available_space, $align_paren ); + + # If the opening paren is beyond the half-line length, then + # we will use the minimum (standard) indentation. This will + # help avoid problems associated with running out of space + # near the end of a line. As a result, in deeply nested + # lists, there will be some indentations which are limited + # to this minimum standard indentation. But the most deeply + # nested container will still probably be able to shift its + # parameters to the right for proper alignment, so in most + # cases this will not be noticeable. + if ( $available_space > 0 && $space_count > $halfway ) { + $gnu_stack[$max_gnu_stack_index] + ->tentatively_decrease_available_spaces($available_space); + } + } + } + + # Count commas and look for non-list characters. Once we see a + # non-list character, we give up and don't look for any more commas. + if ( $type eq '=>' ) { + $gnu_arrow_count{$total_depth}++; + + # tentatively treating '=>' like '=' for estimating breaks + # TODO: this could use some experimentation + $last_gnu_equals{$total_depth} = $max_index_to_go; + } + + elsif ( $type eq ',' ) { + $gnu_comma_count{$total_depth}++; + } + + elsif ( $is_assignment{$type} ) { + $last_gnu_equals{$total_depth} = $max_index_to_go; + } + + # this token might start a new line + # if this is a non-blank.. + if ( $type ne 'b' ) { + + # and if .. + if ( + + # this is the first nonblank token of the line + $max_index_to_go == 1 && $types_to_go[0] eq 'b' + + # or previous character was one of these: + || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ + + # or previous character was opening and this does not close it + || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) + || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) + + # or this token is one of these: + || $type =~ /^([\.]|\|\||\&\&)$/ + + # or this is a closing structure + || ( $last_nonblank_type_to_go eq '}' + && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) + + # or previous token was keyword 'return' + || ( $last_nonblank_type_to_go eq 'k' + && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) + + # or starting a new line at certain keywords is fine + || ( $type eq 'k' + && $is_if_unless_and_or_last_next_redo_return{$token} ) + + # or this is after an assignment after a closing structure + || ( + $is_assignment{$last_nonblank_type_to_go} + && ( + $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ + + # and it is significantly to the right + || $gnu_position_predictor > $halfway + ) + ) + ) + { + check_for_long_gnu_style_lines(); + $line_start_index_to_go = $max_index_to_go; + + # back up 1 token if we want to break before that type + # otherwise, we may strand tokens like '?' or ':' on a line + if ( $line_start_index_to_go > 0 ) { + if ( $last_nonblank_type_to_go eq 'k' ) { + + if ( $want_break_before{$last_nonblank_token_to_go} ) { + $line_start_index_to_go--; + } + } + elsif ( $want_break_before{$last_nonblank_type_to_go} ) { + $line_start_index_to_go--; + } + } + } + } + + # remember the predicted position of this token on the output line + if ( $max_index_to_go > $line_start_index_to_go ) { + $gnu_position_predictor = + total_line_length( $line_start_index_to_go, $max_index_to_go ); + } + else { + $gnu_position_predictor = + $space_count + $token_lengths_to_go[$max_index_to_go]; + } + + # store the indentation object for this token + # this allows us to manipulate the leading whitespace + # (in case we have to reduce indentation to fit a line) without + # having to change any token values + $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; + $reduced_spaces_to_go[$max_index_to_go] = + ( $max_gnu_stack_index > 0 && $ci_level ) + ? $gnu_stack[ $max_gnu_stack_index - 1 ] + : $gnu_stack[$max_gnu_stack_index]; + return; +} + +sub check_for_long_gnu_style_lines { + + # look at the current estimated maximum line length, and + # remove some whitespace if it exceeds the desired maximum + + # this is only for the '-lp' style + return unless ($rOpts_line_up_parentheses); + + # nothing can be done if no stack items defined for this line + return if ( $max_gnu_item_index == UNDEFINED_INDEX ); + + # see if we have exceeded the maximum desired line length + # keep 2 extra free because they are needed in some cases + # (result of trial-and-error testing) + my $spaces_needed = + $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2; + + return if ( $spaces_needed <= 0 ); + + # We are over the limit, so try to remove a requested number of + # spaces from leading whitespace. We are only allowed to remove + # from whitespace items created on this batch, since others have + # already been used and cannot be undone. + my @candidates = (); + my $i; + + # loop over all whitespace items created for the current batch + for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { + my $item = $gnu_item_list[$i]; + + # item must still be open to be a candidate (otherwise it + # cannot influence the current token) + next if ( $item->get_closed() >= 0 ); + + my $available_spaces = $item->get_available_spaces(); + + if ( $available_spaces > 0 ) { + push( @candidates, [ $i, $available_spaces ] ); + } + } + + return unless (@candidates); + + # sort by available whitespace so that we can remove whitespace + # from the maximum available first + @candidates = sort { $b->[1] <=> $a->[1] } @candidates; + + # keep removing whitespace until we are done or have no more + foreach my $candidate (@candidates) { + my ( $i, $available_spaces ) = @{$candidate}; + my $deleted_spaces = + ( $available_spaces > $spaces_needed ) + ? $spaces_needed + : $available_spaces; + + # remove the incremental space from this item + $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces); + + my $i_debug = $i; + + # update the leading whitespace of this item and all items + # that came after it + for ( ; $i <= $max_gnu_item_index ; $i++ ) { + + my $old_spaces = $gnu_item_list[$i]->get_spaces(); + if ( $old_spaces >= $deleted_spaces ) { + $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); + } + + # shouldn't happen except for code bug: + else { + my $level = $gnu_item_list[$i_debug]->get_level(); + my $ci_level = $gnu_item_list[$i_debug]->get_ci_level(); + my $old_level = $gnu_item_list[$i]->get_level(); + my $old_ci_level = $gnu_item_list[$i]->get_ci_level(); + warning( +"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" + ); + report_definite_bug(); + } + } + $gnu_position_predictor -= $deleted_spaces; + $spaces_needed -= $deleted_spaces; + last unless ( $spaces_needed > 0 ); + } + return; +} + +sub finish_lp_batch { + + # This routine is called once after each output stream batch is + # finished to undo indentation for all incomplete -lp + # indentation levels. It is too risky to leave a level open, + # because then we can't backtrack in case of a long line to follow. + # This means that comments and blank lines will disrupt this + # indentation style. But the vertical aligner may be able to + # get the space back if there are side comments. + + # this is only for the 'lp' style + return unless ($rOpts_line_up_parentheses); + + # nothing can be done if no stack items defined for this line + return if ( $max_gnu_item_index == UNDEFINED_INDEX ); + + # loop over all whitespace items created for the current batch + foreach my $i ( 0 .. $max_gnu_item_index ) { + my $item = $gnu_item_list[$i]; + + # only look for open items + next if ( $item->get_closed() >= 0 ); + + # Tentatively remove all of the available space + # (The vertical aligner will try to get it back later) + my $available_spaces = $item->get_available_spaces(); + if ( $available_spaces > 0 ) { + + # delete incremental space for this item + $gnu_item_list[$i] + ->tentatively_decrease_available_spaces($available_spaces); + + # Reduce the total indentation space of any nodes that follow + # Note that any such nodes must necessarily be dependents + # of this node. + foreach ( $i + 1 .. $max_gnu_item_index ) { + $gnu_item_list[$_]->decrease_SPACES($available_spaces); + } + } + } + return; +} + +sub reduce_lp_indentation { + + # reduce the leading whitespace at token $i if possible by $spaces_needed + # (a large value of $spaces_needed will remove all excess space) + # NOTE: to be called from scan_list only for a sequence of tokens + # contained between opening and closing parens/braces/brackets + + my ( $i, $spaces_wanted ) = @_; + my $deleted_spaces = 0; + + my $item = $leading_spaces_to_go[$i]; + my $available_spaces = $item->get_available_spaces(); + + if ( + $available_spaces > 0 + && ( ( $spaces_wanted <= $available_spaces ) + || !$item->get_have_child() ) + ) + { + + # we'll remove these spaces, but mark them as recoverable + $deleted_spaces = + $item->tentatively_decrease_available_spaces($spaces_wanted); + } + + return $deleted_spaces; +} + +sub token_sequence_length { + + # return length of tokens ($ibeg .. $iend) including $ibeg & $iend + # returns 0 if $ibeg > $iend (shouldn't happen) + my ( $ibeg, $iend ) = @_; + return 0 if ( $iend < 0 || $ibeg > $iend ); + return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); + return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; +} + +sub total_line_length { + + # return length of a line of tokens ($ibeg .. $iend) + my ( $ibeg, $iend ) = @_; + return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); +} + +sub maximum_line_length_for_level { + + # return maximum line length for line starting with a given level + my $maximum_line_length = $rOpts_maximum_line_length; + + # Modify if -vmll option is selected + if ($rOpts_variable_maximum_line_length) { + my $level = shift; + if ( $level < 0 ) { $level = 0 } + $maximum_line_length += $level * $rOpts_indent_columns; + } + return $maximum_line_length; +} + +sub maximum_line_length { + + # return maximum line length for line starting with the token at given index + my $ii = shift; + return maximum_line_length_for_level( $levels_to_go[$ii] ); +} + +sub excess_line_length { + + # return number of characters by which a line of tokens ($ibeg..$iend) + # exceeds the allowable line length. + my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_; + + # Include left and right weld lengths unless requested not to + my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend); + my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend); + + return total_line_length( $ibeg, $iend ) + $wl + $wr - + maximum_line_length($ibeg); +} + +sub wrapup { + + # flush buffer and write any informative messages + my $self = shift; + + $self->flush(); + $file_writer_object->decrement_output_line_number() + ; # fix up line number since it was incremented + we_are_at_the_last_line(); + if ( $added_semicolon_count > 0 ) { + my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; + my $what = + ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; + write_logfile_entry("$added_semicolon_count $what added:\n"); + write_logfile_entry( + " $first at input line $first_added_semicolon_at\n"); + + if ( $added_semicolon_count > 1 ) { + write_logfile_entry( + " Last at input line $last_added_semicolon_at\n"); + } + write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); + write_logfile_entry("\n"); + } + + if ( $deleted_semicolon_count > 0 ) { + my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; + my $what = + ( $deleted_semicolon_count > 1 ) + ? "semicolons were" + : "semicolon was"; + write_logfile_entry( + "$deleted_semicolon_count unnecessary $what deleted:\n"); + write_logfile_entry( + " $first at input line $first_deleted_semicolon_at\n"); + + if ( $deleted_semicolon_count > 1 ) { + write_logfile_entry( + " Last at input line $last_deleted_semicolon_at\n"); + } + write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); + write_logfile_entry("\n"); + } + + if ( $embedded_tab_count > 0 ) { + my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; + my $what = + ( $embedded_tab_count > 1 ) + ? "quotes or patterns" + : "quote or pattern"; + write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); + write_logfile_entry( +"This means the display of this script could vary with device or software\n" + ); + write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); + + if ( $embedded_tab_count > 1 ) { + write_logfile_entry( + " Last at input line $last_embedded_tab_at\n"); + } + write_logfile_entry("\n"); + } + + if ($first_tabbing_disagreement) { + write_logfile_entry( +"First indentation disagreement seen at input line $first_tabbing_disagreement\n" + ); + } + + if ($in_tabbing_disagreement) { + write_logfile_entry( +"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" + ); + } + else { + + if ($last_tabbing_disagreement) { + + write_logfile_entry( +"Last indentation disagreement seen at input line $last_tabbing_disagreement\n" + ); + } + else { + write_logfile_entry("No indentation disagreement seen\n"); + } + } + if ($first_tabbing_disagreement) { + write_logfile_entry( +"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" + ); + } + write_logfile_entry("\n"); + + $vertical_aligner_object->report_anything_unusual(); + + $file_writer_object->report_line_length_errors(); + + return; +} + +sub check_options { + + # This routine is called to check the Opts hash after it is defined + $rOpts = shift; + + initialize_whitespace_hashes(); + initialize_bond_strength_hashes(); + + make_static_block_comment_pattern(); + make_static_side_comment_pattern(); + make_closing_side_comment_prefix(); + make_closing_side_comment_list_pattern(); + $format_skipping_pattern_begin = + make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); + $format_skipping_pattern_end = + make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); + + # If closing side comments ARE selected, then we can safely + # delete old closing side comments unless closing side comment + # warnings are requested. This is a good idea because it will + # eliminate any old csc's which fall below the line count threshold. + # We cannot do this if warnings are turned on, though, because we + # might delete some text which has been added. So that must + # be handled when comments are created. + if ( $rOpts->{'closing-side-comments'} ) { + if ( !$rOpts->{'closing-side-comment-warnings'} ) { + $rOpts->{'delete-closing-side-comments'} = 1; + } + } + + # If closing side comments ARE NOT selected, but warnings ARE + # selected and we ARE DELETING csc's, then we will pretend to be + # adding with a huge interval. This will force the comments to be + # generated for comparison with the old comments, but not added. + elsif ( $rOpts->{'closing-side-comment-warnings'} ) { + if ( $rOpts->{'delete-closing-side-comments'} ) { + $rOpts->{'delete-closing-side-comments'} = 0; + $rOpts->{'closing-side-comments'} = 1; + $rOpts->{'closing-side-comment-interval'} = 100000000; + } + } + + make_bli_pattern(); + make_block_brace_vertical_tightness_pattern(); + make_blank_line_pattern(); + make_keyword_group_list_pattern(); + + prepare_cuddled_block_types(); + if ( $rOpts->{'dump-cuddled-block-list'} ) { + dump_cuddled_block_list(*STDOUT); + Exit(0); + } + + if ( $rOpts->{'line-up-parentheses'} ) { + + if ( $rOpts->{'indent-only'} + || !$rOpts->{'add-newlines'} + || !$rOpts->{'delete-old-newlines'} ) + { + Warn(<{'line-up-parentheses'} = 0; + } + } + + # At present, tabs are not compatible with the line-up-parentheses style + # (it would be possible to entab the total leading whitespace + # just prior to writing the line, if desired). + if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { + Warn(<{'tabs'} = 0; + } + + # Likewise, tabs are not compatible with outdenting.. + if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { + Warn(<{'tabs'} = 0; + } + + if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { + Warn(<{'tabs'} = 0; + } + + if ( !$rOpts->{'space-for-semicolon'} ) { + $want_left_space{'f'} = -1; + } + + if ( $rOpts->{'space-terminal-semicolon'} ) { + $want_left_space{';'} = 1; + } + + # implement outdenting preferences for keywords + %outdent_keyword = (); + my @okw = split_words( $rOpts->{'outdent-keyword-okl'} ); + unless (@okw) { + @okw = qw(next last redo goto return); # defaults + } + + # FUTURE: if not a keyword, assume that it is an identifier + foreach (@okw) { + if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { + $outdent_keyword{$_} = 1; + } + else { + Warn("ignoring '$_' in -okwl list; not a perl keyword"); + } + } + + # implement user whitespace preferences + if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) { + @want_left_space{@q} = (1) x scalar(@q); + } + + if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) { + @want_right_space{@q} = (1) x scalar(@q); + } + + if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) { + @want_left_space{@q} = (-1) x scalar(@q); + } + + if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) { + @want_right_space{@q} = (-1) x scalar(@q); + } + if ( $rOpts->{'dump-want-left-space'} ) { + dump_want_left_space(*STDOUT); + Exit(0); + } + + if ( $rOpts->{'dump-want-right-space'} ) { + dump_want_right_space(*STDOUT); + Exit(0); + } + + # default keywords for which space is introduced before an opening paren + # (at present, including them messes up vertical alignment) + my @sak = qw(my local our and or err eq ne if else elsif until + unless while for foreach return switch case given when catch); + @space_after_keyword{@sak} = (1) x scalar(@sak); + + # first remove any or all of these if desired + if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) { + + # -nsak='*' selects all the above keywords + if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) } + @space_after_keyword{@q} = (0) x scalar(@q); + } + + # then allow user to add to these defaults + if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) { + @space_after_keyword{@q} = (1) x scalar(@q); + } + + # implement user break preferences + my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & + = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= + . : ? && || and or err xor + ); + + my $break_after = sub { + my @toks = @_; + foreach my $tok (@toks) { + if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } + } + }; + + my $break_before = sub { + my @toks = @_; + foreach my $tok (@toks) { + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } + } + }; + + $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); + $break_before->(@all_operators) + if ( $rOpts->{'break-before-all-operators'} ); + + $break_after->( split_words( $rOpts->{'want-break-after'} ) ); + $break_before->( split_words( $rOpts->{'want-break-before'} ) ); + + # make note if breaks are before certain key types + %want_break_before = (); + foreach my $tok ( @all_operators, ',' ) { + $want_break_before{$tok} = + $left_bond_strength{$tok} < $right_bond_strength{$tok}; + } + + # Coordinate ?/: breaks, which must be similar + if ( !$want_break_before{':'} ) { + $want_break_before{'?'} = $want_break_before{':'}; + $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; + $left_bond_strength{'?'} = NO_BREAK; + } + + # Define here tokens which may follow the closing brace of a do statement + # on the same line, as in: + # } while ( $something); + my @dof = qw(until while unless if ; : ); + push @dof, ','; + @is_do_follower{@dof} = (1) x scalar(@dof); + + # What tokens may follow the closing brace of an if or elsif block? + # Not used. Previously used for cuddled else, but no longer needed. + %is_if_brace_follower = (); + + # nothing can follow the closing curly of an else { } block: + %is_else_brace_follower = (); + + # what can follow a multi-line anonymous sub definition closing curly: + my @asf = qw# ; : => or and && || ~~ !~~ ) #; + push @asf, ','; + @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf); + + # what can follow a one-line anonymous sub closing curly: + # one-line anonymous subs also have ']' here... + # see tk3.t and PP.pm + my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #; + push @asf1, ','; + @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1); + + # What can follow a closing curly of a block + # which is not an if/elsif/else/do/sort/map/grep/eval/sub + # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' + my @obf = qw# ; : => or and && || ) #; + push @obf, ','; + @is_other_brace_follower{@obf} = (1) x scalar(@obf); + + $right_bond_strength{'{'} = WEAK; + $left_bond_strength{'{'} = VERY_STRONG; + + # make -l=0 equal to -l=infinite + if ( !$rOpts->{'maximum-line-length'} ) { + $rOpts->{'maximum-line-length'} = 1000000; + } + + # make -lbl=0 equal to -lbl=infinite + if ( !$rOpts->{'long-block-line-count'} ) { + $rOpts->{'long-block-line-count'} = 1000000; + } + + my $enc = $rOpts->{'character-encoding'}; + if ( $enc && $enc !~ /^(none|utf8)$/i ) { + Die(<{'output-line-ending'}; + if ($ole) { + my %endings = ( + dos => "\015\012", + win => "\015\012", + mac => "\015", + unix => "\012", + ); + + # Patch for RT #99514, a memoization issue. + # Normally, the user enters one of 'dos', 'win', etc, and we change the + # value in the options parameter to be the corresponding line ending + # character. But, if we are using memoization, on later passes through + # here the option parameter will already have the desired ending + # character rather than the keyword 'dos', 'win', etc. So + # we must check to see if conversion has already been done and, if so, + # bypass the conversion step. + my %endings_inverted = ( + "\015\012" => 'dos', + "\015\012" => 'win', + "\015" => 'mac', + "\012" => 'unix', + ); + + if ( defined( $endings_inverted{$ole} ) ) { + + # we already have valid line ending, nothing more to do + } + else { + $ole = lc $ole; + unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { + my $str = join " ", keys %endings; + Die(<{'preserve-line-endings'} ) { + Warn("Ignoring -ple; conflicts with -ole\n"); + $rOpts->{'preserve-line-endings'} = undef; + } + } + } + + # hashes used to simplify setting whitespace + %tightness = ( + '{' => $rOpts->{'brace-tightness'}, + '}' => $rOpts->{'brace-tightness'}, + '(' => $rOpts->{'paren-tightness'}, + ')' => $rOpts->{'paren-tightness'}, + '[' => $rOpts->{'square-bracket-tightness'}, + ']' => $rOpts->{'square-bracket-tightness'}, + ); + %matching_token = ( + '{' => '}', + '(' => ')', + '[' => ']', + '?' => ':', + ); + + # frequently used parameters + $rOpts_add_newlines = $rOpts->{'add-newlines'}; + $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; + $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; + $rOpts_block_brace_vertical_tightness = + $rOpts->{'block-brace-vertical-tightness'}; + $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; + $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; + $rOpts_break_at_old_ternary_breakpoints = + $rOpts->{'break-at-old-ternary-breakpoints'}; + $rOpts_break_at_old_attribute_breakpoints = + $rOpts->{'break-at-old-attribute-breakpoints'}; + $rOpts_break_at_old_comma_breakpoints = + $rOpts->{'break-at-old-comma-breakpoints'}; + $rOpts_break_at_old_keyword_breakpoints = + $rOpts->{'break-at-old-keyword-breakpoints'}; + $rOpts_break_at_old_logical_breakpoints = + $rOpts->{'break-at-old-logical-breakpoints'}; + $rOpts_break_at_old_method_breakpoints = + $rOpts->{'break-at-old-method-breakpoints'}; + $rOpts_closing_side_comment_else_flag = + $rOpts->{'closing-side-comment-else-flag'}; + $rOpts_closing_side_comment_maximum_text = + $rOpts->{'closing-side-comment-maximum-text'}; + $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; + $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; + $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; + $rOpts_indent_columns = $rOpts->{'indent-columns'}; + $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; + $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; + $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; + $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; + $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'}; + + $rOpts_variable_maximum_line_length = + $rOpts->{'variable-maximum-line-length'}; + $rOpts_short_concatenation_item_length = + $rOpts->{'short-concatenation-item-length'}; + + $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; + $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; + $rOpts_format_skipping = $rOpts->{'format-skipping'}; + $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; + $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; + $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; + $rOpts_ignore_side_comment_lengths = + $rOpts->{'ignore-side-comment-lengths'}; + + # Note that both opening and closing tokens can access the opening + # and closing flags of their container types. + %opening_vertical_tightness = ( + '(' => $rOpts->{'paren-vertical-tightness'}, + '{' => $rOpts->{'brace-vertical-tightness'}, + '[' => $rOpts->{'square-bracket-vertical-tightness'}, + ')' => $rOpts->{'paren-vertical-tightness'}, + '}' => $rOpts->{'brace-vertical-tightness'}, + ']' => $rOpts->{'square-bracket-vertical-tightness'}, + ); + + %closing_vertical_tightness = ( + '(' => $rOpts->{'paren-vertical-tightness-closing'}, + '{' => $rOpts->{'brace-vertical-tightness-closing'}, + '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, + ')' => $rOpts->{'paren-vertical-tightness-closing'}, + '}' => $rOpts->{'brace-vertical-tightness-closing'}, + ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, + ); + + # assume flag for '>' same as ')' for closing qw quotes + %closing_token_indentation = ( + ')' => $rOpts->{'closing-paren-indentation'}, + '}' => $rOpts->{'closing-brace-indentation'}, + ']' => $rOpts->{'closing-square-bracket-indentation'}, + '>' => $rOpts->{'closing-paren-indentation'}, + ); + + # flag indicating if any closing tokens are indented + $some_closing_token_indentation = + $rOpts->{'closing-paren-indentation'} + || $rOpts->{'closing-brace-indentation'} + || $rOpts->{'closing-square-bracket-indentation'} + || $rOpts->{'indent-closing-brace'}; + + %opening_token_right = ( + '(' => $rOpts->{'opening-paren-right'}, + '{' => $rOpts->{'opening-hash-brace-right'}, + '[' => $rOpts->{'opening-square-bracket-right'}, + ); + + %stack_opening_token = ( + '(' => $rOpts->{'stack-opening-paren'}, + '{' => $rOpts->{'stack-opening-hash-brace'}, + '[' => $rOpts->{'stack-opening-square-bracket'}, + ); + + %stack_closing_token = ( + ')' => $rOpts->{'stack-closing-paren'}, + '}' => $rOpts->{'stack-closing-hash-brace'}, + ']' => $rOpts->{'stack-closing-square-bracket'}, + ); + $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; + $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'}; + return; +} + +sub bad_pattern { + + # See if a pattern will compile. We have to use a string eval here, + # but it should be safe because the pattern has been constructed + # by this program. + my ($pattern) = @_; + eval "'##'=~/$pattern/"; + return $@; +} + +{ + my %no_cuddle; + + # Add keywords here which really should not be cuddled + BEGIN { + my @q = qw(if unless for foreach while); + @no_cuddle{@q} = (1) x scalar(@q); + } + + sub prepare_cuddled_block_types { + + # the cuddled-else style, if used, is controlled by a hash that + # we construct here + + # Include keywords here which should not be cuddled + + my $cuddled_string = ""; + if ( $rOpts->{'cuddled-else'} ) { + + # set the default + $cuddled_string = 'elsif else continue catch finally' + unless ( $rOpts->{'cuddled-block-list-exclusive'} ); + + # This is the old equivalent but more complex version + # $cuddled_string = 'if-elsif-else unless-elsif-else -continue '; + + # Add users other blocks to be cuddled + my $cuddled_block_list = $rOpts->{'cuddled-block-list'}; + if ($cuddled_block_list) { + $cuddled_string .= " " . $cuddled_block_list; + } + + } + + # If we have a cuddled string of the form + # 'try-catch-finally' + + # we want to prepare a hash of the form + + # $rcuddled_block_types = { + # 'try' => { + # 'catch' => 1, + # 'finally' => 1 + # }, + # }; + + # use -dcbl to dump this hash + + # Multiple such strings are input as a space or comma separated list + + # If we get two lists with the same leading type, such as + # -cbl = "-try-catch-finally -try-catch-otherwise" + # then they will get merged as follows: + # $rcuddled_block_types = { + # 'try' => { + # 'catch' => 1, + # 'finally' => 2, + # 'otherwise' => 1, + # }, + # }; + # This will allow either type of chain to be followed. + + $cuddled_string =~ s/,/ /g; # allow space or comma separated lists + my @cuddled_strings = split /\s+/, $cuddled_string; + + $rcuddled_block_types = {}; + + # process each dash-separated string... + my $string_count = 0; + foreach my $string (@cuddled_strings) { + next unless $string; + my @words = split /-+/, $string; # allow multiple dashes + + # we could look for and report possible errors here... + next unless ( @words > 0 ); + + # allow either '-continue' or *-continue' for arbitrary starting type + my $start = '*'; + + # a single word without dashes is a secondary block type + if ( @words > 1 ) { + $start = shift @words; + } + + # always make an entry for the leading word. If none follow, this + # will still prevent a wildcard from matching this word. + if ( !defined( $rcuddled_block_types->{$start} ) ) { + $rcuddled_block_types->{$start} = {}; + } + + # The count gives the original word order in case we ever want it. + $string_count++; + my $word_count = 0; + foreach my $word (@words) { + next unless $word; + if ( $no_cuddle{$word} ) { + Warn( +"## Ignoring keyword '$word' in -cbl; does not seem right\n" + ); + next; + } + $word_count++; + $rcuddled_block_types->{$start}->{$word} = + 1; #"$string_count.$word_count"; + } + } + return; + } +} + +sub dump_cuddled_block_list { + my ($fh) = @_; + + # ORIGINAL METHOD: Here is the format of the cuddled block type hash + # which controls this routine + # my $rcuddled_block_types = { + # 'if' => { + # 'else' => 1, + # 'elsif' => 1 + # }, + # 'try' => { + # 'catch' => 1, + # 'finally' => 1 + # }, + # }; + + # SIMPLFIED METHOD: the simplified method uses a wildcard for + # the starting block type and puts all cuddled blocks together: + # my $rcuddled_block_types = { + # '*' => { + # 'else' => 1, + # 'elsif' => 1 + # 'catch' => 1, + # 'finally' => 1 + # }, + # }; + + # Both methods work, but the simplified method has proven to be adequate and + # easier to manage. + + my $cuddled_string = $rOpts->{'cuddled-block-list'}; + $cuddled_string = '' unless $cuddled_string; + + my $flags = ""; + $flags .= "-ce" if ( $rOpts->{'cuddled-else'} ); + $flags .= " -cbl='$cuddled_string'"; + + unless ( $rOpts->{'cuddled-else'} ) { + $flags .= "\nNote: You must specify -ce to generate a cuddled hash"; + } + + $fh->print(<print( Dumper($rcuddled_block_types) ); + + $fh->print(<{'static-block-comment-prefix'} ) { + my $prefix = $rOpts->{'static-block-comment-prefix'}; + $prefix =~ s/^\s*//; + my $pattern = $prefix; + + # user may give leading caret to force matching left comments only + if ( $prefix !~ /^\^#/ ) { + if ( $prefix !~ /^#/ ) { + Die( +"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n" + ); + } + $pattern = '^\s*' . $prefix; + } + if ( bad_pattern($pattern) ) { + Die( +"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n" + ); + } + $static_block_comment_pattern = $pattern; + } + return; +} + +sub make_format_skipping_pattern { + my ( $opt_name, $default ) = @_; + my $param = $rOpts->{$opt_name}; + unless ($param) { $param = $default } + $param =~ s/^\s*//; + if ( $param !~ /^#/ ) { + Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); + } + my $pattern = '^' . $param . '\s'; + if ( bad_pattern($pattern) ) { + Die( +"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n" + ); + } + return $pattern; +} + +sub make_closing_side_comment_list_pattern { + + # turn any input list into a regex for recognizing selected block types + $closing_side_comment_list_pattern = '^\w+'; + if ( defined( $rOpts->{'closing-side-comment-list'} ) + && $rOpts->{'closing-side-comment-list'} ) + { + $closing_side_comment_list_pattern = + make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); + } + return; +} + +sub make_bli_pattern { + + if ( defined( $rOpts->{'brace-left-and-indent-list'} ) + && $rOpts->{'brace-left-and-indent-list'} ) + { + $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; + } + + $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); + return; +} + +sub make_keyword_group_list_pattern { + + # turn any input list into a regex for recognizing selected block types. + # Here are the defaults: + $keyword_group_list_pattern = '^(our|local|my|use|require|)$'; + $keyword_group_list_comment_pattern = ''; + if ( defined( $rOpts->{'keyword-group-blanks-list'} ) + && $rOpts->{'keyword-group-blanks-list'} ) + { + my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'}; + my @keyword_list; + my @comment_list; + foreach my $word (@words) { + if ( $word =~ /^(BC|SBC)$/ ) { + push @comment_list, $word; + if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' } + } + else { + push @keyword_list, $word; + } + } + $keyword_group_list_pattern = + make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} ); + $keyword_group_list_comment_pattern = + make_block_pattern( '-kgbl', join( ' ', @comment_list ) ); + } + return; +} + +sub make_block_brace_vertical_tightness_pattern { + + # turn any input list into a regex for recognizing selected block types + $block_brace_vertical_tightness_pattern = + '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; + if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) + && $rOpts->{'block-brace-vertical-tightness-list'} ) + { + $block_brace_vertical_tightness_pattern = + make_block_pattern( '-bbvtl', + $rOpts->{'block-brace-vertical-tightness-list'} ); + } + return; +} + +sub make_blank_line_pattern { + + $blank_lines_before_closing_block_pattern = $SUB_PATTERN; + my $key = 'blank-lines-before-closing-block-list'; + if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { + $blank_lines_before_closing_block_pattern = + make_block_pattern( '-blbcl', $rOpts->{$key} ); + } + + $blank_lines_after_opening_block_pattern = $SUB_PATTERN; + $key = 'blank-lines-after-opening-block-list'; + if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { + $blank_lines_after_opening_block_pattern = + make_block_pattern( '-blaol', $rOpts->{$key} ); + } + return; +} + +sub make_block_pattern { + + # given a string of block-type keywords, return a regex to match them + # The only tricky part is that labels are indicated with a single ':' + # and the 'sub' token text may have additional text after it (name of + # sub). + # + # Example: + # + # input string: "if else elsif unless while for foreach do : sub"; + # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; + + # Minor Update: + # + # To distinguish between anonymous subs and named subs, use 'sub' to + # indicate a named sub, and 'asub' to indicate an anonymous sub + + my ( $abbrev, $string ) = @_; + my @list = split_words($string); + my @words = (); + my %seen; + for my $i (@list) { + if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } + next if $seen{$i}; + $seen{$i} = 1; + if ( $i eq 'sub' ) { + } + elsif ( $i eq 'asub' ) { + } + elsif ( $i eq ';' ) { + push @words, ';'; + } + elsif ( $i eq '{' ) { + push @words, '\{'; + } + elsif ( $i eq ':' ) { + push @words, '\w+:'; + } + elsif ( $i =~ /^\w/ ) { + push @words, $i; + } + else { + Warn("unrecognized block type $i after $abbrev, ignoring\n"); + } + } + my $pattern = '(' . join( '|', @words ) . ')$'; + my $sub_patterns = ""; + if ( $seen{'sub'} ) { + $sub_patterns .= '|' . $SUB_PATTERN; + } + if ( $seen{'asub'} ) { + $sub_patterns .= '|' . $ASUB_PATTERN; + } + if ($sub_patterns) { + $pattern = '(' . $pattern . $sub_patterns . ')'; + } + $pattern = '^' . $pattern; + return $pattern; +} + +sub make_static_side_comment_pattern { + + # create the pattern used to identify static side comments + $static_side_comment_pattern = '^##'; + + # allow the user to change it + if ( $rOpts->{'static-side-comment-prefix'} ) { + my $prefix = $rOpts->{'static-side-comment-prefix'}; + $prefix =~ s/^\s*//; + my $pattern = '^' . $prefix; + if ( bad_pattern($pattern) ) { + Die( +"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n" + ); + } + $static_side_comment_pattern = $pattern; + } + return; +} + +sub make_closing_side_comment_prefix { + + # Be sure we have a valid closing side comment prefix + my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; + my $csc_prefix_pattern; + if ( !defined($csc_prefix) ) { + $csc_prefix = '## end'; + $csc_prefix_pattern = '^##\s+end'; + } + else { + my $test_csc_prefix = $csc_prefix; + if ( $test_csc_prefix !~ /^#/ ) { + $test_csc_prefix = '#' . $test_csc_prefix; + } + + # make a regex to recognize the prefix + my $test_csc_prefix_pattern = $test_csc_prefix; + + # escape any special characters + $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; + + $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; + + # allow exact number of intermediate spaces to vary + $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; + + # make sure we have a good pattern + # if we fail this we probably have an error in escaping + # characters. + + if ( bad_pattern($test_csc_prefix_pattern) ) { + + # shouldn't happen..must have screwed up escaping, above + report_definite_bug(); + Warn( +"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n" + ); + + # just warn and keep going with defaults + Warn("Please consider using a simpler -cscp prefix\n"); + Warn("Using default -cscp instead; please check output\n"); + } + else { + $csc_prefix = $test_csc_prefix; + $csc_prefix_pattern = $test_csc_prefix_pattern; + } + } + $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; + $closing_side_comment_prefix_pattern = $csc_prefix_pattern; + return; +} + +sub dump_want_left_space { + my $fh = shift; + local $" = "\n"; + print $fh <> + # my $msg = new Fax::Send + # -recipients => $to, + # -data => $data; + # This is the safest thing to do. If we had the token to the right of + # the minus we could do a better check. + || ( ( $tokenr eq '-' ) && ( $typel eq 'w' ) ) + + # and something like this could become ambiguous without space + # after the '-': + # use constant III=>1; + # $a = $b - III; + # and even this: + # $a = - III; + || ( ( $tokenl eq '-' ) + && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) + + # '= -' should not become =- or you will get a warning + # about reversed -= + # || ($tokenr eq '-') + + # keep a space between a quote and a bareword to prevent the + # bareword from becoming a quote modifier. + || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) + + # keep a space between a token ending in '$' and any word; + # this caused trouble: "die @$ if $@" + || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) + && ( $tokenr =~ /^[a-zA-Z_]/ ) ) + + # perl is very fussy about spaces before << + || ( $tokenr =~ /^\<\ [qw#= ( ) =#], #=( )= + 'Venus1' => [qw#0 +#], # 0+ + 'Venus2' => [qw#+ 0#], # +0 + 'Enterprise' => [qw#) x ! !#], # ()x!! + 'Kite1' => [qw#~ ~ <>#], # ~~<> + 'Kite2' => [qw#~~ <>#], # ~~<> + 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> + 'Bang bang ' => [qw#! !#], # !! + ); + + # The following operators and constants are not included because they + # are normally kept tight by perltidy: + # ~~ <~> + # + + # Make a lookup table indexed by the first token of each operator: + # first token => [list, list, ...] + foreach my $value ( values(%secret_operators) ) { + my $tok = $value->[0]; + push @{ $is_leading_secret_token{$tok} }, $value; + } + } + + sub new_secret_operator_whitespace { + + my ( $rlong_array, $rwhitespace_flags ) = @_; + + # Loop over all tokens in this line + my ( $token, $type ); + my $jmax = @{$rlong_array} - 1; + foreach my $j ( 0 .. $jmax ) { + + $token = $rlong_array->[$j]->[_TOKEN_]; + $type = $rlong_array->[$j]->[_TYPE_]; + + # Skip unless this token might start a secret operator + next if ( $type eq 'b' ); + next unless ( $is_leading_secret_token{$token} ); + + # Loop over all secret operators with this leading token + foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { + my $jend = $j - 1; + foreach my $tok ( @{$rpattern} ) { + $jend++; + $jend++ + + if ( $jend <= $jmax + && $rlong_array->[$jend]->[_TYPE_] eq 'b' ); + if ( $jend > $jmax + || $tok ne $rlong_array->[$jend]->[_TOKEN_] ) + { + $jend = undef; + last; + } + } + + if ($jend) { + + # set flags to prevent spaces within this operator + foreach my $jj ( $j + 1 .. $jend ) { + $rwhitespace_flags->[$jj] = WS_NO; + } + $j = $jend; + last; + } + } ## End Loop over all operators + } ## End loop over all tokens + return; + } # End sub +} + +{ # begin print_line_of_tokens + + my $rinput_token_array; # Current working array + my $rinput_K_array; # Future working array + + my $in_quote; + my $guessed_indentation_level; + + # This should be a return variable from extract_token + # These local token variables are stored by store_token_to_go: + my $Ktoken_vars; + my $block_type; + my $ci_level; + my $container_environment; + my $container_type; + my $in_continued_quote; + my $level; + my $no_internal_newlines; + my $slevel; + my $token; + my $type; + my $type_sequence; + + # routine to pull the jth token from the line of tokens + sub extract_token { + my ( $self, $j ) = @_; + + my $rLL = $self->{rLL}; + $Ktoken_vars = $rinput_K_array->[$j]; + if ( !defined($Ktoken_vars) ) { + + # Shouldn't happen: an error here would be due to a recent program change + Fault("undefined index K for j=$j"); + } + my $rtoken_vars = $rLL->[$Ktoken_vars]; + + if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) { + + # Shouldn't happen: an error here would be due to a recent program change + Fault(<[_TOKEN_]' ne '$rLL->[$Ktoken_vars]' +EOM + } + + ######################################################### + # these are now redundant and can eventually be eliminated + + $token = $rtoken_vars->[_TOKEN_]; + $type = $rtoken_vars->[_TYPE_]; + $block_type = $rtoken_vars->[_BLOCK_TYPE_]; + $container_type = $rtoken_vars->[_CONTAINER_TYPE_]; + $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_]; + $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + $level = $rtoken_vars->[_LEVEL_]; + $slevel = $rtoken_vars->[_SLEVEL_]; + $ci_level = $rtoken_vars->[_CI_LEVEL_]; + ######################################################### + + return; + } + + { + my @saved_token; + + sub save_current_token { + + @saved_token = ( + $block_type, $ci_level, + $container_environment, $container_type, + $in_continued_quote, $level, + $no_internal_newlines, $slevel, + $token, $type, + $type_sequence, $Ktoken_vars, + ); + return; + } + + sub restore_current_token { + ( + $block_type, $ci_level, + $container_environment, $container_type, + $in_continued_quote, $level, + $no_internal_newlines, $slevel, + $token, $type, + $type_sequence, $Ktoken_vars, + ) = @saved_token; + return; + } + } + + sub token_length { + + # Returns the length of a token, given: + # $token=text of the token + # $type = type + # $not_first_token = should be TRUE if this is not the first token of + # the line. It might the index of this token in an array. It is + # used to test for a side comment vs a block comment. + # Note: Eventually this should be the only routine determining the + # length of a token in this package. + my ( $token, $type, $not_first_token ) = @_; + my $token_length = length($token); + + # We mark lengths of side comments as just 1 if we are + # ignoring their lengths when setting line breaks. + $token_length = 1 + if ( $rOpts_ignore_side_comment_lengths + && $not_first_token + && $type eq '#' ); + return $token_length; + } + + sub rtoken_length { + + # return length of ith token in @{$rtokens} + my ($i) = @_; + return token_length( $rinput_token_array->[$i]->[_TOKEN_], + $rinput_token_array->[$i]->[_TYPE_], $i ); + } + + # Routine to place the current token into the output stream. + # Called once per output token. + sub store_token_to_go { + + my ( $self, $side_comment_follows ) = @_; + + my $flag = $side_comment_follows ? 1 : $no_internal_newlines; + + ++$max_index_to_go; + $K_to_go[$max_index_to_go] = $Ktoken_vars; + $tokens_to_go[$max_index_to_go] = $token; + $types_to_go[$max_index_to_go] = $type; + $nobreak_to_go[$max_index_to_go] = $flag; + $old_breakpoint_to_go[$max_index_to_go] = 0; + $forced_breakpoint_to_go[$max_index_to_go] = 0; + $block_type_to_go[$max_index_to_go] = $block_type; + $type_sequence_to_go[$max_index_to_go] = $type_sequence; + $container_environment_to_go[$max_index_to_go] = $container_environment; + $ci_levels_to_go[$max_index_to_go] = $ci_level; + $mate_index_to_go[$max_index_to_go] = -1; + $matching_token_to_go[$max_index_to_go] = ''; + $bond_strength_to_go[$max_index_to_go] = 0; + + # Note: negative levels are currently retained as a diagnostic so that + # the 'final indentation level' is correctly reported for bad scripts. + # But this means that every use of $level as an index must be checked. + # If this becomes too much of a problem, we might give up and just clip + # them at zero. + ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; + $levels_to_go[$max_index_to_go] = $level; + $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; + + # link the non-blank tokens + my $iprev = $max_index_to_go - 1; + $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' ); + $iprev_to_go[$max_index_to_go] = $iprev; + $inext_to_go[$iprev] = $max_index_to_go + if ( $iprev >= 0 && $type ne 'b' ); + $inext_to_go[$max_index_to_go] = $max_index_to_go + 1; + + $token_lengths_to_go[$max_index_to_go] = + token_length( $token, $type, $max_index_to_go ); + + # We keep a running sum of token lengths from the start of this batch: + # summed_lengths_to_go[$i] = total length to just before token $i + # summed_lengths_to_go[$i+1] = total length to just after token $i + $summed_lengths_to_go[ $max_index_to_go + 1 ] = + $summed_lengths_to_go[$max_index_to_go] + + $token_lengths_to_go[$max_index_to_go]; + + # Define the indentation that this token would have if it started + # a new line. We have to do this now because we need to know this + # when considering one-line blocks. + set_leading_whitespace( $level, $ci_level, $in_continued_quote ); + + # remember previous nonblank tokens seen + if ( $type ne 'b' ) { + $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; + $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; + $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; + $last_nonblank_index_to_go = $max_index_to_go; + $last_nonblank_type_to_go = $type; + $last_nonblank_token_to_go = $token; + if ( $type eq ',' ) { + $comma_count_in_batch++; + } + } + + FORMATTER_DEBUG_FLAG_STORE && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; + }; + return; + } + + sub insert_new_token_to_go { + + # insert a new token into the output stream. use same level as + # previous token; assumes a character at max_index_to_go. + my ( $self, @args ) = @_; + save_current_token(); + ( $token, $type, $slevel, $no_internal_newlines ) = @args; + + if ( $max_index_to_go == UNDEFINED_INDEX ) { + warning("code bug: bad call to insert_new_token_to_go\n"); + } + $level = $levels_to_go[$max_index_to_go]; + + # FIXME: it seems to be necessary to use the next, rather than + # previous, value of this variable when creating a new blank (align.t) + #my $slevel = $nesting_depth_to_go[$max_index_to_go]; + $ci_level = $ci_levels_to_go[$max_index_to_go]; + $container_environment = $container_environment_to_go[$max_index_to_go]; + $in_continued_quote = 0; + $block_type = ""; + $type_sequence = ""; + + # store an undef for the K value to catch unexpected usage + # This routine is only called by add_closing_side_comments, and + # eventually that call will be eliminated. + $Ktoken_vars = undef; + + $self->store_token_to_go(); + restore_current_token(); + return; + } + + sub copy_hash { + my ($rold_token_hash) = @_; + my %new_token_hash = + map { ( $_, $rold_token_hash->{$_} ) } keys %{$rold_token_hash}; + return \%new_token_hash; + } + + sub copy_array { + my ($rold) = @_; + my @new = map { $_ } @{$rold}; + return \@new; + } + + sub copy_token_as_type { + my ( $rold_token, $type, $token ) = @_; + if ( $type eq 'b' ) { + $token = " " unless defined($token); + } + elsif ( $type eq 'q' ) { + $token = '' unless defined($token); + } + elsif ( $type eq '->' ) { + $token = '->' unless defined($token); + } + elsif ( $type eq ';' ) { + $token = ';' unless defined($token); + } + else { + Fault( +"Programming error: copy_token_as has type $type but should be 'b' or 'q'" + ); + } + my $rnew_token = copy_array($rold_token); + $rnew_token->[_TYPE_] = $type; + $rnew_token->[_TOKEN_] = $token; + $rnew_token->[_BLOCK_TYPE_] = ''; + $rnew_token->[_CONTAINER_TYPE_] = ''; + $rnew_token->[_CONTAINER_ENVIRONMENT_] = ''; + $rnew_token->[_TYPE_SEQUENCE_] = ''; + return $rnew_token; + } + + sub boolean_equals { + my ( $val1, $val2 ) = @_; + return ( $val1 && $val2 || !$val1 && !$val2 ); + } + + sub print_line_of_tokens { + + my ( $self, $line_of_tokens ) = @_; + + # This routine is called once per input line to process all of + # the tokens on that line. This is the first stage of + # beautification. + # + # Full-line comments and blank lines may be processed immediately. + # + # For normal lines of code, the tokens are stored one-by-one, + # via calls to 'sub store_token_to_go', until a known line break + # point is reached. Then, the batch of collected tokens is + # passed along to 'sub output_line_to_go' for further + # processing. This routine decides if there should be + # whitespace between each pair of non-white tokens, so later + # routines only need to decide on any additional line breaks. + # Any whitespace is initially a single space character. Later, + # the vertical aligner may expand that to be multiple space + # characters if necessary for alignment. + + $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + my $CODE_type = $line_of_tokens->{_code_type}; + + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $K_first, $K_last ) = @{$rK_range}; + + my $rLL = $self->{rLL}; + my $rbreak_container = $self->{rbreak_container}; + + if ( !defined($K_first) ) { + + # Unexpected blank line.. + # Calling routine was supposed to handle this + Warn( +"Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring" + ); + return; + } + + $no_internal_newlines = 1 - $rOpts_add_newlines; + my $is_comment = + ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' ); + my $is_static_block_comment_without_leading_space = + $CODE_type eq 'SBCX'; + $is_static_block_comment = + $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space; + my $is_hanging_side_comment = $CODE_type eq 'HSC'; + my $is_VERSION_statement = $CODE_type eq 'VER'; + if ($is_VERSION_statement) { + $saw_VERSION_in_this_file = 1; + $no_internal_newlines = 1; + } + + # Add interline blank if any + my $last_old_nonblank_type = "b"; + my $first_new_nonblank_type = "b"; + my $first_new_nonblank_token = " "; + if ( $max_index_to_go >= 0 ) { + $last_old_nonblank_type = $types_to_go[$max_index_to_go]; + $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_]; + $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_]; + if ( !$is_comment + && $types_to_go[$max_index_to_go] ne 'b' + && $K_first > 0 + && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' ) + { + $K_first -= 1; + } + } + + # Copy the tokens into local arrays + $rinput_token_array = []; + $rinput_K_array = []; + $rinput_K_array = [ ( $K_first .. $K_last ) ]; + $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ]; + my $jmax = @{$rinput_K_array} - 1; + + $in_continued_quote = $starting_in_quote = + $line_of_tokens->{_starting_in_quote}; + $in_quote = $line_of_tokens->{_ending_in_quote}; + $ending_in_quote = $in_quote; + $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + + my $j_next; + my $next_nonblank_token; + my $next_nonblank_token_type; + + $block_type = ""; + $container_type = ""; + $container_environment = ""; + $type_sequence = ""; + + ###################################### + # Handle a block (full-line) comment.. + ###################################### + if ($is_comment) { + + if ( $rOpts->{'delete-block-comments'} ) { return } + + if ( $rOpts->{'tee-block-comments'} ) { + $file_writer_object->tee_on(); + } + + destroy_one_line_block(); + $self->output_line_to_go(); + + # output a blank line before block comments + if ( + # unless we follow a blank or comment line + $last_line_leading_type !~ /^[#b]$/ + + # only if allowed + && $rOpts->{'blanks-before-comments'} + + # if this is NOT an empty comment line + && $rinput_token_array->[0]->[_TOKEN_] ne '#' + + # not after a short line ending in an opening token + # because we already have space above this comment. + # Note that the first comment in this if block, after + # the 'if (', does not get a blank line because of this. + && !$last_output_short_opening_token + + # never before static block comments + && !$is_static_block_comment + ) + { + $self->flush(); # switching to new output stream + $file_writer_object->write_blank_code_line(); + $last_line_leading_type = 'b'; + } + + # TRIM COMMENTS -- This could be turned off as a option + $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end + + if ( + $rOpts->{'indent-block-comments'} + && ( !$rOpts->{'indent-spaced-block-comments'} + || $input_line =~ /^\s+/ ) + && !$is_static_block_comment_without_leading_space + ) + { + $self->extract_token(0); + $self->store_token_to_go(); + $self->output_line_to_go(); + } + else { + $self->flush(); # switching to new output stream + $file_writer_object->write_code_line( + $rinput_token_array->[0]->[_TOKEN_] . "\n" ); + $last_line_leading_type = '#'; + } + if ( $rOpts->{'tee-block-comments'} ) { + $file_writer_object->tee_off(); + } + return; + } + + # TODO: Move to sub scan_comments + # compare input/output indentation except for continuation lines + # (because they have an unknown amount of initial blank space) + # and lines which are quotes (because they may have been outdented) + # Note: this test is placed here because we know the continuation flag + # at this point, which allows us to avoid non-meaningful checks. + my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_]; + compare_indentation_levels( $guessed_indentation_level, + $structural_indentation_level ) + unless ( $is_hanging_side_comment + || $rinput_token_array->[0]->[_CI_LEVEL_] > 0 + || $guessed_indentation_level == 0 + && $rinput_token_array->[0]->[_TYPE_] eq 'Q' ); + + ########################## + # Handle indentation-only + ########################## + + # NOTE: In previous versions we sent all qw lines out immediately here. + # No longer doing this: also write a line which is entirely a 'qw' list + # to allow stacking of opening and closing tokens. Note that interior + # qw lines will still go out at the end of this routine. + if ( $CODE_type eq 'IO' ) { + $self->flush(); + my $line = $input_line; + + # delete side comments if requested with -io, but + # we will not allow deleting of closing side comments with -io + # because the coding would be more complex + if ( $rOpts->{'delete-side-comments'} + && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' ) + { + + $line = ""; + foreach my $jj ( 0 .. $jmax - 1 ) { + $line .= $rinput_token_array->[$jj]->[_TOKEN_]; + } + } + + # Fix for rt #125506 Unexpected string formating + # in which leading space of a terminal quote was removed + $line =~ s/\s+$//; + $line =~ s/^\s+// unless ($in_continued_quote); + + $self->extract_token(0); + $token = $line; + $type = 'q'; + $block_type = ""; + $container_type = ""; + $container_environment = ""; + $type_sequence = ""; + $self->store_token_to_go(); + $self->output_line_to_go(); + return; + } + + ############################ + # Handle all other lines ... + ############################ + + ####################################################### + # FIXME: this should become unnecessary + # making $j+2 valid simplifies coding + my $rnew_blank = + copy_token_as_type( $rinput_token_array->[$jmax], 'b' ); + push @{$rinput_token_array}, $rnew_blank; + push @{$rinput_token_array}, $rnew_blank; + ####################################################### + + # If we just saw the end of an elsif block, write nag message + # if we do not see another elseif or an else. + if ($looking_for_else) { + + unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) { + write_logfile_entry("(No else block)\n"); + } + $looking_for_else = 0; + } + + # This is a good place to kill incomplete one-line blocks + if ( + ( + ( $semicolons_before_block_self_destruct == 0 ) + && ( $max_index_to_go >= 0 ) + && ( $last_old_nonblank_type eq ';' ) + && ( $first_new_nonblank_token ne '}' ) + ) + + # Patch for RT #98902. Honor request to break at old commas. + || ( $rOpts_break_at_old_comma_breakpoints + && $max_index_to_go >= 0 + && $last_old_nonblank_type eq ',' ) + ) + { + $forced_breakpoint_to_go[$max_index_to_go] = 1 + if ($rOpts_break_at_old_comma_breakpoints); + destroy_one_line_block(); + $self->output_line_to_go(); + } + + # loop to process the tokens one-by-one + $type = 'b'; + $token = ""; + + # We do not want a leading blank if the previous batch just got output + my $jmin = 0; + if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) { + $jmin = 1; + } + + foreach my $j ( $jmin .. $jmax ) { + + # pull out the local values for this token + $self->extract_token($j); + + if ( $type eq '#' ) { + + # trim trailing whitespace + # (there is no option at present to prevent this) + $token =~ s/\s*$//; + + if ( + $rOpts->{'delete-side-comments'} + + # delete closing side comments if necessary + || ( $rOpts->{'delete-closing-side-comments'} + && $token =~ /$closing_side_comment_prefix_pattern/o + && $last_nonblank_block_type =~ + /$closing_side_comment_list_pattern/o ) + ) + { + if ( $types_to_go[$max_index_to_go] eq 'b' ) { + unstore_token_to_go(); + } + last; + } + } + + # If we are continuing after seeing a right curly brace, flush + # buffer unless we see what we are looking for, as in + # } else ... + if ( $rbrace_follower && $type ne 'b' ) { + + unless ( $rbrace_follower->{$token} ) { + $self->output_line_to_go(); + } + $rbrace_follower = undef; + } + + $j_next = + ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' ) + ? $j + 2 + : $j + 1; + $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_]; + $next_nonblank_token_type = + $rinput_token_array->[$j_next]->[_TYPE_]; + + ###################### + # MAYBE MOVE ELSEWHERE? + ###################### + if ( $type eq 'Q' ) { + note_embedded_tab() if ( $token =~ "\t" ); + + # make note of something like '$var = s/xxx/yyy/;' + # in case it should have been '$var =~ s/xxx/yyy/;' + if ( + $token =~ /^(s|tr|y|m|\/)/ + && $last_nonblank_token =~ /^(=|==|!=)$/ + + # preceded by simple scalar + && $last_last_nonblank_type eq 'i' + && $last_last_nonblank_token =~ /^\$/ + + # followed by some kind of termination + # (but give complaint if we can's see far enough ahead) + && $next_nonblank_token =~ /^[; \)\}]$/ + + # scalar is not declared + && !( + $types_to_go[0] eq 'k' + && $tokens_to_go[0] =~ /^(my|our|local)$/ + ) + ) + { + my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; + complain( +"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" + ); + } + } + + # Do not allow breaks which would promote a side comment to a + # block comment. In order to allow a break before an opening + # or closing BLOCK, followed by a side comment, those sections + # of code will handle this flag separately. + my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); + my $is_opening_BLOCK = + ( $type eq '{' + && $token eq '{' + && $block_type + && $block_type ne 't' ); + my $is_closing_BLOCK = + ( $type eq '}' + && $token eq '}' + && $block_type + && $block_type ne 't' ); + + if ( $side_comment_follows + && !$is_opening_BLOCK + && !$is_closing_BLOCK ) + { + $no_internal_newlines = 1; + } + + # We're only going to handle breaking for code BLOCKS at this + # (top) level. Other indentation breaks will be handled by + # sub scan_list, which is better suited to dealing with them. + if ($is_opening_BLOCK) { + + # Tentatively output this token. This is required before + # calling starting_one_line_block. We may have to unstore + # it, though, if we have to break before it. + $self->store_token_to_go($side_comment_follows); + + # Look ahead to see if we might form a one-line block.. + my $too_long = + $self->starting_one_line_block( $j, $jmax, $level, $slevel, + $ci_level, $rinput_token_array ); + clear_breakpoint_undo_stack(); + + # to simplify the logic below, set a flag to indicate if + # this opening brace is far from the keyword which introduces it + my $keyword_on_same_line = 1; + if ( ( $max_index_to_go >= 0 ) + && ( $last_nonblank_type eq ')' ) + && ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) ) + { + $keyword_on_same_line = 0; + } + + # decide if user requested break before '{' + my $want_break = + + # use -bl flag if not a sub block of any type + $block_type !~ /^sub\b/ + ? $rOpts->{'opening-brace-on-new-line'} + + # use -sbl flag for a named sub block + : $block_type !~ /$ASUB_PATTERN/ + ? $rOpts->{'opening-sub-brace-on-new-line'} + + # use -asbl flag for an anonymous sub block + : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; + + # Do not break if this token is welded to the left + if ( weld_len_left( $type_sequence, $token ) ) { + $want_break = 0; + } + + # Break before an opening '{' ... + if ( + + # if requested + $want_break + + # and we were unable to start looking for a block, + && $index_start_one_line_block == UNDEFINED_INDEX + + # or if it will not be on same line as its keyword, so that + # it will be outdented (eval.t, overload.t), and the user + # has not insisted on keeping it on the right + || ( !$keyword_on_same_line + && !$rOpts->{'opening-brace-always-on-right'} ) + + ) + { + + # but only if allowed + unless ($no_internal_newlines) { + + # since we already stored this token, we must unstore it + $self->unstore_token_to_go(); + + # then output the line + $self->output_line_to_go(); + + # and now store this token at the start of a new line + $self->store_token_to_go($side_comment_follows); + } + } + + # Now update for side comment + if ($side_comment_follows) { $no_internal_newlines = 1 } + + # now output this line + unless ($no_internal_newlines) { + $self->output_line_to_go(); + } + } + + elsif ($is_closing_BLOCK) { + + # If there is a pending one-line block .. + if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + + # we have to terminate it if.. + if ( + + # it is too long (final length may be different from + # initial estimate). note: must allow 1 space for this + # token + excess_line_length( $index_start_one_line_block, + $max_index_to_go ) >= 0 + + # or if it has too many semicolons + || ( $semicolons_before_block_self_destruct == 0 + && $last_nonblank_type ne ';' ) + ) + { + destroy_one_line_block(); + } + } + + # put a break before this closing curly brace if appropriate + unless ( $no_internal_newlines + || $index_start_one_line_block != UNDEFINED_INDEX ) + { + + # write out everything before this closing curly brace + $self->output_line_to_go(); + } + + # Now update for side comment + if ($side_comment_follows) { $no_internal_newlines = 1 } + + # store the closing curly brace + $self->store_token_to_go(); + + # ok, we just stored a closing curly brace. Often, but + # not always, we want to end the line immediately. + # So now we have to check for special cases. + + # if this '}' successfully ends a one-line block.. + my $is_one_line_block = 0; + my $keep_going = 0; + if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + + # Remember the type of token just before the + # opening brace. It would be more general to use + # a stack, but this will work for one-line blocks. + $is_one_line_block = + $types_to_go[$index_start_one_line_block]; + + # we have to actually make it by removing tentative + # breaks that were set within it + undo_forced_breakpoint_stack(0); + set_nobreaks( $index_start_one_line_block, + $max_index_to_go - 1 ); + + # then re-initialize for the next one-line block + destroy_one_line_block(); + + # then decide if we want to break after the '}' .. + # We will keep going to allow certain brace followers as in: + # do { $ifclosed = 1; last } unless $losing; + # + # But make a line break if the curly ends a + # significant block: + if ( + ( + $is_block_without_semicolon{$block_type} + + # Follow users break point for + # one line block types U & G, such as a 'try' block + || $is_one_line_block =~ /^[UG]$/ && $j == $jmax + ) + + # if needless semicolon follows we handle it later + && $next_nonblank_token ne ';' + ) + { + $self->output_line_to_go() + unless ($no_internal_newlines); + } + } + + # set string indicating what we need to look for brace follower + # tokens + if ( $block_type eq 'do' ) { + $rbrace_follower = \%is_do_follower; + } + elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { + $rbrace_follower = \%is_if_brace_follower; + } + elsif ( $block_type eq 'else' ) { + $rbrace_follower = \%is_else_brace_follower; + } + + # added eval for borris.t + elsif ($is_sort_map_grep_eval{$block_type} + || $is_one_line_block eq 'G' ) + { + $rbrace_follower = undef; + $keep_going = 1; + } + + # anonymous sub + elsif ( $block_type =~ /$ASUB_PATTERN/ ) { + + if ($is_one_line_block) { + $rbrace_follower = \%is_anon_sub_1_brace_follower; + } + else { + $rbrace_follower = \%is_anon_sub_brace_follower; + } + } + + # None of the above: specify what can follow a closing + # brace of a block which is not an + # if/elsif/else/do/sort/map/grep/eval + # Testfiles: + # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t + else { + $rbrace_follower = \%is_other_brace_follower; + } + + # See if an elsif block is followed by another elsif or else; + # complain if not. + if ( $block_type eq 'elsif' ) { + + if ( $next_nonblank_token_type eq 'b' ) { # end of line? + $looking_for_else = 1; # ok, check on next line + } + else { + + unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { + write_logfile_entry("No else block :(\n"); + } + } + } + + # keep going after certain block types (map,sort,grep,eval) + # added eval for borris.t + if ($keep_going) { + + # keep going + } + + # if no more tokens, postpone decision until re-entring + elsif ( ( $next_nonblank_token_type eq 'b' ) + && $rOpts_add_newlines ) + { + unless ($rbrace_follower) { + $self->output_line_to_go() + unless ($no_internal_newlines); + } + } + + elsif ($rbrace_follower) { + + unless ( $rbrace_follower->{$next_nonblank_token} ) { + $self->output_line_to_go() + unless ($no_internal_newlines); + } + $rbrace_follower = undef; + } + + else { + $self->output_line_to_go() unless ($no_internal_newlines); + } + + } # end treatment of closing block token + + # handle semicolon + elsif ( $type eq ';' ) { + + # kill one-line blocks with too many semicolons + $semicolons_before_block_self_destruct--; + if ( + ( $semicolons_before_block_self_destruct < 0 ) + || ( $semicolons_before_block_self_destruct == 0 + && $next_nonblank_token_type !~ /^[b\}]$/ ) + ) + { + destroy_one_line_block(); + } + + # Remove unnecessary semicolons, but not after bare + # blocks, where it could be unsafe if the brace is + # mistokenized. + if ( + ( + $last_nonblank_token eq '}' + && ( + $is_block_without_semicolon{ + $last_nonblank_block_type} + || $last_nonblank_block_type =~ /$SUB_PATTERN/ + || $last_nonblank_block_type =~ /^\w+:$/ ) + ) + || $last_nonblank_type eq ';' + ) + { + + if ( + $rOpts->{'delete-semicolons'} + + # don't delete ; before a # because it would promote it + # to a block comment + && ( $next_nonblank_token_type ne '#' ) + ) + { + note_deleted_semicolon(); + $self->output_line_to_go() + unless ( $no_internal_newlines + || $index_start_one_line_block != UNDEFINED_INDEX ); + next; + } + else { + write_logfile_entry("Extra ';'\n"); + } + } + $self->store_token_to_go(); + + $self->output_line_to_go() + unless ( $no_internal_newlines + || ( $rOpts_keep_interior_semicolons && $j < $jmax ) + || ( $next_nonblank_token eq '}' ) ); + + } + + # handle here_doc target string + elsif ( $type eq 'h' ) { + + # no newlines after seeing here-target + $no_internal_newlines = 1; + destroy_one_line_block(); + $self->store_token_to_go(); + } + + # handle all other token types + else { + + $self->store_token_to_go(); + } + + # remember two previous nonblank OUTPUT tokens + if ( $type ne '#' && $type ne 'b' ) { + $last_last_nonblank_token = $last_nonblank_token; + $last_last_nonblank_type = $last_nonblank_type; + $last_nonblank_token = $token; + $last_nonblank_type = $type; + $last_nonblank_block_type = $block_type; + } + + # unset the continued-quote flag since it only applies to the + # first token, and we want to resume normal formatting if + # there are additional tokens on the line + $in_continued_quote = 0; + + } # end of loop over all tokens in this 'line_of_tokens' + + # we have to flush .. + if ( + + # if there is a side comment + ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) + + # if this line ends in a quote + # NOTE: This is critically important for insuring that quoted lines + # do not get processed by things like -sot and -sct + || $in_quote + + # if this is a VERSION statement + || $is_VERSION_statement + + # to keep a label at the end of a line + || $type eq 'J' + + # if we are instructed to keep all old line breaks + || !$rOpts->{'delete-old-newlines'} + ) + { + destroy_one_line_block(); + $self->output_line_to_go(); + } + + # mark old line breakpoints in current output stream + if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { + my $jobp = $max_index_to_go; + if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 ) + { + $jobp--; + } + $old_breakpoint_to_go[$jobp] = 1; + } + return; + } ## end sub print_line_of_tokens +} ## end block print_line_of_tokens + +sub consecutive_nonblank_lines { + return $file_writer_object->get_consecutive_nonblank_lines() + + $vertical_aligner_object->get_cached_line_count(); +} + +# sub output_line_to_go sends one logical line of tokens on down the +# pipeline to the VerticalAligner package, breaking the line into continuation +# lines as necessary. The line of tokens is ready to go in the "to_go" +# arrays. +sub output_line_to_go { + + my $self = shift; + my $rLL = $self->{rLL}; + + # debug stuff; this routine can be called from many points + FORMATTER_DEBUG_FLAG_OUTPUT && do { + my ( $a, $b, $c ) = caller; + write_diagnostics( +"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" + ); + my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; + write_diagnostics("$output_str\n"); + }; + + # Do not end line in a weld + # TODO: Move this fix into the routine? + #my $jnb = $max_index_to_go; + #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- } + return if ( weld_len_right_to_go($max_index_to_go) ); + + # just set a tentative breakpoint if we might be in a one-line block + if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + set_forced_breakpoint($max_index_to_go); + return; + } + +## my $cscw_block_comment; +## $cscw_block_comment = $self->add_closing_side_comment() +## if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); + + my $comma_arrow_count_contained = match_opening_and_closing_tokens(); + + # tell the -lp option we are outputting a batch so it can close + # any unfinished items in its stack + finish_lp_batch(); + + # If this line ends in a code block brace, set breaks at any + # previous closing code block braces to breakup a chain of code + # blocks on one line. This is very rare but can happen for + # user-defined subs. For example we might be looking at this: + # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { + my $saw_good_break = 0; # flag to force breaks even if short line + if ( + + # looking for opening or closing block brace + $block_type_to_go[$max_index_to_go] + + # but not one of these which are never duplicated on a line: + # until|while|for|if|elsif|else + && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } + ) + { + my $lev = $nesting_depth_to_go[$max_index_to_go]; + + # Walk backwards from the end and + # set break at any closing block braces at the same level. + # But quit if we are not in a chain of blocks. + for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { + last if ( $levels_to_go[$i] < $lev ); # stop at a lower level + next if ( $levels_to_go[$i] > $lev ); # skip past higher level + + if ( $block_type_to_go[$i] ) { + if ( $tokens_to_go[$i] eq '}' ) { + set_forced_breakpoint($i); + $saw_good_break = 1; + } + } + + # quit if we see anything besides words, function, blanks + # at this level + elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } + } + } + + my $imin = 0; + my $imax = $max_index_to_go; + + # trim any blank tokens + if ( $max_index_to_go >= 0 ) { + if ( $types_to_go[$imin] eq 'b' ) { $imin++ } + if ( $types_to_go[$imax] eq 'b' ) { $imax-- } + } + + # anything left to write? + if ( $imin <= $imax ) { + + # add a blank line before certain key types but not after a comment + if ( $last_line_leading_type !~ /^[#]/ ) { + my $want_blank = 0; + my $leading_token = $tokens_to_go[$imin]; + my $leading_type = $types_to_go[$imin]; + + # blank lines before subs except declarations and one-liners + # MCONVERSION LOCATION - for sub tokenization change + if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { + $want_blank = $rOpts->{'blank-lines-before-subs'} + if ( + terminal_type( \@types_to_go, \@block_type_to_go, $imin, + $imax ) !~ /^[\;\}]$/ + ); + } + + # break before all package declarations + # MCONVERSION LOCATION - for tokenizaton change + elsif ($leading_token =~ /^(package\s)/ + && $leading_type eq 'i' ) + { + $want_blank = $rOpts->{'blank-lines-before-packages'}; + } + + # break before certain key blocks except one-liners + if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { + $want_blank = $rOpts->{'blank-lines-before-subs'} + if ( + terminal_type( \@types_to_go, \@block_type_to_go, $imin, + $imax ) ne '}' + ); + } + + # Break before certain block types if we haven't had a + # break at this level for a while. This is the + # difficult decision.. + elsif ($leading_type eq 'k' + && $last_line_leading_type ne 'b' + && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ ) + { + my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; + if ( !defined($lc) ) { $lc = 0 } + + # patch for RT #128216: no blank line inserted at a level change + if ( $levels_to_go[$imin] != $last_line_leading_level ) { + $lc = 0; + } + + $want_blank = + $rOpts->{'blanks-before-blocks'} + && $lc >= $rOpts->{'long-block-line-count'} + && consecutive_nonblank_lines() >= + $rOpts->{'long-block-line-count'} + && ( + terminal_type( \@types_to_go, \@block_type_to_go, $imin, + $imax ) ne '}' + ); + } + + # Check for blank lines wanted before a closing brace + if ( $leading_token eq '}' ) { + if ( $rOpts->{'blank-lines-before-closing-block'} + && $block_type_to_go[$imin] + && $block_type_to_go[$imin] =~ + /$blank_lines_before_closing_block_pattern/ ) + { + my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; + if ( $nblanks > $want_blank ) { + $want_blank = $nblanks; + } + } + } + + if ($want_blank) { + + # future: send blank line down normal path to VerticalAligner + Perl::Tidy::VerticalAligner::flush(); + $file_writer_object->require_blank_code_lines($want_blank); + } + } + + # update blank line variables and count number of consecutive + # non-blank, non-comment lines at this level + $last_last_line_leading_level = $last_line_leading_level; + $last_line_leading_level = $levels_to_go[$imin]; + if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } + $last_line_leading_type = $types_to_go[$imin]; + if ( $last_line_leading_level == $last_last_line_leading_level + && $last_line_leading_type ne 'b' + && $last_line_leading_type ne '#' + && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) + { + $nonblank_lines_at_depth[$last_line_leading_level]++; + } + else { + $nonblank_lines_at_depth[$last_line_leading_level] = 1; + } + + FORMATTER_DEBUG_FLAG_FLUSH && do { + my ( $package, $file, $line ) = caller; + print STDOUT +"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; + }; + + # add a couple of extra terminal blank tokens + pad_array_to_go(); + + # set all forced breakpoints for good list formatting + my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; + + my $old_line_count_in_batch = + $self->get_old_line_count( $K_to_go[0], $K_to_go[$max_index_to_go] ); + + if ( + $is_long_line + || $old_line_count_in_batch > 1 + + # must always call scan_list() with unbalanced batches because it + # is maintaining some stacks + || is_unbalanced_batch() + + # call scan_list if we might want to break at commas + || ( + $comma_count_in_batch + && ( $rOpts_maximum_fields_per_table > 0 + || $rOpts_comma_arrow_breakpoints == 0 ) + ) + + # call scan_list if user may want to break open some one-line + # hash references + || ( $comma_arrow_count_contained + && $rOpts_comma_arrow_breakpoints != 3 ) + ) + { + ## This caused problems in one version of perl for unknown reasons: + ## $saw_good_break ||= scan_list(); + my $sgb = scan_list(); + $saw_good_break ||= $sgb; + } + + # let $ri_first and $ri_last be references to lists of + # first and last tokens of line fragments to output.. + my ( $ri_first, $ri_last ); + + # write a single line if.. + if ( + + # we aren't allowed to add any newlines + !$rOpts_add_newlines + + # or, we don't already have an interior breakpoint + # and we didn't see a good breakpoint + || ( + !$forced_breakpoint_count + && !$saw_good_break + + # and this line is 'short' + && !$is_long_line + ) + ) + { + @{$ri_first} = ($imin); + @{$ri_last} = ($imax); + } + + # otherwise use multiple lines + else { + + ( $ri_first, $ri_last, my $colon_count ) = + set_continuation_breaks($saw_good_break); + + break_all_chain_tokens( $ri_first, $ri_last ); + + break_equals( $ri_first, $ri_last ); + + # now we do a correction step to clean this up a bit + # (The only time we would not do this is for debugging) + if ( $rOpts->{'recombine'} ) { + ( $ri_first, $ri_last ) = + recombine_breakpoints( $ri_first, $ri_last ); + } + + insert_final_breaks( $ri_first, $ri_last ) if $colon_count; + } + + # do corrector step if -lp option is used + my $do_not_pad = 0; + if ($rOpts_line_up_parentheses) { + $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); + } + $self->unmask_phantom_semicolons( $ri_first, $ri_last ); + if ( $rOpts_one_line_block_semicolons == 0 ) { + $self->delete_one_line_semicolons( $ri_first, $ri_last ); + } + $self->send_lines_to_vertical_aligner( $ri_first, $ri_last, + $do_not_pad ); + + # Insert any requested blank lines after an opening brace. We have to + # skip back before any side comment to find the terminal token + my $iterm; + for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) { + next if $types_to_go[$iterm] eq '#'; + next if $types_to_go[$iterm] eq 'b'; + last; + } + + # write requested number of blank lines after an opening block brace + if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) { + if ( $rOpts->{'blank-lines-after-opening-block'} + && $block_type_to_go[$iterm] + && $block_type_to_go[$iterm] =~ + /$blank_lines_after_opening_block_pattern/ ) + { + my $nblanks = $rOpts->{'blank-lines-after-opening-block'}; + Perl::Tidy::VerticalAligner::flush(); + $file_writer_object->require_blank_code_lines($nblanks); + } + } + } + + prepare_for_new_input_lines(); + +## # output any new -cscw block comment +## if ($cscw_block_comment) { +## $self->flush(); +## $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); +## } + return; +} + +sub note_added_semicolon { + my ($line_number) = @_; + $last_added_semicolon_at = $line_number; + if ( $added_semicolon_count == 0 ) { + $first_added_semicolon_at = $last_added_semicolon_at; + } + $added_semicolon_count++; + write_logfile_entry("Added ';' here\n"); + return; +} + +sub note_deleted_semicolon { + $last_deleted_semicolon_at = $input_line_number; + if ( $deleted_semicolon_count == 0 ) { + $first_deleted_semicolon_at = $last_deleted_semicolon_at; + } + $deleted_semicolon_count++; + write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) + return; +} + +sub note_embedded_tab { + $embedded_tab_count++; + $last_embedded_tab_at = $input_line_number; + if ( !$first_embedded_tab_at ) { + $first_embedded_tab_at = $last_embedded_tab_at; + } + + if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { + write_logfile_entry("Embedded tabs in quote or pattern\n"); + } + return; +} + +sub starting_one_line_block { + + # after seeing an opening curly brace, look for the closing brace + # and see if the entire block will fit on a line. This routine is + # not always right because it uses the old whitespace, so a check + # is made later (at the closing brace) to make sure we really + # have a one-line block. We have to do this preliminary check, + # though, because otherwise we would always break at a semicolon + # within a one-line block if the block contains multiple statements. + + my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_; + my $rbreak_container = $self->{rbreak_container}; + + my $jmax_check = @{$rtoken_array}; + if ( $jmax_check < $jmax ) { + Fault("jmax=$jmax > $jmax_check"); + } + + # kill any current block - we can only go 1 deep + destroy_one_line_block(); + + # return value: + # 1=distance from start of block to opening brace exceeds line length + # 0=otherwise + + my $i_start = 0; + + # shouldn't happen: there must have been a prior call to + # store_token_to_go to put the opening brace in the output stream + if ( $max_index_to_go < 0 ) { + Fault("program bug: store_token_to_go called incorrectly\n"); + } + + # return if block should be broken + my $type_sequence = $rtoken_array->[$j]->[_TYPE_SEQUENCE_]; + if ( $rbreak_container->{$type_sequence} ) { + return 0; + } + + my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_]; + + # find the starting keyword for this block (such as 'if', 'else', ...) + + if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) { + $i_start = $max_index_to_go; + } + + # the previous nonblank token should start these block types + elsif (( $last_last_nonblank_token_to_go eq $block_type ) + || ( $block_type =~ /^sub\b/ ) + || $block_type =~ /\(\)/ ) + { + $i_start = $last_last_nonblank_index_to_go; + + # For signatures and extended syntax ... + # If this brace follows a parenthesized list, we should look back to + # find the keyword before the opening paren because otherwise we might + # form a one line block which stays intack, and cause the parenthesized + # expression to break open. That looks bad. However, actually + # searching for the opening paren is slow and tedius. + # The actual keyword is often at the start of a line, but might not be. + # For example, we might have an anonymous sub with signature list + # following a =>. It is safe to mark the start anywhere before the + # opening paren, so we just go back to the prevoious break (or start of + # the line) if that is before the opening paren. The minor downside is + # that we may very occasionally break open a block unnecessarily. + if ( $tokens_to_go[$i_start] eq ')' ) { + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; } + my $lev = $levels_to_go[$i_start]; + if ( $lev > $level ) { return 0 } + } + } + + elsif ( $last_last_nonblank_token_to_go eq ')' ) { + + # For something like "if (xxx) {", the keyword "if" will be + # just after the most recent break. This will be 0 unless + # we have just killed a one-line block and are starting another. + # (doif.t) + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { + $i_start++; + } + + # Patch to avoid breaking short blocks defined with extended_syntax: + # Strip off any trailing () which was added in the parser to mark + # the opening keyword. For example, in the following + # create( TypeFoo $e) {$bubba} + # the blocktype would be marked as create() + my $stripped_block_type = $block_type; + $stripped_block_type =~ s/\(\)$//; + + unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) { + return 0; + } + } + + # patch for SWITCH/CASE to retain one-line case/when blocks + elsif ( $block_type eq 'case' || $block_type eq 'when' ) { + + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { + $i_start++; + } + unless ( $tokens_to_go[$i_start] eq $block_type ) { + return 0; + } + } + + else { + return 1; + } + + my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; + + # see if length is too long to even start + if ( $pos > maximum_line_length($i_start) ) { + return 1; + } + + foreach my $i ( $j + 1 .. $jmax ) { + + # old whitespace could be arbitrarily large, so don't use it + if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 } + else { $pos += rtoken_length($i) } + + # Return false result if we exceed the maximum line length, + if ( $pos > maximum_line_length($i_start) ) { + return 0; + } + + # or encounter another opening brace before finding the closing brace. + elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{' + && $rtoken_array->[$i]->[_TYPE_] eq '{' + && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) + { + return 0; + } + + # if we find our closing brace.. + elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}' + && $rtoken_array->[$i]->[_TYPE_] eq '}' + && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) + { + + # be sure any trailing comment also fits on the line + my $i_nonblank = + ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1; + + # Patch for one-line sort/map/grep/eval blocks with side comments: + # We will ignore the side comment length for sort/map/grep/eval + # because this can lead to statements which change every time + # perltidy is run. Here is an example from Denis Moskowitz which + # oscillates between these two states without this patch: + +## -------- +## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## +## grep { +## $_->foo ne 'bar' +## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## -------- + + # When the first line is input it gets broken apart by the main + # line break logic in sub print_line_of_tokens. + # When the second line is input it gets recombined by + # print_line_of_tokens and passed to the output routines. The + # output routines (set_continuation_breaks) do not break it apart + # because the bond strengths are set to the highest possible value + # for grep/map/eval/sort blocks, so the first version gets output. + # It would be possible to fix this by changing bond strengths, + # but they are high to prevent errors in older versions of perl. + + if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#' + && !$is_sort_map_grep{$block_type} ) + { + + $pos += rtoken_length($i_nonblank); + + if ( $i_nonblank > $i + 1 ) { + + # source whitespace could be anything, assume + # at least one space before the hash on output + if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) { + $pos += 1; + } + else { $pos += rtoken_length( $i + 1 ) } + } + + if ( $pos >= maximum_line_length($i_start) ) { + return 0; + } + } + + # ok, it's a one-line block + create_one_line_block( $i_start, 20 ); + return 0; + } + + # just keep going for other characters + else { + } + } + + # Allow certain types of new one-line blocks to form by joining + # input lines. These can be safely done, but for other block types, + # we keep old one-line blocks but do not form new ones. It is not + # always a good idea to make as many one-line blocks as possible, + # so other types are not done. The user can always use -mangle. + if ( $is_sort_map_grep_eval{$block_type} ) { + create_one_line_block( $i_start, 1 ); + } + return 0; +} + +sub unstore_token_to_go { + + # remove most recent token from output stream + my $self = shift; + if ( $max_index_to_go > 0 ) { + $max_index_to_go--; + } + else { + $max_index_to_go = UNDEFINED_INDEX; + } + return; +} + +sub want_blank_line { + my $self = shift; + $self->flush(); + $file_writer_object->want_blank_line(); + return; +} + +sub write_unindented_line { + my ( $self, $line ) = @_; + $self->flush(); + $file_writer_object->write_line($line); + return; +} + +sub undo_ci { + + # Undo continuation indentation in certain sequences + # For example, we can undo continuation indentation in sort/map/grep chains + # my $dat1 = pack( "n*", + # map { $_, $lookup->{$_} } + # sort { $a <=> $b } + # grep { $lookup->{$_} ne $default } keys %$lookup ); + # To align the map/sort/grep keywords like this: + # my $dat1 = pack( "n*", + # map { $_, $lookup->{$_} } + # sort { $a <=> $b } + # grep { $lookup->{$_} ne $default } keys %$lookup ); + my ( $ri_first, $ri_last ) = @_; + my ( $line_1, $line_2, $lev_last ); + my $this_line_is_semicolon_terminated; + my $max_line = @{$ri_first} - 1; + + # looking at each line of this batch.. + # We are looking at leading tokens and looking for a sequence + # all at the same level and higher level than enclosing lines. + foreach my $line ( 0 .. $max_line ) { + + my $ibeg = $ri_first->[$line]; + my $lev = $levels_to_go[$ibeg]; + if ( $line > 0 ) { + + # if we have started a chain.. + if ($line_1) { + + # see if it continues.. + if ( $lev == $lev_last ) { + if ( $types_to_go[$ibeg] eq 'k' + && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) + { + + # chain continues... + # check for chain ending at end of a statement + if ( $line == $max_line ) { + + # see of this line ends a statement + my $iend = $ri_last->[$line]; + $this_line_is_semicolon_terminated = + $types_to_go[$iend] eq ';' + + # with possible side comment + || ( $types_to_go[$iend] eq '#' + && $iend - $ibeg >= 2 + && $types_to_go[ $iend - 2 ] eq ';' + && $types_to_go[ $iend - 1 ] eq 'b' ); + } + $line_2 = $line if ($this_line_is_semicolon_terminated); + } + else { + + # kill chain + $line_1 = undef; + } + } + elsif ( $lev < $lev_last ) { + + # chain ends with previous line + $line_2 = $line - 1; + } + elsif ( $lev > $lev_last ) { + + # kill chain + $line_1 = undef; + } + + # undo the continuation indentation if a chain ends + if ( defined($line_2) && defined($line_1) ) { + my $continuation_line_count = $line_2 - $line_1 + 1; + @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] = + (0) x ($continuation_line_count) + if ( $continuation_line_count >= 0 ); + @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] + = @reduced_spaces_to_go[ @{$ri_first} + [ $line_1 .. $line_2 ] ]; + $line_1 = undef; + } + } + + # not in a chain yet.. + else { + + # look for start of a new sort/map/grep chain + if ( $lev > $lev_last ) { + if ( $types_to_go[$ibeg] eq 'k' + && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) + { + $line_1 = $line; + } + } + } + } + $lev_last = $lev; + } + return; +} + +sub undo_lp_ci { + + # If there is a single, long parameter within parens, like this: + # + # $self->command( "/msg " + # . $infoline->chan + # . " You said $1, but did you know that it's square was " + # . $1 * $1 . " ?" ); + # + # we can remove the continuation indentation of the 2nd and higher lines + # to achieve this effect, which is more pleasing: + # + # $self->command("/msg " + # . $infoline->chan + # . " You said $1, but did you know that it's square was " + # . $1 * $1 . " ?"); + + my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; + my $max_line = @{$ri_first} - 1; + + # must be multiple lines + return unless $max_line > $line_open; + + my $lev_start = $levels_to_go[$i_start]; + my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; + + # see if all additional lines in this container have continuation + # indentation + my $n; + my $line_1 = 1 + $line_open; + for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { + my $ibeg = $ri_first->[$n]; + my $iend = $ri_last->[$n]; + if ( $ibeg eq $closing_index ) { $n--; last } + return if ( $lev_start != $levels_to_go[$ibeg] ); + return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); + last if ( $closing_index <= $iend ); + } + + # we can reduce the indentation of all continuation lines + my $continuation_line_count = $n - $line_open; + @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = + (0) x ($continuation_line_count); + @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = + @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ]; + return; +} + +sub pad_token { + + # insert $pad_spaces before token number $ipad + my ( $ipad, $pad_spaces ) = @_; + if ( $pad_spaces > 0 ) { + $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; + } + elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) { + $tokens_to_go[$ipad] = ""; + } + else { + + # shouldn't happen + return; + } + + $token_lengths_to_go[$ipad] += $pad_spaces; + foreach my $i ( $ipad .. $max_index_to_go ) { + $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; + } + return; +} + +{ + my %is_math_op; + + BEGIN { + + my @q = qw( + - * / ); + @is_math_op{@q} = (1) x scalar(@q); + } + + sub set_logical_padding { + + # Look at a batch of lines and see if extra padding can improve the + # alignment when there are certain leading operators. Here is an + # example, in which some extra space is introduced before + # '( $year' to make it line up with the subsequent lines: + # + # if ( ( $Year < 1601 ) + # || ( $Year > 2899 ) + # || ( $EndYear < 1601 ) + # || ( $EndYear > 2899 ) ) + # { + # &Error_OutOfRange; + # } + # + my ( $ri_first, $ri_last ) = @_; + my $max_line = @{$ri_first} - 1; + + # FIXME: move these declarations below + my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces, + $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); + + # looking at each line of this batch.. + foreach my $line ( 0 .. $max_line - 1 ) { + + # see if the next line begins with a logical operator + $ibeg = $ri_first->[$line]; + $iend = $ri_last->[$line]; + $ibeg_next = $ri_first->[ $line + 1 ]; + $tok_next = $tokens_to_go[$ibeg_next]; + $type_next = $types_to_go[$ibeg_next]; + + $has_leading_op_next = ( $tok_next =~ /^\w/ ) + ? $is_chain_operator{$tok_next} # + - * / : ? && || + : $is_chain_operator{$type_next}; # and, or + + next unless ($has_leading_op_next); + + # next line must not be at lesser depth + next + if ( $nesting_depth_to_go[$ibeg] > + $nesting_depth_to_go[$ibeg_next] ); + + # identify the token in this line to be padded on the left + $ipad = undef; + + # handle lines at same depth... + if ( $nesting_depth_to_go[$ibeg] == + $nesting_depth_to_go[$ibeg_next] ) + { + + # if this is not first line of the batch ... + if ( $line > 0 ) { + + # and we have leading operator.. + next if $has_leading_op; + + # Introduce padding if.. + # 1. the previous line is at lesser depth, or + # 2. the previous line ends in an assignment + # 3. the previous line ends in a 'return' + # 4. the previous line ends in a comma + # Example 1: previous line at lesser depth + # if ( ( $Year < 1601 ) # <- we are here but + # || ( $Year > 2899 ) # list has not yet + # || ( $EndYear < 1601 ) # collapsed vertically + # || ( $EndYear > 2899 ) ) + # { + # + # Example 2: previous line ending in assignment: + # $leapyear = + # $year % 4 ? 0 # <- We are here + # : $year % 100 ? 1 + # : $year % 400 ? 0 + # : 1; + # + # Example 3: previous line ending in comma: + # push @expr, + # /test/ ? undef + # : eval($_) ? 1 + # : eval($_) ? 1 + # : 0; + + # be sure levels agree (do not indent after an indented 'if') + next + if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); + + # allow padding on first line after a comma but only if: + # (1) this is line 2 and + # (2) there are at more than three lines and + # (3) lines 3 and 4 have the same leading operator + # These rules try to prevent padding within a long + # comma-separated list. + my $ok_comma; + if ( $types_to_go[$iendm] eq ',' + && $line == 1 + && $max_line > 2 ) + { + my $ibeg_next_next = $ri_first->[ $line + 2 ]; + my $tok_next_next = $tokens_to_go[$ibeg_next_next]; + $ok_comma = $tok_next_next eq $tok_next; + } + + next + unless ( + $is_assignment{ $types_to_go[$iendm] } + || $ok_comma + || ( $nesting_depth_to_go[$ibegm] < + $nesting_depth_to_go[$ibeg] ) + || ( $types_to_go[$iendm] eq 'k' + && $tokens_to_go[$iendm] eq 'return' ) + ); + + # we will add padding before the first token + $ipad = $ibeg; + } + + # for first line of the batch.. + else { + + # WARNING: Never indent if first line is starting in a + # continued quote, which would change the quote. + next if $starting_in_quote; + + # if this is text after closing '}' + # then look for an interior token to pad + if ( $types_to_go[$ibeg] eq '}' ) { + + } + + # otherwise, we might pad if it looks really good + else { + + # we might pad token $ibeg, so be sure that it + # is at the same depth as the next line. + next + if ( $nesting_depth_to_go[$ibeg] != + $nesting_depth_to_go[$ibeg_next] ); + + # We can pad on line 1 of a statement if at least 3 + # lines will be aligned. Otherwise, it + # can look very confusing. + + # We have to be careful not to pad if there are too few + # lines. The current rule is: + # (1) in general we require at least 3 consecutive lines + # with the same leading chain operator token, + # (2) but an exception is that we only require two lines + # with leading colons if there are no more lines. For example, + # the first $i in the following snippet would get padding + # by the second rule: + # + # $i == 1 ? ( "First", "Color" ) + # : $i == 2 ? ( "Then", "Rarity" ) + # : ( "Then", "Name" ); + + if ( $max_line > 1 ) { + my $leading_token = $tokens_to_go[$ibeg_next]; + my $tokens_differ; + + # never indent line 1 of a '.' series because + # previous line is most likely at same level. + # TODO: we should also look at the leasing_spaces + # of the last output line and skip if it is same + # as this line. + next if ( $leading_token eq '.' ); + + my $count = 1; + foreach my $l ( 2 .. 3 ) { + last if ( $line + $l > $max_line ); + my $ibeg_next_next = $ri_first->[ $line + $l ]; + if ( $tokens_to_go[$ibeg_next_next] ne + $leading_token ) + { + $tokens_differ = 1; + last; + } + $count++; + } + next if ($tokens_differ); + next if ( $count < 3 && $leading_token ne ':' ); + $ipad = $ibeg; + } + else { + next; + } + } + } + } + + # find interior token to pad if necessary + if ( !defined($ipad) ) { + + for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { + + # find any unclosed container + next + unless ( $type_sequence_to_go[$i] + && $mate_index_to_go[$i] > $iend ); + + # find next nonblank token to pad + $ipad = $inext_to_go[$i]; + last if ( $ipad > $iend ); + } + last unless $ipad; + } + + # We cannot pad the first leading token of a file because + # it could cause a bug in which the starting indentation + # level is guessed incorrectly each time the code is run + # though perltidy, thus causing the code to march off to + # the right. For example, the following snippet would have + # this problem: + +## ov_method mycan( $package, '(""' ), $package +## or ov_method mycan( $package, '(0+' ), $package +## or ov_method mycan( $package, '(bool' ), $package +## or ov_method mycan( $package, '(nomethod' ), $package; + + # If this snippet is within a block this won't happen + # unless the user just processes the snippet alone within + # an editor. In that case either the user will see and + # fix the problem or it will be corrected next time the + # entire file is processed with perltidy. + next if ( $ipad == 0 && $peak_batch_size <= 1 ); + +## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT +## IT DID MORE HARM THAN GOOD +## ceil( +## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 +## / $upem +## ), +##? # do not put leading padding for just 2 lines of math +##? if ( $ipad == $ibeg +##? && $line > 0 +##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] +##? && $is_math_op{$type_next} +##? && $line + 2 <= $max_line ) +##? { +##? my $ibeg_next_next = $ri_first->[ $line + 2 ]; +##? my $type_next_next = $types_to_go[$ibeg_next_next]; +##? next if !$is_math_op{$type_next_next}; +##? } + + # next line must not be at greater depth + my $iend_next = $ri_last->[ $line + 1 ]; + next + if ( $nesting_depth_to_go[ $iend_next + 1 ] > + $nesting_depth_to_go[$ipad] ); + + # lines must be somewhat similar to be padded.. + my $inext_next = $inext_to_go[$ibeg_next]; + my $type = $types_to_go[$ipad]; + my $type_next = $types_to_go[ $ipad + 1 ]; + + # see if there are multiple continuation lines + my $logical_continuation_lines = 1; + if ( $line + 2 <= $max_line ) { + my $leading_token = $tokens_to_go[$ibeg_next]; + my $ibeg_next_next = $ri_first->[ $line + 2 ]; + if ( $tokens_to_go[$ibeg_next_next] eq $leading_token + && $nesting_depth_to_go[$ibeg_next] eq + $nesting_depth_to_go[$ibeg_next_next] ) + { + $logical_continuation_lines++; + } + } + + # see if leading types match + my $types_match = $types_to_go[$inext_next] eq $type; + my $matches_without_bang; + + # if first line has leading ! then compare the following token + if ( !$types_match && $type eq '!' ) { + $types_match = $matches_without_bang = + $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; + } + + if ( + + # either we have multiple continuation lines to follow + # and we are not padding the first token + ( $logical_continuation_lines > 1 && $ipad > 0 ) + + # or.. + || ( + + # types must match + $types_match + + # and keywords must match if keyword + && !( + $type eq 'k' + && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] + ) + ) + ) + { + + #----------------------begin special checks-------------- + # + # SPECIAL CHECK 1: + # A check is needed before we can make the pad. + # If we are in a list with some long items, we want each + # item to stand out. So in the following example, the + # first line beginning with '$casefold->' would look good + # padded to align with the next line, but then it + # would be indented more than the last line, so we + # won't do it. + # + # ok( + # $casefold->{code} eq '0041' + # && $casefold->{status} eq 'C' + # && $casefold->{mapping} eq '0061', + # 'casefold 0x41' + # ); + # + # Note: + # It would be faster, and almost as good, to use a comma + # count, and not pad if comma_count > 1 and the previous + # line did not end with a comma. + # + my $ok_to_pad = 1; + + my $ibg = $ri_first->[ $line + 1 ]; + my $depth = $nesting_depth_to_go[ $ibg + 1 ]; + + # just use simplified formula for leading spaces to avoid + # needless sub calls + my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; + + # look at each line beyond the next .. + my $l = $line + 1; + foreach my $ltest ( $line + 2 .. $max_line ) { + $l = $ltest; + my $ibg = $ri_first->[$l]; + + # quit looking at the end of this container + last + if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) + || ( $nesting_depth_to_go[$ibg] < $depth ); + + # cannot do the pad if a later line would be + # outdented more + if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { + $ok_to_pad = 0; + last; + } + } + + # don't pad if we end in a broken list + if ( $l == $max_line ) { + my $i2 = $ri_last->[$l]; + if ( $types_to_go[$i2] eq '#' ) { + my $i1 = $ri_first->[$l]; + next + if ( + terminal_type( \@types_to_go, \@block_type_to_go, + $i1, $i2 ) eq ',' + ); + } + } + + # SPECIAL CHECK 2: + # a minus may introduce a quoted variable, and we will + # add the pad only if this line begins with a bare word, + # such as for the word 'Button' here: + # [ + # Button => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + # -accelerator => "Meta+$_" + # ]; + # + # On the other hand, if 'Button' is quoted, it looks best + # not to pad: + # [ + # 'Button' => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + # -accelerator => "Meta+$_" + # ]; + if ( $types_to_go[$ibeg_next] eq 'm' ) { + $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; + } + + next unless $ok_to_pad; + + #----------------------end special check--------------- + + my $length_1 = total_line_length( $ibeg, $ipad - 1 ); + my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); + $pad_spaces = $length_2 - $length_1; + + # If the first line has a leading ! and the second does + # not, then remove one space to try to align the next + # leading characters, which are often the same. For example: + # if ( !$ts + # || $ts == $self->Holder + # || $self->Holder->Type eq "Arena" ) + # + # This usually helps readability, but if there are subsequent + # ! operators things will still get messed up. For example: + # + # if ( !exists $Net::DNS::typesbyname{$qtype} + # && exists $Net::DNS::classesbyname{$qtype} + # && !exists $Net::DNS::classesbyname{$qclass} + # && exists $Net::DNS::typesbyname{$qclass} ) + # We can't fix that. + if ($matches_without_bang) { $pad_spaces-- } + + # make sure this won't change if -lp is used + my $indentation_1 = $leading_spaces_to_go[$ibeg]; + if ( ref($indentation_1) ) { + if ( $indentation_1->get_recoverable_spaces() == 0 ) { + my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; + unless ( $indentation_2->get_recoverable_spaces() == 0 ) + { + $pad_spaces = 0; + } + } + } + + # we might be able to handle a pad of -1 by removing a blank + # token + if ( $pad_spaces < 0 ) { + + if ( $pad_spaces == -1 ) { + if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) + { + pad_token( $ipad - 1, $pad_spaces ); + } + } + $pad_spaces = 0; + } + + # now apply any padding for alignment + if ( $ipad >= 0 && $pad_spaces ) { + + my $length_t = total_line_length( $ibeg, $iend ); + if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) + { + pad_token( $ipad, $pad_spaces ); + } + } + } + } + continue { + $iendm = $iend; + $ibegm = $ibeg; + $has_leading_op = $has_leading_op_next; + } # end of loop over lines + return; + } +} + +sub correct_lp_indentation { + + # When the -lp option is used, we need to make a last pass through + # each line to correct the indentation positions in case they differ + # from the predictions. This is necessary because perltidy uses a + # predictor/corrector method for aligning with opening parens. The + # predictor is usually good, but sometimes stumbles. The corrector + # tries to patch things up once the actual opening paren locations + # are known. + my ( $ri_first, $ri_last ) = @_; + my $do_not_pad = 0; + + # Note on flag '$do_not_pad': + # We want to avoid a situation like this, where the aligner inserts + # whitespace before the '=' to align it with a previous '=', because + # otherwise the parens might become mis-aligned in a situation like + # this, where the '=' has become aligned with the previous line, + # pushing the opening '(' forward beyond where we want it. + # + # $mkFloor::currentRoom = ''; + # $mkFloor::c_entry = $c->Entry( + # -width => '10', + # -relief => 'sunken', + # ... + # ); + # + # We leave it to the aligner to decide how to do this. + + # first remove continuation indentation if appropriate + my $max_line = @{$ri_first} - 1; + + # looking at each line of this batch.. + my ( $ibeg, $iend ); + foreach my $line ( 0 .. $max_line ) { + $ibeg = $ri_first->[$line]; + $iend = $ri_last->[$line]; + + # looking at each token in this output line.. + foreach my $i ( $ibeg .. $iend ) { + + # How many space characters to place before this token + # for special alignment. Actual padding is done in the + # continue block. + + # looking for next unvisited indentation item + my $indentation = $leading_spaces_to_go[$i]; + if ( !$indentation->get_marked() ) { + $indentation->set_marked(1); + + # looking for indentation item for which we are aligning + # with parens, braces, and brackets + next unless ( $indentation->get_align_paren() ); + + # skip closed container on this line + if ( $i > $ibeg ) { + my $im = max( $ibeg, $iprev_to_go[$i] ); + if ( $type_sequence_to_go[$im] + && $mate_index_to_go[$im] <= $iend ) + { + next; + } + } + + if ( $line == 1 && $i == $ibeg ) { + $do_not_pad = 1; + } + + # Ok, let's see what the error is and try to fix it + my $actual_pos; + my $predicted_pos = $indentation->get_spaces(); + if ( $i > $ibeg ) { + + # token is mid-line - use length to previous token + $actual_pos = total_line_length( $ibeg, $i - 1 ); + + # for mid-line token, we must check to see if all + # additional lines have continuation indentation, + # and remove it if so. Otherwise, we do not get + # good alignment. + my $closing_index = $indentation->get_closed(); + if ( $closing_index > $iend ) { + my $ibeg_next = $ri_first->[ $line + 1 ]; + if ( $ci_levels_to_go[$ibeg_next] > 0 ) { + undo_lp_ci( $line, $i, $closing_index, $ri_first, + $ri_last ); + } + } + } + elsif ( $line > 0 ) { + + # handle case where token starts a new line; + # use length of previous line + my $ibegm = $ri_first->[ $line - 1 ]; + my $iendm = $ri_last->[ $line - 1 ]; + $actual_pos = total_line_length( $ibegm, $iendm ); + + # follow -pt style + ++$actual_pos + if ( $types_to_go[ $iendm + 1 ] eq 'b' ); + } + else { + + # token is first character of first line of batch + $actual_pos = $predicted_pos; + } + + my $move_right = $actual_pos - $predicted_pos; + + # done if no error to correct (gnu2.t) + if ( $move_right == 0 ) { + $indentation->set_recoverable_spaces($move_right); + next; + } + + # if we have not seen closure for this indentation in + # this batch, we can only pass on a request to the + # vertical aligner + my $closing_index = $indentation->get_closed(); + + if ( $closing_index < 0 ) { + $indentation->set_recoverable_spaces($move_right); + next; + } + + # If necessary, look ahead to see if there is really any + # leading whitespace dependent on this whitespace, and + # also find the longest line using this whitespace. + # Since it is always safe to move left if there are no + # dependents, we only need to do this if we may have + # dependent nodes or need to move right. + + my $right_margin = 0; + my $have_child = $indentation->get_have_child(); + + my %saw_indentation; + my $line_count = 1; + $saw_indentation{$indentation} = $indentation; + + if ( $have_child || $move_right > 0 ) { + $have_child = 0; + my $max_length = 0; + if ( $i == $ibeg ) { + $max_length = total_line_length( $ibeg, $iend ); + } + + # look ahead at the rest of the lines of this batch.. + foreach my $line_t ( $line + 1 .. $max_line ) { + my $ibeg_t = $ri_first->[$line_t]; + my $iend_t = $ri_last->[$line_t]; + last if ( $closing_index <= $ibeg_t ); + + # remember all different indentation objects + my $indentation_t = $leading_spaces_to_go[$ibeg_t]; + $saw_indentation{$indentation_t} = $indentation_t; + $line_count++; + + # remember longest line in the group + my $length_t = total_line_length( $ibeg_t, $iend_t ); + if ( $length_t > $max_length ) { + $max_length = $length_t; + } + } + $right_margin = maximum_line_length($ibeg) - $max_length; + if ( $right_margin < 0 ) { $right_margin = 0 } + } + + my $first_line_comma_count = + grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; + my $comma_count = $indentation->get_comma_count(); + my $arrow_count = $indentation->get_arrow_count(); + + # This is a simple approximate test for vertical alignment: + # if we broke just after an opening paren, brace, bracket, + # and there are 2 or more commas in the first line, + # and there are no '=>'s, + # then we are probably vertically aligned. We could set + # an exact flag in sub scan_list, but this is good + # enough. + my $indentation_count = keys %saw_indentation; + my $is_vertically_aligned = + ( $i == $ibeg + && $first_line_comma_count > 1 + && $indentation_count == 1 + && ( $arrow_count == 0 || $arrow_count == $line_count ) ); + + # Make the move if possible .. + if ( + + # we can always move left + $move_right < 0 + + # but we should only move right if we are sure it will + # not spoil vertical alignment + || ( $comma_count == 0 ) + || ( $comma_count > 0 && !$is_vertically_aligned ) + ) + { + my $move = + ( $move_right <= $right_margin ) + ? $move_right + : $right_margin; + + foreach ( keys %saw_indentation ) { + $saw_indentation{$_} + ->permanently_decrease_available_spaces( -$move ); + } + } + + # Otherwise, record what we want and the vertical aligner + # will try to recover it. + else { + $indentation->set_recoverable_spaces($move_right); + } + } + } + } + return $do_not_pad; +} + +# flush is called to output any tokens in the pipeline, so that +# an alternate source of lines can be written in the correct order + +sub flush { + my $self = shift; + destroy_one_line_block(); + $self->output_line_to_go(); + Perl::Tidy::VerticalAligner::flush(); + return; +} + +sub reset_block_text_accumulator { + + # save text after 'if' and 'elsif' to append after 'else' + if ($accumulating_text_for_block) { + + if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { + push @{$rleading_block_if_elsif_text}, $leading_block_text; + } + } + $accumulating_text_for_block = ""; + $leading_block_text = ""; + $leading_block_text_level = 0; + $leading_block_text_length_exceeded = 0; + $leading_block_text_line_number = 0; + $leading_block_text_line_length = 0; + return; +} + +sub set_block_text_accumulator { + my $i = shift; + $accumulating_text_for_block = $tokens_to_go[$i]; + if ( $accumulating_text_for_block !~ /^els/ ) { + $rleading_block_if_elsif_text = []; + } + $leading_block_text = ""; + $leading_block_text_level = $levels_to_go[$i]; + $leading_block_text_line_number = get_output_line_number(); + $leading_block_text_length_exceeded = 0; + + # this will contain the column number of the last character + # of the closing side comment + $leading_block_text_line_length = + length($csc_last_label) + + length($accumulating_text_for_block) + + length( $rOpts->{'closing-side-comment-prefix'} ) + + $leading_block_text_level * $rOpts_indent_columns + 3; + return; +} + +sub accumulate_block_text { + my $i = shift; + + # accumulate leading text for -csc, ignoring any side comments + if ( $accumulating_text_for_block + && !$leading_block_text_length_exceeded + && $types_to_go[$i] ne '#' ) + { + + my $added_length = $token_lengths_to_go[$i]; + $added_length += 1 if $i == 0; + my $new_line_length = $leading_block_text_line_length + $added_length; + + # we can add this text if we don't exceed some limits.. + if ( + + # we must not have already exceeded the text length limit + length($leading_block_text) < + $rOpts_closing_side_comment_maximum_text + + # and either: + # the new total line length must be below the line length limit + # or the new length must be below the text length limit + # (ie, we may allow one token to exceed the text length limit) + && ( + $new_line_length < + maximum_line_length_for_level($leading_block_text_level) + + || length($leading_block_text) + $added_length < + $rOpts_closing_side_comment_maximum_text + ) + + # UNLESS: we are adding a closing paren before the brace we seek. + # This is an attempt to avoid situations where the ... to be + # added are longer than the omitted right paren, as in: + + # foreach my $item (@a_rather_long_variable_name_here) { + # &whatever; + # } ## end foreach my $item (@a_rather_long_variable_name_here... + + || ( + $tokens_to_go[$i] eq ')' + && ( + ( + $i + 1 <= $max_index_to_go + && $block_type_to_go[ $i + 1 ] eq + $accumulating_text_for_block + ) + || ( $i + 2 <= $max_index_to_go + && $block_type_to_go[ $i + 2 ] eq + $accumulating_text_for_block ) + ) + ) + ) + { + + # add an extra space at each newline + if ( $i == 0 ) { $leading_block_text .= ' ' } + + # add the token text + $leading_block_text .= $tokens_to_go[$i]; + $leading_block_text_line_length = $new_line_length; + } + + # show that text was truncated if necessary + elsif ( $types_to_go[$i] ne 'b' ) { + $leading_block_text_length_exceeded = 1; + $leading_block_text .= '...'; + } + } + return; +} + +{ + my %is_if_elsif_else_unless_while_until_for_foreach; + + BEGIN { + + # These block types may have text between the keyword and opening + # curly. Note: 'else' does not, but must be included to allow trailing + # if/elsif text to be appended. + # patch for SWITCH/CASE: added 'case' and 'when' + my @q = + qw(if elsif else unless while until for foreach case when catch); + @is_if_elsif_else_unless_while_until_for_foreach{@q} = + (1) x scalar(@q); + } + + sub accumulate_csc_text { + + # called once per output buffer when -csc is used. Accumulates + # the text placed after certain closing block braces. + # Defines and returns the following for this buffer: + + my $block_leading_text = ""; # the leading text of the last '}' + my $rblock_leading_if_elsif_text; + my $i_block_leading_text = + -1; # index of token owning block_leading_text + my $block_line_count = 100; # how many lines the block spans + my $terminal_type = 'b'; # type of last nonblank token + my $i_terminal = 0; # index of last nonblank token + my $terminal_block_type = ""; + + # update most recent statement label + $csc_last_label = "" unless ($csc_last_label); + if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } + my $block_label = $csc_last_label; + + # Loop over all tokens of this batch + for my $i ( 0 .. $max_index_to_go ) { + my $type = $types_to_go[$i]; + my $block_type = $block_type_to_go[$i]; + my $token = $tokens_to_go[$i]; + + # remember last nonblank token type + if ( $type ne '#' && $type ne 'b' ) { + $terminal_type = $type; + $terminal_block_type = $block_type; + $i_terminal = $i; + } + + my $type_sequence = $type_sequence_to_go[$i]; + if ( $block_type && $type_sequence ) { + + if ( $token eq '}' ) { + + # restore any leading text saved when we entered this block + if ( defined( $block_leading_text{$type_sequence} ) ) { + ( $block_leading_text, $rblock_leading_if_elsif_text ) + = @{ $block_leading_text{$type_sequence} }; + $i_block_leading_text = $i; + delete $block_leading_text{$type_sequence}; + $rleading_block_if_elsif_text = + $rblock_leading_if_elsif_text; + } + + if ( defined( $csc_block_label{$type_sequence} ) ) { + $block_label = $csc_block_label{$type_sequence}; + delete $csc_block_label{$type_sequence}; + } + + # if we run into a '}' then we probably started accumulating + # at something like a trailing 'if' clause..no harm done. + if ( $accumulating_text_for_block + && $levels_to_go[$i] <= $leading_block_text_level ) + { + my $lev = $levels_to_go[$i]; + reset_block_text_accumulator(); + } + + if ( defined( $block_opening_line_number{$type_sequence} ) ) + { + my $output_line_number = get_output_line_number(); + $block_line_count = + $output_line_number - + $block_opening_line_number{$type_sequence} + 1; + delete $block_opening_line_number{$type_sequence}; + } + else { + + # Error: block opening line undefined for this line.. + # This shouldn't be possible, but it is not a + # significant problem. + } + } + + elsif ( $token eq '{' ) { + + my $line_number = get_output_line_number(); + $block_opening_line_number{$type_sequence} = $line_number; + + # set a label for this block, except for + # a bare block which already has the label + # A label can only be used on the next { + if ( $block_type =~ /:$/ ) { $csc_last_label = "" } + $csc_block_label{$type_sequence} = $csc_last_label; + $csc_last_label = ""; + + if ( $accumulating_text_for_block + && $levels_to_go[$i] == $leading_block_text_level ) + { + + if ( $accumulating_text_for_block eq $block_type ) { + + # save any leading text before we enter this block + $block_leading_text{$type_sequence} = [ + $leading_block_text, + $rleading_block_if_elsif_text + ]; + $block_opening_line_number{$type_sequence} = + $leading_block_text_line_number; + reset_block_text_accumulator(); + } + else { + + # shouldn't happen, but not a serious error. + # We were accumulating -csc text for block type + # $accumulating_text_for_block and unexpectedly + # encountered a '{' for block type $block_type. + } + } + } + } + + if ( $type eq 'k' + && $csc_new_statement_ok + && $is_if_elsif_else_unless_while_until_for_foreach{$token} + && $token =~ /$closing_side_comment_list_pattern/o ) + { + set_block_text_accumulator($i); + } + else { + + # note: ignoring type 'q' because of tricks being played + # with 'q' for hanging side comments + if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { + $csc_new_statement_ok = + ( $block_type || $type eq 'J' || $type eq ';' ); + } + if ( $type eq ';' + && $accumulating_text_for_block + && $levels_to_go[$i] == $leading_block_text_level ) + { + reset_block_text_accumulator(); + } + else { + accumulate_block_text($i); + } + } + } + + # Treat an 'else' block specially by adding preceding 'if' and + # 'elsif' text. Otherwise, the 'end else' is not helpful, + # especially for cuddled-else formatting. + if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { + $block_leading_text = + make_else_csc_text( $i_terminal, $terminal_block_type, + $block_leading_text, $rblock_leading_if_elsif_text ); + } + + # if this line ends in a label then remember it for the next pass + $csc_last_label = ""; + if ( $terminal_type eq 'J' ) { + $csc_last_label = $tokens_to_go[$i_terminal]; + } + + return ( $terminal_type, $i_terminal, $i_block_leading_text, + $block_leading_text, $block_line_count, $block_label ); + } +} + +sub make_else_csc_text { + + # create additional -csc text for an 'else' and optionally 'elsif', + # depending on the value of switch + # $rOpts_closing_side_comment_else_flag: + # + # = 0 add 'if' text to trailing else + # = 1 same as 0 plus: + # add 'if' to 'elsif's if can fit in line length + # add last 'elsif' to trailing else if can fit in one line + # = 2 same as 1 but do not check if exceed line length + # + # $rif_elsif_text = a reference to a list of all previous closing + # side comments created for this if block + # + my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; + my $csc_text = $block_leading_text; + + if ( $block_type eq 'elsif' + && $rOpts_closing_side_comment_else_flag == 0 ) + { + return $csc_text; + } + + my $count = @{$rif_elsif_text}; + return $csc_text unless ($count); + + my $if_text = '[ if' . $rif_elsif_text->[0]; + + # always show the leading 'if' text on 'else' + if ( $block_type eq 'else' ) { + $csc_text .= $if_text; + } + + # see if that's all + if ( $rOpts_closing_side_comment_else_flag == 0 ) { + return $csc_text; + } + + my $last_elsif_text = ""; + if ( $count > 1 ) { + $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; + if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } + } + + # tentatively append one more item + my $saved_text = $csc_text; + if ( $block_type eq 'else' ) { + $csc_text .= $last_elsif_text; + } + else { + $csc_text .= ' ' . $if_text; + } + + # all done if no length checks requested + if ( $rOpts_closing_side_comment_else_flag == 2 ) { + return $csc_text; + } + + # undo it if line length exceeded + my $length = + length($csc_text) + + length($block_type) + + length( $rOpts->{'closing-side-comment-prefix'} ) + + $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; + if ( $length > maximum_line_length_for_level($leading_block_text_level) ) { + $csc_text = $saved_text; + } + return $csc_text; +} + +{ # sub balance_csc_text + + my %matching_char; + + BEGIN { + %matching_char = ( + '{' => '}', + '(' => ')', + '[' => ']', + '}' => '{', + ')' => '(', + ']' => '[', + ); + } + + sub balance_csc_text { + + # Append characters to balance a closing side comment so that editors + # such as vim can correctly jump through code. + # Simple Example: + # input = ## end foreach my $foo ( sort { $b ... + # output = ## end foreach my $foo ( sort { $b ...}) + + # NOTE: This routine does not currently filter out structures within + # quoted text because the bounce algorithms in text editors do not + # necessarily do this either (a version of vim was checked and + # did not do this). + + # Some complex examples which will cause trouble for some editors: + # while ( $mask_string =~ /\{[^{]*?\}/g ) { + # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { + # if ( $1 eq '{' ) { + # test file test1/braces.pl has many such examples. + + my ($csc) = @_; + + # loop to examine characters one-by-one, RIGHT to LEFT and + # build a balancing ending, LEFT to RIGHT. + for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { + + my $char = substr( $csc, $pos, 1 ); + + # ignore everything except structural characters + next unless ( $matching_char{$char} ); + + # pop most recently appended character + my $top = chop($csc); + + # push it back plus the mate to the newest character + # unless they balance each other. + $csc = $csc . $top . $matching_char{$char} unless $top eq $char; + } + + # return the balanced string + return $csc; + } +} + +sub add_closing_side_comment { + + my $self = shift; + + # add closing side comments after closing block braces if -csc used + my $cscw_block_comment; + + #--------------------------------------------------------------- + # Step 1: loop through all tokens of this line to accumulate + # the text needed to create the closing side comments. Also see + # how the line ends. + #--------------------------------------------------------------- + + my ( $terminal_type, $i_terminal, $i_block_leading_text, + $block_leading_text, $block_line_count, $block_label ) + = accumulate_csc_text(); + + #--------------------------------------------------------------- + # Step 2: make the closing side comment if this ends a block + #--------------------------------------------------------------- + my $have_side_comment = $types_to_go[$max_index_to_go] eq '#'; + + # if this line might end in a block closure.. + if ( + $terminal_type eq '}' + + # ..and either + && ( + + # the block is long enough + ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) + + # or there is an existing comment to check + || ( $have_side_comment + && $rOpts->{'closing-side-comment-warnings'} ) + ) + + # .. and if this is one of the types of interest + && $block_type_to_go[$i_terminal] =~ + /$closing_side_comment_list_pattern/o + + # .. but not an anonymous sub + # These are not normally of interest, and their closing braces are + # often followed by commas or semicolons anyway. This also avoids + # possible erratic output due to line numbering inconsistencies + # in the cases where their closing braces terminate a line. + && $block_type_to_go[$i_terminal] ne 'sub' + + # ..and the corresponding opening brace must is not in this batch + # (because we do not need to tag one-line blocks, although this + # should also be caught with a positive -csci value) + && $mate_index_to_go[$i_terminal] < 0 + + # ..and either + && ( + + # this is the last token (line doesn't have a side comment) + !$have_side_comment + + # or the old side comment is a closing side comment + || $tokens_to_go[$max_index_to_go] =~ + /$closing_side_comment_prefix_pattern/o + ) + ) + { + + # then make the closing side comment text + if ($block_label) { $block_label .= " " } + my $token = +"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; + + # append any extra descriptive text collected above + if ( $i_block_leading_text == $i_terminal ) { + $token .= $block_leading_text; + } + + $token = balance_csc_text($token) + if $rOpts->{'closing-side-comments-balanced'}; + + $token =~ s/\s*$//; # trim any trailing whitespace + + # handle case of existing closing side comment + if ($have_side_comment) { + + # warn if requested and tokens differ significantly + if ( $rOpts->{'closing-side-comment-warnings'} ) { + my $old_csc = $tokens_to_go[$max_index_to_go]; + my $new_csc = $token; + $new_csc =~ s/\s+//g; # trim all whitespace + $old_csc =~ s/\s+//g; # trim all whitespace + $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures + $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures + $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' + my $new_trailing_dots = $1; + $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' + + # Patch to handle multiple closing side comments at + # else and elsif's. These have become too complicated + # to check, so if we see an indication of + # '[ if' or '[ # elsif', then assume they were made + # by perltidy. + if ( $block_type_to_go[$i_terminal] eq 'else' ) { + if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } + } + elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { + if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } + } + + # if old comment is contained in new comment, + # only compare the common part. + if ( length($new_csc) > length($old_csc) ) { + $new_csc = substr( $new_csc, 0, length($old_csc) ); + } + + # if the new comment is shorter and has been limited, + # only compare the common part. + if ( length($new_csc) < length($old_csc) + && $new_trailing_dots ) + { + $old_csc = substr( $old_csc, 0, length($new_csc) ); + } + + # any remaining difference? + if ( $new_csc ne $old_csc ) { + + # just leave the old comment if we are below the threshold + # for creating side comments + if ( $block_line_count < + $rOpts->{'closing-side-comment-interval'} ) + { + $token = undef; + } + + # otherwise we'll make a note of it + else { + + warning( +"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" + ); + + # save the old side comment in a new trailing block + # comment + my $timestamp = ""; + if ( $rOpts->{'timestamp'} ) { + my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; + $year += 1900; + $month += 1; + $timestamp = "$year-$month-$day"; + } + $cscw_block_comment = +"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]"; +## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; + } + } + else { + + # No differences.. we can safely delete old comment if we + # are below the threshold + if ( $block_line_count < + $rOpts->{'closing-side-comment-interval'} ) + { + $token = undef; + $self->unstore_token_to_go() + if ( $types_to_go[$max_index_to_go] eq '#' ); + $self->unstore_token_to_go() + if ( $types_to_go[$max_index_to_go] eq 'b' ); + } + } + } + + # switch to the new csc (unless we deleted it!) + $tokens_to_go[$max_index_to_go] = $token if $token; + } + + # handle case of NO existing closing side comment + else { + + # Remove any existing blank and add another below. + # This is a tricky point. A side comment needs to have the same level + # as the preceding closing brace or else the line will not get the right + # indentation. So even if we have a blank, we are going to replace it. + if ( $types_to_go[$max_index_to_go] eq 'b' ) { + unstore_token_to_go(); + } + + # insert the new side comment into the output token stream + my $type = '#'; + my $block_type = ''; + my $type_sequence = ''; + my $container_environment = + $container_environment_to_go[$max_index_to_go]; + my $level = $levels_to_go[$max_index_to_go]; + my $slevel = $nesting_depth_to_go[$max_index_to_go]; + my $no_internal_newlines = 0; + + my $ci_level = $ci_levels_to_go[$max_index_to_go]; + my $in_continued_quote = 0; + + # insert a blank token + $self->insert_new_token_to_go( ' ', 'b', $slevel, + $no_internal_newlines ); + + # then the side comment + $self->insert_new_token_to_go( $token, $type, $slevel, + $no_internal_newlines ); + } + } + return $cscw_block_comment; +} + +sub previous_nonblank_token { + my ($i) = @_; + my $name = ""; + my $im = $i - 1; + return "" if ( $im < 0 ); + if ( $types_to_go[$im] eq 'b' ) { $im--; } + return "" if ( $im < 0 ); + $name = $tokens_to_go[$im]; + + # prepend any sub name to an isolated -> to avoid unwanted alignments + # [test case is test8/penco.pl] + if ( $name eq '->' ) { + $im--; + if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { + $name = $tokens_to_go[$im] . $name; + } + } + return $name; +} + +sub send_lines_to_vertical_aligner { + + my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_; + + my $valign_batch_number = $self->increment_valign_batch_count(); + + my $cscw_block_comment; + if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ) { + $cscw_block_comment = $self->add_closing_side_comment(); + + # Add or update any closing side comment + if ( $types_to_go[$max_index_to_go] eq '#' ) { + $ri_last->[-1] = $max_index_to_go; + } + } + + my $rindentation_list = [0]; # ref to indentations for each line + + # define the array @matching_token_to_go for the output tokens + # which will be non-blank for each special token (such as =>) + # for which alignment is required. + set_vertical_alignment_markers( $ri_first, $ri_last ); + + # flush if necessary to avoid unwanted alignment + my $must_flush = 0; + if ( @{$ri_first} > 1 ) { + + # flush before a long if statement + if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { + $must_flush = 1; + } + } + if ($must_flush) { + Perl::Tidy::VerticalAligner::flush(); + } + + undo_ci( $ri_first, $ri_last ); + + set_logical_padding( $ri_first, $ri_last ); + + # loop to prepare each line for shipment + my $n_last_line = @{$ri_first} - 1; + my $in_comma_list; + for my $n ( 0 .. $n_last_line ) { + my $ibeg = $ri_first->[$n]; + my $iend = $ri_last->[$n]; + + my ( $rtokens, $rfields, $rpatterns ) = + make_alignment_patterns( $ibeg, $iend ); + + # Set flag to show how much level changes between this line + # and the next line, if we have it. + my $ljump = 0; + if ( $n < $n_last_line ) { + my $ibegp = $ri_first->[ $n + 1 ]; + $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; + } + + my ( $indentation, $lev, $level_end, $terminal_type, + $is_semicolon_terminated, $is_outdented_line ) + = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, + $ri_first, $ri_last, $rindentation_list, $ljump ); + + # we will allow outdenting of long lines.. + my $outdent_long_lines = ( + + # which are long quotes, if allowed + ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) + + # which are long block comments, if allowed + || ( + $types_to_go[$ibeg] eq '#' + && $rOpts->{'outdent-long-comments'} + + # but not if this is a static block comment + && !$is_static_block_comment + ) + ); + + my $level_jump = + $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; + + my $rvertical_tightness_flags = + set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, + $ri_first, $ri_last ); + + # flush an outdented line to avoid any unwanted vertical alignment + Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); + + # Set a flag at the final ':' of a ternary chain to request + # vertical alignment of the final term. Here is a + # slightly complex example: + # + # $self->{_text} = ( + # !$section ? '' + # : $type eq 'item' ? "the $section entry" + # : "the section on $section" + # ) + # . ( + # $page + # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" + # : ' elsewhere in this document' + # ); + # + my $is_terminal_ternary = 0; + if ( $tokens_to_go[$ibeg] eq ':' + || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' ) + { + my $last_leading_type = ":"; + if ( $n > 0 ) { + my $iprev = $ri_first->[ $n - 1 ]; + $last_leading_type = $types_to_go[$iprev]; + } + if ( $terminal_type ne ';' + && $n_last_line > $n + && $level_end == $lev ) + { + my $inext = $ri_first->[ $n + 1 ]; + $level_end = $levels_to_go[$inext]; + $terminal_type = $types_to_go[$inext]; + } + + $is_terminal_ternary = $last_leading_type eq ':' + && ( ( $terminal_type eq ';' && $level_end <= $lev ) + || ( $terminal_type ne ':' && $level_end < $lev ) ) + + # the terminal term must not contain any ternary terms, as in + # my $ECHO = ( + # $Is_MSWin32 ? ".\\echo$$" + # : $Is_MacOS ? ":echo$$" + # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) + # ); + && !grep { /^[\?\:]$/ } @types_to_go[ $ibeg + 1 .. $iend ]; + } + + # send this new line down the pipe + my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; + + my $rvalign_hash = {}; + $rvalign_hash->{level} = $lev; + $rvalign_hash->{level_end} = $level_end; + $rvalign_hash->{indentation} = $indentation; + $rvalign_hash->{is_forced_break} = + $forced_breakpoint_to_go[$iend] || $in_comma_list; + $rvalign_hash->{outdent_long_lines} = $outdent_long_lines; + $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary; + $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated; + $rvalign_hash->{do_not_pad} = $do_not_pad; + $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags; + $rvalign_hash->{level_jump} = $level_jump; + + $rvalign_hash->{valign_batch_number} = $valign_batch_number; + + Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields, + $rtokens, $rpatterns ); + + $in_comma_list = + $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; + + # flush an outdented line to avoid any unwanted vertical alignment + Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); + + $do_not_pad = 0; + + # Set flag indicating if this line ends in an opening + # token and is very short, so that a blank line is not + # needed if the subsequent line is a comment. + # Examples of what we are looking for: + # { + # && ( + # BEGIN { + # default { + # sub { + $last_output_short_opening_token + + # line ends in opening token + = $types_to_go[$iend] =~ /^[\{\(\[L]$/ + + # and either + && ( + # line has either single opening token + $iend == $ibeg + + # or is a single token followed by opening token. + # Note that sub identifiers have blanks like 'sub doit' + || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) + ) + + # and limit total to 10 character widths + && token_sequence_length( $ibeg, $iend ) <= 10; + + } # end of loop to output each line + + # remember indentation of lines containing opening containers for + # later use by sub set_adjusted_indentation + save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); + + # output any new -cscw block comment + if ($cscw_block_comment) { + Perl::Tidy::VerticalAligner::flush(); + $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); + } + return; +} + +{ # begin make_alignment_patterns + + my %block_type_map; + my %keyword_map; + + BEGIN { + + # map related block names into a common name to + # allow alignment + %block_type_map = ( + 'unless' => 'if', + 'else' => 'if', + 'elsif' => 'if', + 'when' => 'if', + 'default' => 'if', + 'case' => 'if', + 'sort' => 'map', + 'grep' => 'map', + ); + + # map certain keywords to the same 'if' class to align + # long if/elsif sequences. [elsif.pl] + %keyword_map = ( + 'unless' => 'if', + 'else' => 'if', + 'elsif' => 'if', + 'when' => 'given', + 'default' => 'given', + 'case' => 'switch', + + # treat an 'undef' similar to numbers and quotes + 'undef' => 'Q', + ); + } + + sub make_alignment_patterns { + + # Here we do some important preliminary work for the + # vertical aligner. We create three arrays for one + # output line. These arrays contain strings that can + # be tested by the vertical aligner to see if + # consecutive lines can be aligned vertically. + # + # The three arrays are indexed on the vertical + # alignment fields and are: + # @tokens - a list of any vertical alignment tokens for this line. + # These are tokens, such as '=' '&&' '#' etc which + # we want to might align vertically. These are + # decorated with various information such as + # nesting depth to prevent unwanted vertical + # alignment matches. + # @fields - the actual text of the line between the vertical alignment + # tokens. + # @patterns - a modified list of token types, one for each alignment + # field. These should normally each match before alignment is + # allowed, even when the alignment tokens match. + my ( $ibeg, $iend ) = @_; + my @tokens = (); + my @fields = (); + my @patterns = (); + my $i_start = $ibeg; + + my $depth = 0; + my @container_name = (""); + my @multiple_comma_arrows = (undef); + + my $j = 0; # field index + + $patterns[0] = ""; + for my $i ( $ibeg .. $iend ) { + + # Keep track of containers balanced on this line only. + # These are used below to prevent unwanted cross-line alignments. + # Unbalanced containers already avoid aligning across + # container boundaries. + my $tok = $tokens_to_go[$i]; + if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) { + + # if container is balanced on this line... + my $i_mate = $mate_index_to_go[$i]; + if ( $i_mate > $i && $i_mate <= $iend ) { + $depth++; + my $seqno = $type_sequence_to_go[$i]; + my $count = comma_arrow_count($seqno); + $multiple_comma_arrows[$depth] = $count && $count > 1; + + # Append the previous token name to make the container name + # more unique. This name will also be given to any commas + # within this container, and it helps avoid undesirable + # alignments of different types of containers. + + # Containers beginning with { and [ are given those names + # for uniqueness. That way commas in different containers + # will not match. Here is an example of what this prevents: + # a => [ 1, 2, 3 ], + # b => { b1 => 4, b2 => 5 }, + # Here is another example of what we avoid by labeling the + # commas properly: + # is_d( [ $a, $a ], [ $b, $c ] ); + # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } ); + # is_d( [ \$a, \$a ], [ \$b, \$c ] ); + + my $name = $tok; + if ( $tok eq '(' ) { + $name = previous_nonblank_token($i); + $name =~ s/^->//; + } + $container_name[$depth] = "+" . $name; + + # Make the container name even more unique if necessary. + # If we are not vertically aligning this opening paren, + # append a character count to avoid bad alignment because + # it usually looks bad to align commas within containers + # for which the opening parens do not align. Here + # is an example very BAD alignment of commas (because + # the atan2 functions are not all aligned): + # $XY = + # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + + # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - + # $X * atan2( $X, 1 ) - + # $Y * atan2( $Y, 1 ); + # + # On the other hand, it is usually okay to align commas if + # opening parens align, such as: + # glVertex3d( $cx + $s * $xs, $cy, $z ); + # glVertex3d( $cx, $cy + $s * $ys, $z ); + # glVertex3d( $cx - $s * $xs, $cy, $z ); + # glVertex3d( $cx, $cy - $s * $ys, $z ); + # + # To distinguish between these situations, we will + # append the length of the line from the previous matching + # token, or beginning of line, to the function name. This + # will allow the vertical aligner to reject undesirable + # matches. + + # if we are not aligning on this paren... + if ( $matching_token_to_go[$i] eq '' ) { + + # Sum length from previous alignment, or start of line. + my $len = + ( $i_start == $ibeg ) + ? total_line_length( $i_start, $i - 1 ) + : token_sequence_length( $i_start, $i - 1 ); + + # tack length onto the container name to make unique + $container_name[$depth] .= "-" . $len; + } + } + } + elsif ( $tokens_to_go[$i] =~ /^[\)\}\]]/ ) { + $depth-- if $depth > 0; + } + + # if we find a new synchronization token, we are done with + # a field + if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { + + my $tok = my $raw_tok = $matching_token_to_go[$i]; + + # map similar items + if ( $tok eq '!~' ) { $tok = '=~' } + + # make separators in different nesting depths unique + # by appending the nesting depth digit. + if ( $raw_tok ne '#' ) { + $tok .= "$nesting_depth_to_go[$i]"; + } + + # also decorate commas with any container name to avoid + # unwanted cross-line alignments. + if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { + if ( $container_name[$depth] ) { + $tok .= $container_name[$depth]; + } + } + + # Patch to avoid aligning leading and trailing if, unless. + # Mark trailing if, unless statements with container names. + # This makes them different from leading if, unless which + # are not so marked at present. If we ever need to name + # them too, we could use ci to distinguish them. + # Example problem to avoid: + # return ( 2, "DBERROR" ) + # if ( $retval == 2 ); + # if ( scalar @_ ) { + # my ( $a, $b, $c, $d, $e, $f ) = @_; + # } + if ( $raw_tok eq '(' ) { + my $ci = $ci_levels_to_go[$ibeg]; + if ( $container_name[$depth] =~ /^\+(if|unless)/ + && $ci ) + { + $tok .= $container_name[$depth]; + } + } + + # Decorate block braces with block types to avoid + # unwanted alignments such as the following: + # foreach ( @{$routput_array} ) { $fh->print($_) } + # eval { $fh->close() }; + if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { + my $block_type = $block_type_to_go[$i]; + + # map certain related block types to allow + # else blocks to align + $block_type = $block_type_map{$block_type} + if ( defined( $block_type_map{$block_type} ) ); + + # remove sub names to allow one-line sub braces to align + # regardless of name + #if ( $block_type =~ /^sub / ) { $block_type = 'sub' } + if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' } + + # allow all control-type blocks to align + if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } + + $tok .= $block_type; + } + + # concatenate the text of the consecutive tokens to form + # the field + push( @fields, + join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); + + # store the alignment token for this field + push( @tokens, $tok ); + + # get ready for the next batch + $i_start = $i; + $j++; + $patterns[$j] = ""; + } + + # continue accumulating tokens + # handle non-keywords.. + if ( $types_to_go[$i] ne 'k' ) { + my $type = $types_to_go[$i]; + + # Mark most things before arrows as a quote to + # get them to line up. Testfile: mixed.pl. + if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { + my $next_type = $types_to_go[ $i + 1 ]; + my $i_next_nonblank = + ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); + + if ( $types_to_go[$i_next_nonblank] eq '=>' ) { + $type = 'Q'; + + # Patch to ignore leading minus before words, + # by changing pattern 'mQ' into just 'Q', + # so that we can align things like this: + # Button => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } + } + } + + # Convert a bareword within braces into a quote for matching. This will + # allow alignment of expressions like this: + # local ( $SIG{'INT'} ) = IGNORE; + # local ( $SIG{ALRM} ) = 'POSTMAN'; + if ( $type eq 'w' + && $i > $ibeg + && $i < $iend + && $types_to_go[ $i - 1 ] eq 'L' + && $types_to_go[ $i + 1 ] eq 'R' ) + { + $type = 'Q'; + } + + # patch to make numbers and quotes align + if ( $type eq 'n' ) { $type = 'Q' } + + # patch to ignore any ! in patterns + if ( $type eq '!' ) { $type = '' } + + $patterns[$j] .= $type; + } + + # for keywords we have to use the actual text + else { + + my $tok = $tokens_to_go[$i]; + + # but map certain keywords to a common string to allow + # alignment. + $tok = $keyword_map{$tok} + if ( defined( $keyword_map{$tok} ) ); + $patterns[$j] .= $tok; + } + } + + # done with this line .. join text of tokens to make the last field + push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); + return ( \@tokens, \@fields, \@patterns ); + } + +} # end make_alignment_patterns + +{ # begin unmatched_indexes + + # closure to keep track of unbalanced containers. + # arrays shared by the routines in this block: + my @unmatched_opening_indexes_in_this_batch; + my @unmatched_closing_indexes_in_this_batch; + my %comma_arrow_count; + + sub is_unbalanced_batch { + return @unmatched_opening_indexes_in_this_batch + + @unmatched_closing_indexes_in_this_batch; + } + + sub comma_arrow_count { + my $seqno = shift; + return $comma_arrow_count{$seqno}; + } + + sub match_opening_and_closing_tokens { + + # Match up indexes of opening and closing braces, etc, in this batch. + # This has to be done after all tokens are stored because unstoring + # of tokens would otherwise cause trouble. + + @unmatched_opening_indexes_in_this_batch = (); + @unmatched_closing_indexes_in_this_batch = (); + %comma_arrow_count = (); + my $comma_arrow_count_contained = 0; + + foreach my $i ( 0 .. $max_index_to_go ) { + if ( $type_sequence_to_go[$i] ) { + my $token = $tokens_to_go[$i]; + if ( $token =~ /^[\(\[\{\?]$/ ) { + push @unmatched_opening_indexes_in_this_batch, $i; + } + elsif ( $token =~ /^[\)\]\}\:]$/ ) { + + my $i_mate = pop @unmatched_opening_indexes_in_this_batch; + if ( defined($i_mate) && $i_mate >= 0 ) { + if ( $type_sequence_to_go[$i_mate] == + $type_sequence_to_go[$i] ) + { + $mate_index_to_go[$i] = $i_mate; + $mate_index_to_go[$i_mate] = $i; + my $seqno = $type_sequence_to_go[$i]; + if ( $comma_arrow_count{$seqno} ) { + $comma_arrow_count_contained += + $comma_arrow_count{$seqno}; + } + } + else { + push @unmatched_opening_indexes_in_this_batch, + $i_mate; + push @unmatched_closing_indexes_in_this_batch, $i; + } + } + else { + push @unmatched_closing_indexes_in_this_batch, $i; + } + } + } + elsif ( $tokens_to_go[$i] eq '=>' ) { + if (@unmatched_opening_indexes_in_this_batch) { + my $j = $unmatched_opening_indexes_in_this_batch[-1]; + my $seqno = $type_sequence_to_go[$j]; + $comma_arrow_count{$seqno}++; + } + } + } + return $comma_arrow_count_contained; + } + + sub save_opening_indentation { + + # This should be called after each batch of tokens is output. It + # saves indentations of lines of all unmatched opening tokens. + # These will be used by sub get_opening_indentation. + + my ( $ri_first, $ri_last, $rindentation_list ) = @_; + + # we no longer need indentations of any saved indentations which + # are unmatched closing tokens in this batch, because we will + # never encounter them again. So we can delete them to keep + # the hash size down. + foreach (@unmatched_closing_indexes_in_this_batch) { + my $seqno = $type_sequence_to_go[$_]; + delete $saved_opening_indentation{$seqno}; + } + + # we need to save indentations of any unmatched opening tokens + # in this batch because we may need them in a subsequent batch. + foreach (@unmatched_opening_indexes_in_this_batch) { + my $seqno = $type_sequence_to_go[$_]; + $saved_opening_indentation{$seqno} = [ + lookup_opening_indentation( + $_, $ri_first, $ri_last, $rindentation_list + ) + ]; + } + return; + } +} # end unmatched_indexes + +sub get_opening_indentation { + + # get the indentation of the line which output the opening token + # corresponding to a given closing token in the current output batch. + # + # given: + # $i_closing - index in this line of a closing token ')' '}' or ']' + # + # $ri_first - reference to list of the first index $i for each output + # line in this batch + # $ri_last - reference to list of the last index $i for each output line + # in this batch + # $rindentation_list - reference to a list containing the indentation + # used for each line. + # + # return: + # -the indentation of the line which contained the opening token + # which matches the token at index $i_opening + # -and its offset (number of columns) from the start of the line + # + my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; + + # first, see if the opening token is in the current batch + my $i_opening = $mate_index_to_go[$i_closing]; + my ( $indent, $offset, $is_leading, $exists ); + $exists = 1; + if ( $i_opening >= 0 ) { + + # it is..look up the indentation + ( $indent, $offset, $is_leading ) = + lookup_opening_indentation( $i_opening, $ri_first, $ri_last, + $rindentation_list ); + } + + # if not, it should have been stored in the hash by a previous batch + else { + my $seqno = $type_sequence_to_go[$i_closing]; + if ($seqno) { + if ( $saved_opening_indentation{$seqno} ) { + ( $indent, $offset, $is_leading ) = + @{ $saved_opening_indentation{$seqno} }; + } + + # some kind of serious error + # (example is badfile.t) + else { + $indent = 0; + $offset = 0; + $is_leading = 0; + $exists = 0; + } + } + + # if no sequence number it must be an unbalanced container + else { + $indent = 0; + $offset = 0; + $is_leading = 0; + $exists = 0; + } + } + return ( $indent, $offset, $is_leading, $exists ); +} + +sub lookup_opening_indentation { + + # get the indentation of the line in the current output batch + # which output a selected opening token + # + # given: + # $i_opening - index of an opening token in the current output batch + # whose line indentation we need + # $ri_first - reference to list of the first index $i for each output + # line in this batch + # $ri_last - reference to list of the last index $i for each output line + # in this batch + # $rindentation_list - reference to a list containing the indentation + # used for each line. (NOTE: the first slot in + # this list is the last returned line number, and this is + # followed by the list of indentations). + # + # return + # -the indentation of the line which contained token $i_opening + # -and its offset (number of columns) from the start of the line + + my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; + + my $nline = $rindentation_list->[0]; # line number of previous lookup + + # reset line location if necessary + $nline = 0 if ( $i_opening < $ri_start->[$nline] ); + + # find the correct line + unless ( $i_opening > $ri_last->[-1] ) { + while ( $i_opening > $ri_last->[$nline] ) { $nline++; } + } + + # error - token index is out of bounds - shouldn't happen + else { + warning( +"non-fatal program bug in lookup_opening_indentation - index out of range\n" + ); + report_definite_bug(); + $nline = $#{$ri_last}; + } + + $rindentation_list->[0] = + $nline; # save line number to start looking next call + my $ibeg = $ri_start->[$nline]; + my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; + my $is_leading = ( $ibeg == $i_opening ); + return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); +} + +{ + my %is_if_elsif_else_unless_while_until_for_foreach; + + BEGIN { + + # These block types may have text between the keyword and opening + # curly. Note: 'else' does not, but must be included to allow trailing + # if/elsif text to be appended. + # patch for SWITCH/CASE: added 'case' and 'when' + my @q = qw(if elsif else unless while until for foreach case when); + @is_if_elsif_else_unless_while_until_for_foreach{@q} = + (1) x scalar(@q); + } + + sub set_adjusted_indentation { + + # This routine has the final say regarding the actual indentation of + # a line. It starts with the basic indentation which has been + # defined for the leading token, and then takes into account any + # options that the user has set regarding special indenting and + # outdenting. + + my ( + $self, $ibeg, $iend, + $rfields, $rpatterns, $ri_first, + $ri_last, $rindentation_list, $level_jump + ) = @_; + + my $rLL = $self->{rLL}; + + # we need to know the last token of this line + my ( $terminal_type, $i_terminal ) = + terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); + + my $is_outdented_line = 0; + + my $is_semicolon_terminated = $terminal_type eq ';' + && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; + + # NOTE: A future improvement would be to make it semicolon terminated + # even if it does not have a semicolon but is followed by a closing + # block brace. This would undo ci even for something like the + # following, in which the final paren does not have a semicolon because + # it is a possible weld location: + + # if ($BOLD_MATH) { + # ( + # $labels, $comment, + # join( '', '', &make_math( $mode, '', '', $_ ), '' ) + # ) + # } + # + + # MOJO: Set a flag if this lines begins with ')->' + my $leading_paren_arrow = ( + $types_to_go[$ibeg] eq '}' + && $tokens_to_go[$ibeg] eq ')' + && ( + ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' ) + || ( $ibeg < $i_terminal - 1 + && $types_to_go[ $ibeg + 1 ] eq 'b' + && $types_to_go[ $ibeg + 2 ] eq '->' ) + ) + ); + + ########################################################## + # Section 1: set a flag and a default indentation + # + # Most lines are indented according to the initial token. + # But it is common to outdent to the level just after the + # terminal token in certain cases... + # adjust_indentation flag: + # 0 - do not adjust + # 1 - outdent + # 2 - vertically align with opening token + # 3 - indent + ########################################################## + my $adjust_indentation = 0; + my $default_adjust_indentation = $adjust_indentation; + + my ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ); + + my $type_beg = $types_to_go[$ibeg]; + my $token_beg = $tokens_to_go[$ibeg]; + my $K_beg = $K_to_go[$ibeg]; + my $ibeg_weld_fix = $ibeg; + + # QW PATCH 2 (Testing) + # At an isolated closing token of a qw quote which is welded to + # a following closing token, we will locally change its type to + # be the same as its token. This will allow formatting to be the + # same as for an ordinary closing token. + + # For -lp formatting se use $ibeg_weld_fix to get around the problem + # that with -lp type formatting the opening and closing tokens to not + # have sequence numbers. + if ( $type_beg eq 'q' && $token_beg =~ /^[\)\}\]\>]/ ) { + my $K_next_nonblank = $self->K_next_code($K_beg); + if ( defined($K_next_nonblank) ) { + my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_]; + my $token = $rLL->[$K_next_nonblank]->[_TOKEN_]; + my $welded = weld_len_left( $type_sequence, $token ); + if ($welded) { + $ibeg_weld_fix = $ibeg + ( $K_next_nonblank - $K_beg ); + $type_beg = ')'; ##$token_beg; + } + } + } + + # if we are at a closing token of some type.. + if ( $type_beg =~ /^[\)\}\]R]$/ ) { + + # get the indentation of the line containing the corresponding + # opening token + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg_weld_fix, $ri_first, $ri_last, + $rindentation_list ); + + # First set the default behavior: + if ( + + # default behavior is to outdent closing lines + # of the form: "); }; ]; )->xxx;" + $is_semicolon_terminated + + # and 'cuddled parens' of the form: ")->pack(" + # Bug fix for RT #123749]: the types here were + # incorrectly '(' and ')'. Corrected to be '{' and '}' + || ( + $terminal_type eq '{' + && $type_beg eq '}' + && ( $nesting_depth_to_go[$iend] + 1 == + $nesting_depth_to_go[$ibeg] ) + ) + + # remove continuation indentation for any line like + # } ... { + # or without ending '{' and unbalanced, such as + # such as '}->{$operator}' + || ( + $type_beg eq '}' + + && ( $types_to_go[$iend] eq '{' + || $levels_to_go[$iend] < $levels_to_go[$ibeg] ) + ) + + # and when the next line is at a lower indentation level + # PATCH: and only if the style allows undoing continuation + # for all closing token types. We should really wait until + # the indentation of the next line is known and then make + # a decision, but that would require another pass. + || ( $level_jump < 0 && !$some_closing_token_indentation ) + + # Patch for -wn=2, multiple welded closing tokens + || ( $i_terminal > $ibeg + && $types_to_go[$iend] =~ /^[\)\}\]R]$/ ) + + ) + { + $adjust_indentation = 1; + } + + # outdent something like '),' + if ( + $terminal_type eq ',' + + # Removed this constraint for -wn + # OLD: allow just one character before the comma + # && $i_terminal == $ibeg + 1 + + # require LIST environment; otherwise, we may outdent too much - + # this can happen in calls without parentheses (overload.t); + && $container_environment_to_go[$i_terminal] eq 'LIST' + ) + { + $adjust_indentation = 1; + } + + # undo continuation indentation of a terminal closing token if + # it is the last token before a level decrease. This will allow + # a closing token to line up with its opening counterpart, and + # avoids a indentation jump larger than 1 level. + if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ + && $i_terminal == $ibeg + && defined($K_beg) ) + { + my $K_next_nonblank = $self->K_next_code($K_beg); + if ( defined($K_next_nonblank) ) { + my $lev = $rLL->[$K_beg]->[_LEVEL_]; + my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_]; + $adjust_indentation = 1 if ( $level_next < $lev ); + } + + # Patch for RT #96101, in which closing brace of anonymous subs + # was not outdented. We should look ahead and see if there is + # a level decrease at the next token (i.e., a closing token), + # but right now we do not have that information. For now + # we see if we are in a list, and this works well. + # See test files 'sub*.t' for good test cases. + if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/ + && $container_environment_to_go[$i_terminal] eq 'LIST' + && !$rOpts->{'indent-closing-brace'} ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_to_go[$ibeg]; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } + } + } + + # YVES patch 1 of 2: + # Undo ci of line with leading closing eval brace, + # but not beyond the indention of the line with + # the opening brace. + if ( $block_type_to_go[$ibeg] eq 'eval' + && !$rOpts->{'line-up-parentheses'} + && !$rOpts->{'indent-closing-brace'} ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_to_go[$ibeg]; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } + } + + $default_adjust_indentation = $adjust_indentation; + + # Now modify default behavior according to user request: + # handle option to indent non-blocks of the form ); }; ]; + # But don't do special indentation to something like ')->pack(' + if ( !$block_type_to_go[$ibeg] ) { + my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; + if ( $cti == 1 ) { + if ( $i_terminal <= $ibeg + 1 + || $is_semicolon_terminated ) + { + $adjust_indentation = 2; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 2 ) { + if ($is_semicolon_terminated) { + $adjust_indentation = 3; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 3 ) { + $adjust_indentation = 3; + } + } + + # handle option to indent blocks + else { + if ( + $rOpts->{'indent-closing-brace'} + && ( + $i_terminal == $ibeg # isolated terminal '}' + || $is_semicolon_terminated + ) + ) # } xxxx ; + { + $adjust_indentation = 3; + } + } + } + + # if at ');', '};', '>;', and '];' of a terminal qw quote + elsif ($rpatterns->[0] =~ /^qb*;$/ + && $rfields->[0] =~ /^([\)\}\]\>]);$/ ) + { + if ( $closing_token_indentation{$1} == 0 ) { + $adjust_indentation = 1; + } + else { + $adjust_indentation = 3; + } + } + + # if line begins with a ':', align it with any + # previous line leading with corresponding ? + elsif ( $types_to_go[$ibeg] eq ':' ) { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + if ($is_leading) { $adjust_indentation = 2; } + } + + ########################################################## + # Section 2: set indentation according to flag set above + # + # Select the indentation object to define leading + # whitespace. If we are outdenting something like '} } );' + # then we want to use one level below the last token + # ($i_terminal) in order to get it to fully outdent through + # all levels. + ########################################################## + my $indentation; + my $lev; + my $level_end = $levels_to_go[$iend]; + + if ( $adjust_indentation == 0 ) { + $indentation = $leading_spaces_to_go[$ibeg]; + $lev = $levels_to_go[$ibeg]; + } + elsif ( $adjust_indentation == 1 ) { + + # Change the indentation to be that of a different token on the line + # Previously, the indentation of the terminal token was used: + # OLD CODING: + # $indentation = $reduced_spaces_to_go[$i_terminal]; + # $lev = $levels_to_go[$i_terminal]; + + # Generalization for MOJO: + # Use the lowest level indentation of the tokens on the line. + # For example, here we can use the indentation of the ending ';': + # } until ($selection > 0 and $selection < 10); # ok to use ';' + # But this will not outdent if we use the terminal indentation: + # )->then( sub { # use indentation of the ->, not the { + # Warning: reduced_spaces_to_go[] may be a reference, do not + # do numerical checks with it + + my $i_ind = $ibeg; + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + while ( $i_ind < $i_terminal ) { + $i_ind++; + if ( $levels_to_go[$i_ind] < $lev ) { + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + } + } + } + + # handle indented closing token which aligns with opening token + elsif ( $adjust_indentation == 2 ) { + + # handle option to align closing token with opening token + $lev = $levels_to_go[$ibeg]; + + # calculate spaces needed to align with opening token + my $space_count = + get_spaces($opening_indentation) + $opening_offset; + + # Indent less than the previous line. + # + # Problem: For -lp we don't exactly know what it was if there + # were recoverable spaces sent to the aligner. A good solution + # would be to force a flush of the vertical alignment buffer, so + # that we would know. For now, this rule is used for -lp: + # + # When the last line did not start with a closing token we will + # be optimistic that the aligner will recover everything wanted. + # + # This rule will prevent us from breaking a hierarchy of closing + # tokens, and in a worst case will leave a closing paren too far + # indented, but this is better than frequently leaving it not + # indented enough. + my $last_spaces = get_spaces($last_indentation_written); + if ( $last_leading_token !~ /^[\}\]\)]$/ ) { + $last_spaces += + get_recoverable_spaces($last_indentation_written); + } + + # reset the indentation to the new space count if it works + # only options are all or none: nothing in-between looks good + $lev = $levels_to_go[$ibeg]; + if ( $space_count < $last_spaces ) { + if ($rOpts_line_up_parentheses) { + my $lev = $levels_to_go[$ibeg]; + $indentation = + new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); + } + else { + $indentation = $space_count; + } + } + + # revert to default if it doesn't work + else { + $space_count = leading_spaces_to_go($ibeg); + if ( $default_adjust_indentation == 0 ) { + $indentation = $leading_spaces_to_go[$ibeg]; + } + elsif ( $default_adjust_indentation == 1 ) { + $indentation = $reduced_spaces_to_go[$i_terminal]; + $lev = $levels_to_go[$i_terminal]; + } + } + } + + # Full indentaion of closing tokens (-icb and -icp or -cti=2) + else { + + # handle -icb (indented closing code block braces) + # Updated method for indented block braces: indent one full level if + # there is no continuation indentation. This will occur for major + # structures such as sub, if, else, but not for things like map + # blocks. + # + # Note: only code blocks without continuation indentation are + # handled here (if, else, unless, ..). In the following snippet, + # the terminal brace of the sort block will have continuation + # indentation as shown so it will not be handled by the coding + # here. We would have to undo the continuation indentation to do + # this, but it probably looks ok as is. This is a possible future + # update for semicolon terminated lines. + # + # if ($sortby eq 'date' or $sortby eq 'size') { + # @files = sort { + # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} + # or $a cmp $b + # } @files; + # } + # + if ( $block_type_to_go[$ibeg] + && $ci_levels_to_go[$i_terminal] == 0 ) + { + my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); + $indentation = $spaces + $rOpts_indent_columns; + + # NOTE: for -lp we could create a new indentation object, but + # there is probably no need to do it + } + + # handle -icp and any -icb block braces which fall through above + # test such as the 'sort' block mentioned above. + else { + + # There are currently two ways to handle -icp... + # One way is to use the indentation of the previous line: + # $indentation = $last_indentation_written; + + # The other way is to use the indentation that the previous line + # would have had if it hadn't been adjusted: + $indentation = $last_unadjusted_indentation; + + # Current method: use the minimum of the two. This avoids + # inconsistent indentation. + if ( get_spaces($last_indentation_written) < + get_spaces($indentation) ) + { + $indentation = $last_indentation_written; + } + } + + # use previous indentation but use own level + # to cause list to be flushed properly + $lev = $levels_to_go[$ibeg]; + } + + # remember indentation except for multi-line quotes, which get + # no indentation + unless ( $ibeg == 0 && $starting_in_quote ) { + $last_indentation_written = $indentation; + $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; + $last_leading_token = $tokens_to_go[$ibeg]; + } + + # be sure lines with leading closing tokens are not outdented more + # than the line which contained the corresponding opening token. + + ############################################################# + # updated per bug report in alex_bug.pl: we must not + # mess with the indentation of closing logical braces so + # we must treat something like '} else {' as if it were + # an isolated brace my $is_isolated_block_brace = ( + # $iend == $ibeg ) && $block_type_to_go[$ibeg]; + ############################################################# + my $is_isolated_block_brace = $block_type_to_go[$ibeg] + && ( $iend == $ibeg + || $is_if_elsif_else_unless_while_until_for_foreach{ + $block_type_to_go[$ibeg] + } ); + + # only do this for a ':; which is aligned with its leading '?' + my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; + + if ( + defined($opening_indentation) + && !$leading_paren_arrow # MOJO + && !$is_isolated_block_brace + && !$is_unaligned_colon + ) + { + if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { + $indentation = $opening_indentation; + } + } + + # remember the indentation of each line of this batch + push @{$rindentation_list}, $indentation; + + # outdent lines with certain leading tokens... + if ( + + # must be first word of this batch + $ibeg == 0 + + # and ... + && ( + + # certain leading keywords if requested + ( + $rOpts->{'outdent-keywords'} + && $types_to_go[$ibeg] eq 'k' + && $outdent_keyword{ $tokens_to_go[$ibeg] } + ) + + # or labels if requested + || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) + + # or static block comments if requested + || ( $types_to_go[$ibeg] eq '#' + && $rOpts->{'outdent-static-block-comments'} + && $is_static_block_comment ) + ) + ) + + { + my $space_count = leading_spaces_to_go($ibeg); + if ( $space_count > 0 ) { + $space_count -= $rOpts_continuation_indentation; + $is_outdented_line = 1; + if ( $space_count < 0 ) { $space_count = 0 } + + # do not promote a spaced static block comment to non-spaced; + # this is not normally necessary but could be for some + # unusual user inputs (such as -ci = -i) + if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { + $space_count = 1; + } + + if ($rOpts_line_up_parentheses) { + $indentation = + new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); + } + else { + $indentation = $space_count; + } + } + } + + return ( $indentation, $lev, $level_end, $terminal_type, + $is_semicolon_terminated, $is_outdented_line ); + } +} + +sub set_vertical_tightness_flags { + + my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; + + # Define vertical tightness controls for the nth line of a batch. + # We create an array of parameters which tell the vertical aligner + # if we should combine this line with the next line to achieve the + # desired vertical tightness. The array of parameters contains: + # + # [0] type: 1=opening non-block 2=closing non-block + # 3=opening block brace 4=closing block brace + # + # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok + # if closing: spaces of padding to use + # [2] sequence number of container + # [3] valid flag: do not append if this flag is false. Will be + # true if appropriate -vt flag is set. Otherwise, Will be + # made true only for 2 line container in parens with -lp + # + # These flags are used by sub set_leading_whitespace in + # the vertical aligner + + my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1: + # Handle Lines 1 .. n-1 but not the last line + # For non-BLOCK tokens, we will need to examine the next line + # too, so we won't consider the last line. + #-------------------------------------------------------------- + if ( $n < $n_last_line ) { + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1a: + # Look for Type 1, last token of this line is a non-block opening token + #-------------------------------------------------------------- + my $ibeg_next = $ri_first->[ $n + 1 ]; + my $token_end = $tokens_to_go[$iend]; + my $iend_next = $ri_last->[ $n + 1 ]; + if ( + $type_sequence_to_go[$iend] + && !$block_type_to_go[$iend] + && $is_opening_token{$token_end} + && ( + $opening_vertical_tightness{$token_end} > 0 + + # allow 2-line method call to be closed up + || ( $rOpts_line_up_parentheses + && $token_end eq '(' + && $iend > $ibeg + && $types_to_go[ $iend - 1 ] ne 'b' ) + ) + ) + { + + # avoid multiple jumps in nesting depth in one line if + # requested + my $ovt = $opening_vertical_tightness{$token_end}; + my $iend_next = $ri_last->[ $n + 1 ]; + unless ( + $ovt < 2 + && ( $nesting_depth_to_go[ $iend_next + 1 ] != + $nesting_depth_to_go[$ibeg_next] ) + ) + { + + # If -vt flag has not been set, mark this as invalid + # and aligner will validate it if it sees the closing paren + # within 2 lines. + my $valid_flag = $ovt; + @{$rvertical_tightness_flags} = + ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); + } + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1b: + # Look for Type 2, first token of next line is a non-block closing + # token .. and be sure this line does not have a side comment + #-------------------------------------------------------------- + my $token_next = $tokens_to_go[$ibeg_next]; + if ( $type_sequence_to_go[$ibeg_next] + && !$block_type_to_go[$ibeg_next] + && $is_closing_token{$token_next} + && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! + { + my $ovt = $opening_vertical_tightness{$token_next}; + my $cvt = $closing_vertical_tightness{$token_next}; + if ( + + # never append a trailing line like )->pack( + # because it will throw off later alignment + ( + $nesting_depth_to_go[$ibeg_next] == + $nesting_depth_to_go[ $iend_next + 1 ] + 1 + ) + && ( + $cvt == 2 + || ( + $container_environment_to_go[$ibeg_next] ne 'LIST' + && ( + $cvt == 1 + + # allow closing up 2-line method calls + || ( $rOpts_line_up_parentheses + && $token_next eq ')' ) + ) + ) + ) + ) + { + + # decide which trailing closing tokens to append.. + my $ok = 0; + if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } + else { + my $str = join( '', + @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); + + # append closing token if followed by comment or ';' + if ( $str =~ /^b?[#;]/ ) { $ok = 1 } + } + + if ($ok) { + my $valid_flag = $cvt; + @{$rvertical_tightness_flags} = ( + 2, + $tightness{$token_next} == 2 ? 0 : 1, + $type_sequence_to_go[$ibeg_next], $valid_flag, + ); + } + } + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1c: + # Implement the Opening Token Right flag (Type 2).. + # If requested, move an isolated trailing opening token to the end of + # the previous line which ended in a comma. We could do this + # in sub recombine_breakpoints but that would cause problems + # with -lp formatting. The problem is that indentation will + # quickly move far to the right in nested expressions. By + # doing it after indentation has been set, we avoid changes + # to the indentation. Actual movement of the token takes place + # in sub valign_output_step_B. + #-------------------------------------------------------------- + if ( + $opening_token_right{ $tokens_to_go[$ibeg_next] } + + # previous line is not opening + # (use -sot to combine with it) + && !$is_opening_token{$token_end} + + # previous line ended in one of these + # (add other cases if necessary; '=>' and '.' are not necessary + && !$block_type_to_go[$ibeg_next] + + # this is a line with just an opening token + && ( $iend_next == $ibeg_next + || $iend_next == $ibeg_next + 2 + && $types_to_go[$iend_next] eq '#' ) + + # looks bad if we align vertically with the wrong container + && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] + ) + { + my $valid_flag = 1; + my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; + @{$rvertical_tightness_flags} = + ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1d: + # Stacking of opening and closing tokens (Type 2) + #-------------------------------------------------------------- + my $stackable; + my $token_beg_next = $tokens_to_go[$ibeg_next]; + + # patch to make something like 'qw(' behave like an opening paren + # (aran.t) + if ( $types_to_go[$ibeg_next] eq 'q' ) { + if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { + $token_beg_next = $1; + } + } + + if ( $is_closing_token{$token_end} + && $is_closing_token{$token_beg_next} ) + { + $stackable = $stack_closing_token{$token_beg_next} + unless ( $block_type_to_go[$ibeg_next] ) + ; # shouldn't happen; just checking + } + elsif ($is_opening_token{$token_end} + && $is_opening_token{$token_beg_next} ) + { + $stackable = $stack_opening_token{$token_beg_next} + unless ( $block_type_to_go[$ibeg_next] ) + ; # shouldn't happen; just checking + } + + if ($stackable) { + + my $is_semicolon_terminated; + if ( $n + 1 == $n_last_line ) { + my ( $terminal_type, $i_terminal ) = terminal_type( + \@types_to_go, \@block_type_to_go, + $ibeg_next, $iend_next + ); + $is_semicolon_terminated = $terminal_type eq ';' + && $nesting_depth_to_go[$iend_next] < + $nesting_depth_to_go[$ibeg_next]; + } + + # this must be a line with just an opening token + # or end in a semicolon + if ( + $is_semicolon_terminated + || ( $iend_next == $ibeg_next + || $iend_next == $ibeg_next + 2 + && $types_to_go[$iend_next] eq '#' ) + ) + { + my $valid_flag = 1; + my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; + @{$rvertical_tightness_flags} = + ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, + ); + } + } + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 2: + # Handle type 3, opening block braces on last line of the batch + # Check for a last line with isolated opening BLOCK curly + #-------------------------------------------------------------- + elsif ($rOpts_block_brace_vertical_tightness + && $ibeg eq $iend + && $types_to_go[$iend] eq '{' + && $block_type_to_go[$iend] =~ + /$block_brace_vertical_tightness_pattern/o ) + { + @{$rvertical_tightness_flags} = + ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 3: + # Handle type 4, a closing block brace on the last line of the batch Check + # for a last line with isolated closing BLOCK curly + #-------------------------------------------------------------- + elsif ($rOpts_stack_closing_block_brace + && $ibeg eq $iend + && $block_type_to_go[$iend] + && $types_to_go[$iend] eq '}' ) + { + my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; + @{$rvertical_tightness_flags} = + ( 4, $spaces, $type_sequence_to_go[$iend], 1 ); + } + + # pack in the sequence numbers of the ends of this line + $rvertical_tightness_flags->[4] = get_seqno($ibeg); + $rvertical_tightness_flags->[5] = get_seqno($iend); + return $rvertical_tightness_flags; +} + +sub get_seqno { + + # get opening and closing sequence numbers of a token for the vertical + # aligner. Assign qw quotes a value to allow qw opening and closing tokens + # to be treated somewhat like opening and closing tokens for stacking + # tokens by the vertical aligner. + my ($ii) = @_; + my $seqno = $type_sequence_to_go[$ii]; + if ( $types_to_go[$ii] eq 'q' ) { + my $SEQ_QW = -1; + if ( $ii > 0 ) { + $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); + } + else { + if ( !$ending_in_quote ) { + $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); + } + } + } + return ($seqno); +} + +{ + my %is_vertical_alignment_type; + my %is_vertical_alignment_keyword; + my %is_terminal_alignment_type; + + BEGIN { + + my @q; + + # Replaced =~ and // in the list. // had been removed in RT 119588 + @q = qw# + = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= + { ? : => && || ~~ !~~ =~ !~ // + #; + @is_vertical_alignment_type{@q} = (1) x scalar(@q); + + # only align these at end of line + @q = qw(&& ||); + @is_terminal_alignment_type{@q} = (1) x scalar(@q); + + # eq and ne were removed from this list to improve alignment chances + @q = qw(if unless and or err for foreach while until); + @is_vertical_alignment_keyword{@q} = (1) x scalar(@q); + } + + sub set_vertical_alignment_markers { + + # This routine takes the first step toward vertical alignment of the + # lines of output text. It looks for certain tokens which can serve as + # vertical alignment markers (such as an '='). + # + # Method: We look at each token $i in this output batch and set + # $matching_token_to_go[$i] equal to those tokens at which we would + # accept vertical alignment. + + my ( $ri_first, $ri_last ) = @_; + + # nothing to do if we aren't allowed to change whitespace + if ( !$rOpts_add_whitespace ) { + for my $i ( 0 .. $max_index_to_go ) { + $matching_token_to_go[$i] = ''; + } + return; + } + + # remember the index of last nonblank token before any sidecomment + my $i_terminal = $max_index_to_go; + if ( $types_to_go[$i_terminal] eq '#' ) { + if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { + if ( $i_terminal > 0 ) { --$i_terminal } + } + } + + # look at each line of this batch.. + my $last_vertical_alignment_before_index; + my $vert_last_nonblank_type; + my $vert_last_nonblank_token; + my $vert_last_nonblank_block_type; + my $max_line = @{$ri_first} - 1; + + foreach my $line ( 0 .. $max_line ) { + my $ibeg = $ri_first->[$line]; + my $iend = $ri_last->[$line]; + $last_vertical_alignment_before_index = -1; + $vert_last_nonblank_type = ''; + $vert_last_nonblank_token = ''; + $vert_last_nonblank_block_type = ''; + + # look at each token in this output line.. + my $count = 0; + foreach my $i ( $ibeg .. $iend ) { + my $alignment_type = ''; + my $type = $types_to_go[$i]; + my $block_type = $block_type_to_go[$i]; + my $token = $tokens_to_go[$i]; + + # check for flag indicating that we should not align + # this token + if ( $matching_token_to_go[$i] ) { + $matching_token_to_go[$i] = ''; + next; + } + + #-------------------------------------------------------- + # First see if we want to align BEFORE this token + #-------------------------------------------------------- + + # The first possible token that we can align before + # is index 2 because: 1) it doesn't normally make sense to + # align before the first token and 2) the second + # token must be a blank if we are to align before + # the third + if ( $i < $ibeg + 2 ) { } + + # must follow a blank token + elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } + + # align a side comment -- + elsif ( $type eq '#' ) { + + unless ( + + # it is a static side comment + ( + $rOpts->{'static-side-comments'} + && $token =~ /$static_side_comment_pattern/o + ) + + # or a closing side comment + || ( $vert_last_nonblank_block_type + && $token =~ + /$closing_side_comment_prefix_pattern/o ) + ) + { + $alignment_type = $type; + } ## Example of a static side comment + } + + # otherwise, do not align two in a row to create a + # blank field + elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } + + # align before one of these keywords + # (within a line, since $i>1) + elsif ( $type eq 'k' ) { + + # /^(if|unless|and|or|eq|ne)$/ + if ( $is_vertical_alignment_keyword{$token} ) { + $alignment_type = $token; + } + } + + # align before one of these types.. + # Note: add '.' after new vertical aligner is operational + elsif ( $is_vertical_alignment_type{$type} ) { + $alignment_type = $token; + + # Do not align a terminal token. Although it might + # occasionally look ok to do this, this has been found to be + # a good general rule. The main problems are: + # (1) that the terminal token (such as an = or :) might get + # moved far to the right where it is hard to see because + # nothing follows it, and + # (2) doing so may prevent other good alignments. + # Current exceptions are && and || + if ( $i == $iend || $i >= $i_terminal ) { + $alignment_type = "" + unless ( $is_terminal_alignment_type{$type} ); + } + + # Do not align leading ': (' or '. ('. This would prevent + # alignment in something like the following: + # $extra_space .= + # ( $input_line_number < 10 ) ? " " + # : ( $input_line_number < 100 ) ? " " + # : ""; + # or + # $code = + # ( $case_matters ? $accessor : " lc($accessor) " ) + # . ( $yesno ? " eq " : " ne " ) + if ( $i == $ibeg + 2 + && $types_to_go[$ibeg] =~ /^[\.\:]$/ + && $types_to_go[ $i - 1 ] eq 'b' ) + { + $alignment_type = ""; + } + + # For a paren after keyword, only align something like this: + # if ( $a ) { &a } + # elsif ( $b ) { &b } + if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { + $alignment_type = "" + unless $vert_last_nonblank_token =~ + /^(if|unless|elsif)$/; + } + + # be sure the alignment tokens are unique + # This didn't work well: reason not determined + # if ($token ne $type) {$alignment_type .= $type} + } + + # NOTE: This is deactivated because it causes the previous + # if/elsif alignment to fail + #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) + #{ $alignment_type = $type; } + + if ($alignment_type) { + $last_vertical_alignment_before_index = $i; + } + + #-------------------------------------------------------- + # Next see if we want to align AFTER the previous nonblank + #-------------------------------------------------------- + + # We want to line up ',' and interior ';' tokens, with the added + # space AFTER these tokens. (Note: interior ';' is included + # because it may occur in short blocks). + if ( + + # we haven't already set it + !$alignment_type + + # and its not the first token of the line + && ( $i > $ibeg ) + + # and it follows a blank + && $types_to_go[ $i - 1 ] eq 'b' + + # and previous token IS one of these: + && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) + + # and it's NOT one of these + && ( $type !~ /^[b\#\)\]\}]$/ ) + + # then go ahead and align + ) + + { + $alignment_type = $vert_last_nonblank_type; + } + + #-------------------------------------------------------- + # patch for =~ operator. We only align this if it + # is the first operator in a line, and the line is a simple + # statement. Aligning them within a statement + # interferes could interfere with other good alignments. + #-------------------------------------------------------- + if ( $alignment_type eq '=~' ) { + my $terminal_type = $types_to_go[$i_terminal]; + if ( $count > 0 || $max_line > 0 || $terminal_type ne ';' ) + { + $alignment_type = ""; + } + } + + #-------------------------------------------------------- + # then store the value + #-------------------------------------------------------- + $matching_token_to_go[$i] = $alignment_type; + $count++ if ($alignment_type); + if ( $type ne 'b' ) { + $vert_last_nonblank_type = $type; + $vert_last_nonblank_token = $token; + $vert_last_nonblank_block_type = $block_type; + } + } + } + return; + } +} + +sub terminal_type { + + # returns type of last token on this line (terminal token), as follows: + # returns # for a full-line comment + # returns ' ' for a blank line + # otherwise returns final token type + + my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; + + # check for full-line comment.. + if ( $rtype->[$ibeg] eq '#' ) { + return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg]; + } + else { + + # start at end and walk backwards.. + for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { + + # skip past any side comment and blanks + next if ( $rtype->[$i] eq 'b' ); + next if ( $rtype->[$i] eq '#' ); + + # found it..make sure it is a BLOCK termination, + # but hide a terminal } after sort/grep/map because it is not + # necessarily the end of the line. (terminal.t) + my $terminal_type = $rtype->[$i]; + if ( + $terminal_type eq '}' + && ( !$rblock_type->[$i] + || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) ) + ) + { + $terminal_type = 'b'; + } + return wantarray ? ( $terminal_type, $i ) : $terminal_type; + } + + # empty line + return wantarray ? ( ' ', $ibeg ) : ' '; + } +} + +{ # set_bond_strengths + + my %is_good_keyword_breakpoint; + my %is_lt_gt_le_ge; + + my %binary_bond_strength; + my %nobreak_lhs; + my %nobreak_rhs; + + my @bias_tokens; + my $delta_bias; + + sub bias_table_key { + my ( $type, $token ) = @_; + my $bias_table_key = $type; + if ( $type eq 'k' ) { + $bias_table_key = $token; + if ( $token eq 'err' ) { $bias_table_key = 'or' } + } + return $bias_table_key; + } + + sub initialize_bond_strength_hashes { + + my @q; + @q = qw(if unless while until for foreach); + @is_good_keyword_breakpoint{@q} = (1) x scalar(@q); + + @q = qw(lt gt le ge); + @is_lt_gt_le_ge{@q} = (1) x scalar(@q); + # + # The decision about where to break a line depends upon a "bond + # strength" between tokens. The LOWER the bond strength, the MORE + # likely a break. A bond strength may be any value but to simplify + # things there are several pre-defined strength levels: + + # NO_BREAK => 10000; + # VERY_STRONG => 100; + # STRONG => 2.1; + # NOMINAL => 1.1; + # WEAK => 0.8; + # VERY_WEAK => 0.55; + + # The strength values are based on trial-and-error, and need to be + # tweaked occasionally to get desired results. Some comments: + # + # 1. Only relative strengths are important. small differences + # in strengths can make big formatting differences. + # 2. Each indentation level adds one unit of bond strength. + # 3. A value of NO_BREAK makes an unbreakable bond + # 4. A value of VERY_WEAK is the strength of a ',' + # 5. Values below NOMINAL are considered ok break points. + # 6. Values above NOMINAL are considered poor break points. + # + # The bond strengths should roughly follow precedence order where + # possible. If you make changes, please check the results very + # carefully on a variety of scripts. Testing with the -extrude + # options is particularly helpful in exercising all of the rules. + + # Wherever possible, bond strengths are defined in the following + # tables. There are two main stages to setting bond strengths and + # two types of tables: + # + # The first stage involves looking at each token individually and + # defining left and right bond strengths, according to if we want + # to break to the left or right side, and how good a break point it + # is. For example tokens like =, ||, && make good break points and + # will have low strengths, but one might want to break on either + # side to put them at the end of one line or beginning of the next. + # + # The second stage involves looking at certain pairs of tokens and + # defining a bond strength for that particular pair. This second + # stage has priority. + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 1. + # Set left and right bond strengths of individual tokens. + #--------------------------------------------------------------- + + # NOTE: NO_BREAK's set in this section first are HINTS which will + # probably not be honored. Essential NO_BREAKS's should be set in + # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end + # of this subroutine. + + # Note that we are setting defaults in this section. The user + # cannot change bond strengths but can cause the left and right + # bond strengths of any token type to be swapped through the use of + # the -wba and -wbb flags. In this way the user can determine if a + # breakpoint token should appear at the end of one line or the + # beginning of the next line. + + # The hash keys in this section are token types, plus the text of + # certain keywords like 'or', 'and'. + + # no break around possible filehandle + $left_bond_strength{'Z'} = NO_BREAK; + $right_bond_strength{'Z'} = NO_BREAK; + + # never put a bare word on a new line: + # example print (STDERR, "bla"); will fail with break after ( + $left_bond_strength{'w'} = NO_BREAK; + + # blanks always have infinite strength to force breaks after + # real tokens + $right_bond_strength{'b'} = NO_BREAK; + + # try not to break on exponentation + @q = qw# ** .. ... <=> #; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = (STRONG) x scalar(@q); + + # The comma-arrow has very low precedence but not a good break point + $left_bond_strength{'=>'} = NO_BREAK; + $right_bond_strength{'=>'} = NOMINAL; + + # ok to break after label + $left_bond_strength{'J'} = NO_BREAK; + $right_bond_strength{'J'} = NOMINAL; + $left_bond_strength{'j'} = STRONG; + $right_bond_strength{'j'} = STRONG; + $left_bond_strength{'A'} = STRONG; + $right_bond_strength{'A'} = STRONG; + + $left_bond_strength{'->'} = STRONG; + $right_bond_strength{'->'} = VERY_STRONG; + + $left_bond_strength{'CORE::'} = NOMINAL; + $right_bond_strength{'CORE::'} = NO_BREAK; + + # breaking AFTER modulus operator is ok: + @q = qw< % >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q); + + # Break AFTER math operators * and / + @q = qw< * / x >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = (NOMINAL) x scalar(@q); + + # Break AFTER weakest math operators + and - + # Make them weaker than * but a bit stronger than '.' + @q = qw< + - >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q); + + # breaking BEFORE these is just ok: + @q = qw# >> << #; + @right_bond_strength{@q} = (STRONG) x scalar(@q); + @left_bond_strength{@q} = (NOMINAL) x scalar(@q); + + # breaking before the string concatenation operator seems best + # because it can be hard to see at the end of a line + $right_bond_strength{'.'} = STRONG; + $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; + + @q = qw< } ] ) R >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = (NOMINAL) x scalar(@q); + + # make these a little weaker than nominal so that they get + # favored for end-of-line characters + @q = qw< != == =~ !~ ~~ !~~ >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q); + + # break AFTER these + @q = qw# < > | & >= <= #; + @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q); + + # breaking either before or after a quote is ok + # but bias for breaking before a quote + $left_bond_strength{'Q'} = NOMINAL; + $right_bond_strength{'Q'} = NOMINAL + 0.02; + $left_bond_strength{'q'} = NOMINAL; + $right_bond_strength{'q'} = NOMINAL; + + # starting a line with a keyword is usually ok + $left_bond_strength{'k'} = NOMINAL; + + # we usually want to bond a keyword strongly to what immediately + # follows, rather than leaving it stranded at the end of a line + $right_bond_strength{'k'} = STRONG; + + $left_bond_strength{'G'} = NOMINAL; + $right_bond_strength{'G'} = STRONG; + + # assignment operators + @q = qw( + = **= += *= &= <<= &&= + -= /= |= >>= ||= //= + .= %= ^= + x= + ); + + # Default is to break AFTER various assignment operators + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q); + + # Default is to break BEFORE '&&' and '||' and '//' + # set strength of '||' to same as '=' so that chains like + # $a = $b || $c || $d will break before the first '||' + $right_bond_strength{'||'} = NOMINAL; + $left_bond_strength{'||'} = $right_bond_strength{'='}; + + # same thing for '//' + $right_bond_strength{'//'} = NOMINAL; + $left_bond_strength{'//'} = $right_bond_strength{'='}; + + # set strength of && a little higher than || + $right_bond_strength{'&&'} = NOMINAL; + $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; + + $left_bond_strength{';'} = VERY_STRONG; + $right_bond_strength{';'} = VERY_WEAK; + $left_bond_strength{'f'} = VERY_STRONG; + + # make right strength of for ';' a little less than '=' + # to make for contents break after the ';' to avoid this: + # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += + # $number_of_fields ) + # and make it weaker than ',' and 'and' too + $right_bond_strength{'f'} = VERY_WEAK - 0.03; + + # The strengths of ?/: should be somewhere between + # an '=' and a quote (NOMINAL), + # make strength of ':' slightly less than '?' to help + # break long chains of ? : after the colons + $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; + $right_bond_strength{':'} = NO_BREAK; + $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; + $right_bond_strength{'?'} = NO_BREAK; + + $left_bond_strength{','} = VERY_STRONG; + $right_bond_strength{','} = VERY_WEAK; + + # remaining digraphs and trigraphs not defined above + @q = qw( :: <> ++ --); + @left_bond_strength{@q} = (WEAK) x scalar(@q); + @right_bond_strength{@q} = (STRONG) x scalar(@q); + + # Set bond strengths of certain keywords + # make 'or', 'err', 'and' slightly weaker than a ',' + $left_bond_strength{'and'} = VERY_WEAK - 0.01; + $left_bond_strength{'or'} = VERY_WEAK - 0.02; + $left_bond_strength{'err'} = VERY_WEAK - 0.02; + $left_bond_strength{'xor'} = NOMINAL; + $right_bond_strength{'and'} = NOMINAL; + $right_bond_strength{'or'} = NOMINAL; + $right_bond_strength{'err'} = NOMINAL; + $right_bond_strength{'xor'} = STRONG; + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 2. + # Set binary rules for bond strengths between certain token types. + #--------------------------------------------------------------- + + # We have a little problem making tables which apply to the + # container tokens. Here is a list of container tokens and + # their types: + # + # type tokens // meaning + # { {, [, ( // indent + # } }, ], ) // outdent + # [ [ // left non-structural [ (enclosing an array index) + # ] ] // right non-structural square bracket + # ( ( // left non-structural paren + # ) ) // right non-structural paren + # L { // left non-structural curly brace (enclosing a key) + # R } // right non-structural curly brace + # + # Some rules apply to token types and some to just the token + # itself. We solve the problem by combining type and token into a + # new hash key for the container types. + # + # If a rule applies to a token 'type' then we need to make rules + # for each of these 'type.token' combinations: + # Type Type.Token + # { {{, {[, {( + # [ [[ + # ( (( + # L L{ + # } }}, }], }) + # ] ]] + # ) )) + # R R} + # + # If a rule applies to a token then we need to make rules for + # these 'type.token' combinations: + # Token Type.Token + # { {{, L{ + # [ {[, [[ + # ( {(, (( + # } }}, R} + # ] }], ]] + # ) }), )) + + # allow long lines before final { in an if statement, as in: + # if (.......... + # ..........) + # { + # + # Otherwise, the line before the { tends to be too short. + + $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; + $binary_bond_strength{'(('}{'{{'} = NOMINAL; + + # break on something like '} (', but keep this stronger than a ',' + # example is in 'howe.pl' + $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; + $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; + + # keep matrix and hash indices together + # but make them a little below STRONG to allow breaking open + # something like {'some-word'}{'some-very-long-word'} at the }{ + # (bracebrk.t) + $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; + + # increase strength to the point where a break in the following + # will be after the opening paren rather than at the arrow: + # $a->$b($c); + $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; + + $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + + $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; + + #--------------------------------------------------------------- + # Binary NO_BREAK rules + #--------------------------------------------------------------- + + # use strict requires that bare word and => not be separated + $binary_bond_strength{'C'}{'=>'} = NO_BREAK; + $binary_bond_strength{'U'}{'=>'} = NO_BREAK; + + # Never break between a bareword and a following paren because + # perl may give an error. For example, if a break is placed + # between 'to_filehandle' and its '(' the following line will + # give a syntax error [Carp.pm]: my( $no) =fileno( + # to_filehandle( $in)) ; + $binary_bond_strength{'C'}{'(('} = NO_BREAK; + $binary_bond_strength{'C'}{'{('} = NO_BREAK; + $binary_bond_strength{'U'}{'(('} = NO_BREAK; + $binary_bond_strength{'U'}{'{('} = NO_BREAK; + + # use strict requires that bare word within braces not start new + # line + $binary_bond_strength{'L{'}{'w'} = NO_BREAK; + + $binary_bond_strength{'w'}{'R}'} = NO_BREAK; + + # use strict requires that bare word and => not be separated + $binary_bond_strength{'w'}{'=>'} = NO_BREAK; + + # use strict does not allow separating type info from trailing { } + # testfile is readmail.pl + $binary_bond_strength{'t'}{'L{'} = NO_BREAK; + $binary_bond_strength{'i'}{'L{'} = NO_BREAK; + + # As a defensive measure, do not break between a '(' and a + # filehandle. In some cases, this can cause an error. For + # example, the following program works: + # my $msg="hi!\n"; + # print + # ( STDOUT + # $msg + # ); + # + # But this program fails: + # my $msg="hi!\n"; + # print + # ( + # STDOUT + # $msg + # ); + # + # This is normally only a problem with the 'extrude' option + $binary_bond_strength{'(('}{'Y'} = NO_BREAK; + $binary_bond_strength{'{('}{'Y'} = NO_BREAK; + + # never break between sub name and opening paren + $binary_bond_strength{'w'}{'(('} = NO_BREAK; + $binary_bond_strength{'w'}{'{('} = NO_BREAK; + + # keep '}' together with ';' + $binary_bond_strength{'}}'}{';'} = NO_BREAK; + + # Breaking before a ++ can cause perl to guess wrong. For + # example the following line will cause a syntax error + # with -extrude if we break between '$i' and '++' [fixstyle2] + # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); + $nobreak_lhs{'++'} = NO_BREAK; + + # Do not break before a possible file handle + $nobreak_lhs{'Z'} = NO_BREAK; + + # use strict hates bare words on any new line. For + # example, a break before the underscore here provokes the + # wrath of use strict: + # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { + $nobreak_rhs{'F'} = NO_BREAK; + $nobreak_rhs{'CORE::'} = NO_BREAK; + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 3. + # Define tables and values for applying a small bias to the above + # values. + #--------------------------------------------------------------- + # Adding a small 'bias' to strengths is a simple way to make a line + # break at the first of a sequence of identical terms. For + # example, to force long string of conditional operators to break + # with each line ending in a ':', we can add a small number to the + # bond strength of each ':' (colon.t) + @bias_tokens = qw( : && || f and or . ); # tokens which get bias + $delta_bias = 0.0001; # a very small strength level + return; + + } ## end sub initialize_bond_strength_hashes + + sub set_bond_strengths { + + # patch-its always ok to break at end of line + $nobreak_to_go[$max_index_to_go] = 0; + + # we start a new set of bias values for each line + my %bias; + @bias{@bias_tokens} = (0) x scalar(@bias_tokens); + my $code_bias = -.01; # bias for closing block braces + + my $type = 'b'; + my $token = ' '; + my $last_type; + my $last_nonblank_type = $type; + my $last_nonblank_token = $token; + my $list_str = $left_bond_strength{'?'}; + + my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, + $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, + ); + + # main loop to compute bond strengths between each pair of tokens + foreach my $i ( 0 .. $max_index_to_go ) { + $last_type = $type; + if ( $type ne 'b' ) { + $last_nonblank_type = $type; + $last_nonblank_token = $token; + } + $type = $types_to_go[$i]; + + # strength on both sides of a blank is the same + if ( $type eq 'b' && $last_type ne 'b' ) { + $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; + next; + } + + $token = $tokens_to_go[$i]; + $block_type = $block_type_to_go[$i]; + $i_next = $i + 1; + $next_type = $types_to_go[$i_next]; + $next_token = $tokens_to_go[$i_next]; + $total_nesting_depth = $nesting_depth_to_go[$i_next]; + $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); + $next_nonblank_type = $types_to_go[$i_next_nonblank]; + $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + + # We are computing the strength of the bond between the current + # token and the NEXT token. + + #--------------------------------------------------------------- + # Bond Strength Section 1: + # First Approximation. + # Use minimum of individual left and right tabulated bond + # strengths. + #--------------------------------------------------------------- + my $bsr = $right_bond_strength{$type}; + my $bsl = $left_bond_strength{$next_nonblank_type}; + + # define right bond strengths of certain keywords + if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { + $bsr = $right_bond_strength{$token}; + } + elsif ( $token eq 'ne' or $token eq 'eq' ) { + $bsr = NOMINAL; + } + + # set terminal bond strength to the nominal value + # this will cause good preceding breaks to be retained + if ( $i_next_nonblank > $max_index_to_go ) { + $bsl = NOMINAL; + } + + # define right bond strengths of certain keywords + if ( $next_nonblank_type eq 'k' + && defined( $left_bond_strength{$next_nonblank_token} ) ) + { + $bsl = $left_bond_strength{$next_nonblank_token}; + } + elsif ($next_nonblank_token eq 'ne' + or $next_nonblank_token eq 'eq' ) + { + $bsl = NOMINAL; + } + elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { + $bsl = 0.9 * NOMINAL + 0.1 * STRONG; + } + + # Use the minimum of the left and right strengths. Note: it might + # seem that we would want to keep a NO_BREAK if either token has + # this value. This didn't work, for example because in an arrow + # list, it prevents the comma from separating from the following + # bare word (which is probably quoted by its arrow). So necessary + # NO_BREAK's have to be handled as special cases in the final + # section. + if ( !defined($bsr) ) { $bsr = VERY_STRONG } + if ( !defined($bsl) ) { $bsl = VERY_STRONG } + my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; + my $bond_str_1 = $bond_str; + + #--------------------------------------------------------------- + # Bond Strength Section 2: + # Apply hardwired rules.. + #--------------------------------------------------------------- + + # Patch to put terminal or clauses on a new line: Weaken the bond + # at an || followed by die or similar keyword to make the terminal + # or clause fall on a new line, like this: + # + # my $class = shift + # || die "Cannot add broadcast: No class identifier found"; + # + # Otherwise the break will be at the previous '=' since the || and + # = have the same starting strength and the or is biased, like + # this: + # + # my $class = + # shift || die "Cannot add broadcast: No class identifier found"; + # + # In any case if the user places a break at either the = or the || + # it should remain there. + if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { + if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) { + if ( $want_break_before{$token} && $i > 0 ) { + $bond_strength_to_go[ $i - 1 ] -= $delta_bias; + } + else { + $bond_str -= $delta_bias; + } + } + } + + # good to break after end of code blocks + if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { + + $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; + $code_bias += $delta_bias; + } + + if ( $type eq 'k' ) { + + # allow certain control keywords to stand out + if ( $next_nonblank_type eq 'k' + && $is_last_next_redo_return{$token} ) + { + $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; + } + + # Don't break after keyword my. This is a quick fix for a + # rare problem with perl. An example is this line from file + # Container.pm: + + # foreach my $question( Debian::DebConf::ConfigDb::gettree( + # $this->{'question'} ) ) + + if ( $token eq 'my' ) { + $bond_str = NO_BREAK; + } + + } + + # good to break before 'if', 'unless', etc + if ( $is_if_brace_follower{$next_nonblank_token} ) { + $bond_str = VERY_WEAK; + } + + if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { + + # FIXME: needs more testing + if ( $is_keyword_returning_list{$next_nonblank_token} ) { + $bond_str = $list_str if ( $bond_str > $list_str ); + } + + # keywords like 'unless', 'if', etc, within statements + # make good breaks + if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { + $bond_str = VERY_WEAK / 1.05; + } + } + + # try not to break before a comma-arrow + elsif ( $next_nonblank_type eq '=>' ) { + if ( $bond_str < STRONG ) { $bond_str = STRONG } + } + + #--------------------------------------------------------------- + # Additional hardwired NOBREAK rules + #--------------------------------------------------------------- + + # map1.t -- correct for a quirk in perl + if ( $token eq '(' + && $next_nonblank_type eq 'i' + && $last_nonblank_type eq 'k' + && $is_sort_map_grep{$last_nonblank_token} ) + + # /^(sort|map|grep)$/ ) + { + $bond_str = NO_BREAK; + } + + # extrude.t: do not break before paren at: + # -l pid_filename( + if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { + $bond_str = NO_BREAK; + } + + # in older version of perl, use strict can cause problems with + # breaks before bare words following opening parens. For example, + # this will fail under older versions if a break is made between + # '(' and 'MAIL': use strict; open( MAIL, "a long filename or + # command"); close MAIL; + if ( $type eq '{' ) { + + if ( $token eq '(' && $next_nonblank_type eq 'w' ) { + + # but it's fine to break if the word is followed by a '=>' + # or if it is obviously a sub call + my $i_next_next_nonblank = $i_next_nonblank + 1; + my $next_next_type = $types_to_go[$i_next_next_nonblank]; + if ( $next_next_type eq 'b' + && $i_next_nonblank < $max_index_to_go ) + { + $i_next_next_nonblank++; + $next_next_type = $types_to_go[$i_next_next_nonblank]; + } + + # We'll check for an old breakpoint and keep a leading + # bareword if it was that way in the input file. + # Presumably it was ok that way. For example, the + # following would remain unchanged: + # + # @months = ( + # January, February, March, April, + # May, June, July, August, + # September, October, November, December, + # ); + # + # This should be sufficient: + if ( + !$old_breakpoint_to_go[$i] + && ( $next_next_type eq ',' + || $next_next_type eq '}' ) + ) + { + $bond_str = NO_BREAK; + } + } + } + + # Do not break between a possible filehandle and a ? or / and do + # not introduce a break after it if there is no blank + # (extrude.t) + elsif ( $type eq 'Z' ) { + + # don't break.. + if ( + + # if there is no blank and we do not want one. Examples: + # print $x++ # do not break after $x + # print HTML"HELLO" # break ok after HTML + ( + $next_type ne 'b' + && defined( $want_left_space{$next_type} ) + && $want_left_space{$next_type} == WS_NO + ) + + # or we might be followed by the start of a quote + || $next_nonblank_type =~ /^[\/\?]$/ + ) + { + $bond_str = NO_BREAK; + } + } + + # Breaking before a ? before a quote can cause trouble if + # they are not separated by a blank. + # Example: a syntax error occurs if you break before the ? here + # my$logic=join$all?' && ':' || ',@regexps; + # From: Professional_Perl_Programming_Code/multifind.pl + if ( $next_nonblank_type eq '?' ) { + $bond_str = NO_BREAK + if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); + } + + # Breaking before a . followed by a number + # can cause trouble if there is no intervening space + # Example: a syntax error occurs if you break before the .2 here + # $str .= pack($endian.2, ensurrogate($ord)); + # From: perl58/Unicode.pm + elsif ( $next_nonblank_type eq '.' ) { + $bond_str = NO_BREAK + if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); + } + + my $bond_str_2 = $bond_str; + + #--------------------------------------------------------------- + # End of hardwired rules + #--------------------------------------------------------------- + + #--------------------------------------------------------------- + # Bond Strength Section 3: + # Apply table rules. These have priority over the above + # hardwired rules. + #--------------------------------------------------------------- + + my $tabulated_bond_str; + my $ltype = $type; + my $rtype = $next_nonblank_type; + if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token } + if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) { + $rtype = $next_nonblank_type . $next_nonblank_token; + } + + if ( $binary_bond_strength{$ltype}{$rtype} ) { + $bond_str = $binary_bond_strength{$ltype}{$rtype}; + $tabulated_bond_str = $bond_str; + } + + if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { + $bond_str = NO_BREAK; + $tabulated_bond_str = $bond_str; + } + my $bond_str_3 = $bond_str; + + # If the hardwired rules conflict with the tabulated bond + # strength then there is an inconsistency that should be fixed + FORMATTER_DEBUG_FLAG_BOND_TABLES + && $tabulated_bond_str + && $bond_str_1 + && $bond_str_1 != $bond_str_2 + && $bond_str_2 != $tabulated_bond_str + && do { + print STDERR +"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; + }; + + #----------------------------------------------------------------- + # Bond Strength Section 4: + # Modify strengths of certain tokens which often occur in sequence + # by adding a small bias to each one in turn so that the breaks + # occur from left to right. + # + # Note that we only changing strengths by small amounts here, + # and usually increasing, so we should not be altering any NO_BREAKs. + # Other routines which check for NO_BREAKs will use a tolerance + # of one to avoid any problem. + #----------------------------------------------------------------- + + # The bias tables use special keys + my $left_key = bias_table_key( $type, $token ); + my $right_key = + bias_table_key( $next_nonblank_type, $next_nonblank_token ); + + # add any bias set by sub scan_list at old comma break points. + if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] } + + # bias left token + elsif ( defined( $bias{$left_key} ) ) { + if ( !$want_break_before{$left_key} ) { + $bias{$left_key} += $delta_bias; + $bond_str += $bias{$left_key}; + } + } + + # bias right token + if ( defined( $bias{$right_key} ) ) { + if ( $want_break_before{$right_key} ) { + + # for leading '.' align all but 'short' quotes; the idea + # is to not place something like "\n" on a single line. + if ( $right_key eq '.' ) { + unless ( + $last_nonblank_type eq '.' + && ( + length($token) <= + $rOpts_short_concatenation_item_length ) + && ( !$is_closing_token{$token} ) + ) + { + $bias{$right_key} += $delta_bias; + } + } + else { + $bias{$right_key} += $delta_bias; + } + $bond_str += $bias{$right_key}; + } + } + my $bond_str_4 = $bond_str; + + #--------------------------------------------------------------- + # Bond Strength Section 5: + # Fifth Approximation. + # Take nesting depth into account by adding the nesting depth + # to the bond strength. + #--------------------------------------------------------------- + my $strength; + + if ( defined($bond_str) && !$nobreak_to_go[$i] ) { + if ( $total_nesting_depth > 0 ) { + $strength = $bond_str + $total_nesting_depth; + } + else { + $strength = $bond_str; + } + } + else { + $strength = NO_BREAK; + } + + #--------------------------------------------------------------- + # Bond Strength Section 6: + # Sixth Approximation. Welds. + #--------------------------------------------------------------- + + # Do not allow a break within welds, + if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK } + + # But encourage breaking after opening welded tokens + elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) { + $strength -= 1; + } + + # always break after side comment + if ( $type eq '#' ) { $strength = 0 } + + $bond_strength_to_go[$i] = $strength; + + FORMATTER_DEBUG_FLAG_BOND && do { + my $str = substr( $token, 0, 15 ); + $str .= ' ' x ( 16 - length($str) ); + print STDOUT +"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; + }; + } ## end main loop + return; + } ## end sub set_bond_strengths +} + +sub pad_array_to_go { + + # to simplify coding in scan_list and set_bond_strengths, it helps + # to create some extra blank tokens at the end of the arrays + $tokens_to_go[ $max_index_to_go + 1 ] = ''; + $tokens_to_go[ $max_index_to_go + 2 ] = ''; + $types_to_go[ $max_index_to_go + 1 ] = 'b'; + $types_to_go[ $max_index_to_go + 2 ] = 'b'; + $nesting_depth_to_go[ $max_index_to_go + 1 ] = + $nesting_depth_to_go[$max_index_to_go]; + + # /^[R\}\)\]]$/ + if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { + if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { + + # shouldn't happen: + unless ( get_saw_brace_error() ) { + warning( +"Program bug in scan_list: hit nesting error which should have been caught\n" + ); + report_definite_bug(); + } + } + else { + $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; + } + } + + # /^[L\{\(\[]$/ + elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { + $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; + } + return; +} + +{ # begin scan_list + + my ( + $block_type, $current_depth, + $depth, $i, + $i_last_nonblank_token, $last_colon_sequence_number, + $last_nonblank_token, $last_nonblank_type, + $last_nonblank_block_type, $last_old_breakpoint_count, + $minimum_depth, $next_nonblank_block_type, + $next_nonblank_token, $next_nonblank_type, + $old_breakpoint_count, $starting_breakpoint_count, + $starting_depth, $token, + $type, $type_sequence, + ); + + my ( + @breakpoint_stack, @breakpoint_undo_stack, + @comma_index, @container_type, + @identifier_count_stack, @index_before_arrow, + @interrupted_list, @item_count_stack, + @last_comma_index, @last_dot_index, + @last_nonblank_type, @old_breakpoint_count_stack, + @opening_structure_index_stack, @rfor_semicolon_list, + @has_old_logical_breakpoints, @rand_or_list, + @i_equals, + ); + + # routine to define essential variables when we go 'up' to + # a new depth + sub check_for_new_minimum_depth { + my $depth = shift; + if ( $depth < $minimum_depth ) { + + $minimum_depth = $depth; + + # these arrays need not retain values between calls + $breakpoint_stack[$depth] = $starting_breakpoint_count; + $container_type[$depth] = ""; + $identifier_count_stack[$depth] = 0; + $index_before_arrow[$depth] = -1; + $interrupted_list[$depth] = 1; + $item_count_stack[$depth] = 0; + $last_nonblank_type[$depth] = ""; + $opening_structure_index_stack[$depth] = -1; + + $breakpoint_undo_stack[$depth] = undef; + $comma_index[$depth] = undef; + $last_comma_index[$depth] = undef; + $last_dot_index[$depth] = undef; + $old_breakpoint_count_stack[$depth] = undef; + $has_old_logical_breakpoints[$depth] = 0; + $rand_or_list[$depth] = []; + $rfor_semicolon_list[$depth] = []; + $i_equals[$depth] = -1; + + # these arrays must retain values between calls + if ( !defined( $has_broken_sublist[$depth] ) ) { + $dont_align[$depth] = 0; + $has_broken_sublist[$depth] = 0; + $want_comma_break[$depth] = 0; + } + } + return; + } + + # routine to decide which commas to break at within a container; + # returns: + # $bp_count = number of comma breakpoints set + # $do_not_break_apart = a flag indicating if container need not + # be broken open + sub set_comma_breakpoints { + + my $dd = shift; + my $bp_count = 0; + my $do_not_break_apart = 0; + + # anything to do? + if ( $item_count_stack[$dd] ) { + + # handle commas not in containers... + if ( $dont_align[$dd] ) { + do_uncontained_comma_breaks($dd); + } + + # handle commas within containers... + else { + my $fbc = $forced_breakpoint_count; + + # always open comma lists not preceded by keywords, + # barewords, identifiers (that is, anything that doesn't + # look like a function call) + my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; + + set_comma_breakpoints_do( + $dd, + $opening_structure_index_stack[$dd], + $i, + $item_count_stack[$dd], + $identifier_count_stack[$dd], + $comma_index[$dd], + $next_nonblank_type, + $container_type[$dd], + $interrupted_list[$dd], + \$do_not_break_apart, + $must_break_open, + ); + $bp_count = $forced_breakpoint_count - $fbc; + $do_not_break_apart = 0 if $must_break_open; + } + } + return ( $bp_count, $do_not_break_apart ); + } + + sub do_uncontained_comma_breaks { + + # Handle commas not in containers... + # This is a catch-all routine for commas that we + # don't know what to do with because the don't fall + # within containers. We will bias the bond strength + # to break at commas which ended lines in the input + # file. This usually works better than just trying + # to put as many items on a line as possible. A + # downside is that if the input file is garbage it + # won't work very well. However, the user can always + # prevent following the old breakpoints with the + # -iob flag. + my $dd = shift; + my $bias = -.01; + my $old_comma_break_count = 0; + foreach my $ii ( @{ $comma_index[$dd] } ) { + if ( $old_breakpoint_to_go[$ii] ) { + $old_comma_break_count++; + $bond_strength_to_go[$ii] = $bias; + + # reduce bias magnitude to force breaks in order + $bias *= 0.99; + } + } + + # Also put a break before the first comma if + # (1) there was a break there in the input, and + # (2) there was exactly one old break before the first comma break + # (3) OLD: there are multiple old comma breaks + # (3) NEW: there are one or more old comma breaks (see return example) + # + # For example, we will follow the user and break after + # 'print' in this snippet: + # print + # "conformability (Not the same dimension)\n", + # "\t", $have, " is ", text_unit($hu), "\n", + # "\t", $want, " is ", text_unit($wu), "\n", + # ; + # + # Another example, just one comma, where we will break after + # the return: + # return + # $x * cos($a) - $y * sin($a), + # $x * sin($a) + $y * cos($a); + + # Breaking a print statement: + # print SAVEOUT + # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", + # ( $? & 128 ) ? " -- core dumped" : "", "\n"; + # + # But we will not force a break after the opening paren here + # (causes a blinker): + # $heap->{stream}->set_output_filter( + # poe::filter::reference->new('myotherfreezer') ), + # ; + # + my $i_first_comma = $comma_index[$dd]->[0]; + if ( $old_breakpoint_to_go[$i_first_comma] ) { + my $level_comma = $levels_to_go[$i_first_comma]; + my $ibreak = -1; + my $obp_count = 0; + for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { + if ( $old_breakpoint_to_go[$ii] ) { + $obp_count++; + last if ( $obp_count > 1 ); + $ibreak = $ii + if ( $levels_to_go[$ii] == $level_comma ); + } + } + + # Changed rule from multiple old commas to just one here: + if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) + { + # Do not to break before an opening token because + # it can lead to "blinkers". + my $ibreakm = $ibreak; + $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' ); + if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ ) + { + set_forced_breakpoint($ibreak); + } + } + } + return; + } + + my %is_logical_container; + + BEGIN { + my @q = qw# if elsif unless while and or err not && | || ? : ! #; + @is_logical_container{@q} = (1) x scalar(@q); + } + + sub set_for_semicolon_breakpoints { + my $dd = shift; + foreach ( @{ $rfor_semicolon_list[$dd] } ) { + set_forced_breakpoint($_); + } + return; + } + + sub set_logical_breakpoints { + my $dd = shift; + if ( + $item_count_stack[$dd] == 0 + && $is_logical_container{ $container_type[$dd] } + + || $has_old_logical_breakpoints[$dd] + ) + { + + # Look for breaks in this order: + # 0 1 2 3 + # or and || && + foreach my $i ( 0 .. 3 ) { + if ( $rand_or_list[$dd][$i] ) { + foreach ( @{ $rand_or_list[$dd][$i] } ) { + set_forced_breakpoint($_); + } + + # break at any 'if' and 'unless' too + foreach ( @{ $rand_or_list[$dd][4] } ) { + set_forced_breakpoint($_); + } + $rand_or_list[$dd] = []; + last; + } + } + } + return; + } + + sub is_unbreakable_container { + + # never break a container of one of these types + # because bad things can happen (map1.t) + my $dd = shift; + return $is_sort_map_grep{ $container_type[$dd] }; + } + + sub scan_list { + + # This routine is responsible for setting line breaks for all lists, + # so that hierarchical structure can be displayed and so that list + # items can be vertically aligned. The output of this routine is + # stored in the array @forced_breakpoint_to_go, which is used to set + # final breakpoints. + + $starting_depth = $nesting_depth_to_go[0]; + + $block_type = ' '; + $current_depth = $starting_depth; + $i = -1; + $last_colon_sequence_number = -1; + $last_nonblank_token = ';'; + $last_nonblank_type = ';'; + $last_nonblank_block_type = ' '; + $last_old_breakpoint_count = 0; + $minimum_depth = $current_depth + 1; # forces update in check below + $old_breakpoint_count = 0; + $starting_breakpoint_count = $forced_breakpoint_count; + $token = ';'; + $type = ';'; + $type_sequence = ''; + + my $total_depth_variation = 0; + my $i_old_assignment_break; + my $depth_last = $starting_depth; + + check_for_new_minimum_depth($current_depth); + + my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; + my $want_previous_breakpoint = -1; + + my $saw_good_breakpoint; + my $i_line_end = -1; + my $i_line_start = -1; + + # loop over all tokens in this batch + while ( ++$i <= $max_index_to_go ) { + if ( $type ne 'b' ) { + $i_last_nonblank_token = $i - 1; + $last_nonblank_type = $type; + $last_nonblank_token = $token; + $last_nonblank_block_type = $block_type; + } ## end if ( $type ne 'b' ) + $type = $types_to_go[$i]; + $block_type = $block_type_to_go[$i]; + $token = $tokens_to_go[$i]; + $type_sequence = $type_sequence_to_go[$i]; + my $next_type = $types_to_go[ $i + 1 ]; + my $next_token = $tokens_to_go[ $i + 1 ]; + my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); + $next_nonblank_type = $types_to_go[$i_next_nonblank]; + $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; + + # set break if flag was set + if ( $want_previous_breakpoint >= 0 ) { + set_forced_breakpoint($want_previous_breakpoint); + $want_previous_breakpoint = -1; + } + + $last_old_breakpoint_count = $old_breakpoint_count; + if ( $old_breakpoint_to_go[$i] ) { + $i_line_end = $i; + $i_line_start = $i_next_nonblank; + + $old_breakpoint_count++; + + # Break before certain keywords if user broke there and + # this is a 'safe' break point. The idea is to retain + # any preferred breaks for sequential list operations, + # like a schwartzian transform. + if ($rOpts_break_at_old_keyword_breakpoints) { + if ( + $next_nonblank_type eq 'k' + && $is_keyword_returning_list{$next_nonblank_token} + && ( $type =~ /^[=\)\]\}Riw]$/ + || $type eq 'k' + && $is_keyword_returning_list{$token} ) + ) + { + + # we actually have to set this break next time through + # the loop because if we are at a closing token (such + # as '}') which forms a one-line block, this break might + # get undone. + $want_previous_breakpoint = $i; + } ## end if ( $next_nonblank_type...) + } ## end if ($rOpts_break_at_old_keyword_breakpoints) + + # Break before attributes if user broke there + if ($rOpts_break_at_old_attribute_breakpoints) { + if ( $next_nonblank_type eq 'A' ) { + $want_previous_breakpoint = $i; + } + } + + # remember an = break as possible good break point + if ( $is_assignment{$type} ) { + $i_old_assignment_break = $i; + } + elsif ( $is_assignment{$next_nonblank_type} ) { + $i_old_assignment_break = $i_next_nonblank; + } + } ## end if ( $old_breakpoint_to_go...) + + next if ( $type eq 'b' ); + $depth = $nesting_depth_to_go[ $i + 1 ]; + + $total_depth_variation += abs( $depth - $depth_last ); + $depth_last = $depth; + + # safety check - be sure we always break after a comment + # Shouldn't happen .. an error here probably means that the + # nobreak flag did not get turned off correctly during + # formatting. + if ( $type eq '#' ) { + if ( $i != $max_index_to_go ) { + warning( +"Non-fatal program bug: backup logic needed to break after a comment\n" + ); + report_definite_bug(); + $nobreak_to_go[$i] = 0; + set_forced_breakpoint($i); + } ## end if ( $i != $max_index_to_go) + } ## end if ( $type eq '#' ) + + # Force breakpoints at certain tokens in long lines. + # Note that such breakpoints will be undone later if these tokens + # are fully contained within parens on a line. + if ( + + # break before a keyword within a line + $type eq 'k' + && $i > 0 + + # if one of these keywords: + && $token =~ /^(if|unless|while|until|for)$/ + + # but do not break at something like '1 while' + && ( $last_nonblank_type ne 'n' || $i > 2 ) + + # and let keywords follow a closing 'do' brace + && $last_nonblank_block_type ne 'do' + + && ( + $is_long_line + + # or container is broken (by side-comment, etc) + || ( $next_nonblank_token eq '(' + && $mate_index_to_go[$i_next_nonblank] < $i ) + ) + ) + { + set_forced_breakpoint( $i - 1 ); + } ## end if ( $type eq 'k' && $i...) + + # remember locations of -> if this is a pre-broken method chain + if ( $type eq '->' ) { + if ($rOpts_break_at_old_method_breakpoints) { + + # Case 1: look for lines with leading pointers + if ( $i == $i_line_start ) { + set_forced_breakpoint( $i - 1 ); + } + + # Case 2: look for cuddled pointer calls + else { + + # look for old lines with leading ')->' or ') ->' + # and, when found, force a break before the + # opening paren and after the previous closing paren. + if ( + $types_to_go[$i_line_start] eq '}' + && ( $i == $i_line_start + 1 + || $i == $i_line_start + 2 + && $types_to_go[ $i - 1 ] eq 'b' ) + ) + { + set_forced_breakpoint( $i_line_start - 1 ); + set_forced_breakpoint( + $mate_index_to_go[$i_line_start] ); + } + } + } + } ## end if ( $type eq '->' ) + + # remember locations of '||' and '&&' for possible breaks if we + # decide this is a long logical expression. + elsif ( $type eq '||' ) { + push @{ $rand_or_list[$depth][2] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } ## end elsif ( $type eq '||' ) + elsif ( $type eq '&&' ) { + push @{ $rand_or_list[$depth][3] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } ## end elsif ( $type eq '&&' ) + elsif ( $type eq 'f' ) { + push @{ $rfor_semicolon_list[$depth] }, $i; + } + elsif ( $type eq 'k' ) { + if ( $token eq 'and' ) { + push @{ $rand_or_list[$depth][1] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } ## end if ( $token eq 'and' ) + + # break immediately at 'or's which are probably not in a logical + # block -- but we will break in logical breaks below so that + # they do not add to the forced_breakpoint_count + elsif ( $token eq 'or' ) { + push @{ $rand_or_list[$depth][0] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + if ( $is_logical_container{ $container_type[$depth] } ) { + } + else { + if ($is_long_line) { set_forced_breakpoint($i) } + elsif ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + $saw_good_breakpoint = 1; + } + } ## end else [ if ( $is_logical_container...)] + } ## end elsif ( $token eq 'or' ) + elsif ( $token eq 'if' || $token eq 'unless' ) { + push @{ $rand_or_list[$depth][4] }, $i; + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + set_forced_breakpoint($i); + } + } ## end elsif ( $token eq 'if' ||...) + } ## end elsif ( $type eq 'k' ) + elsif ( $is_assignment{$type} ) { + $i_equals[$depth] = $i; + } + + if ($type_sequence) { + + # handle any postponed closing breakpoints + if ( $token =~ /^[\)\]\}\:]$/ ) { + if ( $type eq ':' ) { + $last_colon_sequence_number = $type_sequence; + + # retain break at a ':' line break + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_ternary_breakpoints ) + { + + set_forced_breakpoint($i); + + # break at previous '=' + if ( $i_equals[$depth] > 0 ) { + set_forced_breakpoint( $i_equals[$depth] ); + $i_equals[$depth] = -1; + } + } ## end if ( ( $i == $i_line_start...)) + } ## end if ( $type eq ':' ) + if ( defined( $postponed_breakpoint{$type_sequence} ) ) { + my $inc = ( $type eq ':' ) ? 0 : 1; + set_forced_breakpoint( $i - $inc ); + delete $postponed_breakpoint{$type_sequence}; + } + } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(]) + + # set breaks at ?/: if they will get separated (and are + # not a ?/: chain), or if the '?' is at the end of the + # line + elsif ( $token eq '?' ) { + my $i_colon = $mate_index_to_go[$i]; + if ( + $i_colon <= 0 # the ':' is not in this batch + || $i == 0 # this '?' is the first token of the line + || $i == + $max_index_to_go # or this '?' is the last token + ) + { + + # don't break at a '?' if preceded by ':' on + # this line of previous ?/: pair on this line. + # This is an attempt to preserve a chain of ?/: + # expressions (elsif2.t). And don't break if + # this has a side comment. + set_forced_breakpoint($i) + unless ( + $type_sequence == ( + $last_colon_sequence_number + + TYPE_SEQUENCE_INCREMENT + ) + || $tokens_to_go[$max_index_to_go] eq '#' + ); + set_closing_breakpoint($i); + } ## end if ( $i_colon <= 0 ||...) + } ## end elsif ( $token eq '?' ) + } ## end if ($type_sequence) + +#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; + + #------------------------------------------------------------ + # Handle Increasing Depth.. + # + # prepare for a new list when depth increases + # token $i is a '(','{', or '[' + #------------------------------------------------------------ + if ( $depth > $current_depth ) { + + $breakpoint_stack[$depth] = $forced_breakpoint_count; + $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; + $has_broken_sublist[$depth] = 0; + $identifier_count_stack[$depth] = 0; + $index_before_arrow[$depth] = -1; + $interrupted_list[$depth] = 0; + $item_count_stack[$depth] = 0; + $last_comma_index[$depth] = undef; + $last_dot_index[$depth] = undef; + $last_nonblank_type[$depth] = $last_nonblank_type; + $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; + $opening_structure_index_stack[$depth] = $i; + $rand_or_list[$depth] = []; + $rfor_semicolon_list[$depth] = []; + $i_equals[$depth] = -1; + $want_comma_break[$depth] = 0; + $container_type[$depth] = + ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) + ? $last_nonblank_token + : ""; + $has_old_logical_breakpoints[$depth] = 0; + + # if line ends here then signal closing token to break + if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) + { + set_closing_breakpoint($i); + } + + # Not all lists of values should be vertically aligned.. + $dont_align[$depth] = + + # code BLOCKS are handled at a higher level + ( $block_type ne "" ) + + # certain paren lists + || ( $type eq '(' ) && ( + + # it does not usually look good to align a list of + # identifiers in a parameter list, as in: + # my($var1, $var2, ...) + # (This test should probably be refined, for now I'm just + # testing for any keyword) + ( $last_nonblank_type eq 'k' ) + + # a trailing '(' usually indicates a non-list + || ( $next_nonblank_type eq '(' ) + ); + + # patch to outdent opening brace of long if/for/.. + # statements (like this one). See similar coding in + # set_continuation breaks. We have also catch it here for + # short line fragments which otherwise will not go through + # set_continuation_breaks. + if ( + $block_type + + # if we have the ')' but not its '(' in this batch.. + && ( $last_nonblank_token eq ')' ) + && $mate_index_to_go[$i_last_nonblank_token] < 0 + + # and user wants brace to left + && !$rOpts->{'opening-brace-always-on-right'} + + && ( $type eq '{' ) # should be true + && ( $token eq '{' ) # should be true + ) + { + set_forced_breakpoint( $i - 1 ); + } ## end if ( $block_type && ( ...)) + } ## end if ( $depth > $current_depth) + + #------------------------------------------------------------ + # Handle Decreasing Depth.. + # + # finish off any old list when depth decreases + # token $i is a ')','}', or ']' + #------------------------------------------------------------ + elsif ( $depth < $current_depth ) { + + check_for_new_minimum_depth($depth); + + # force all outer logical containers to break after we see on + # old breakpoint + $has_old_logical_breakpoints[$depth] ||= + $has_old_logical_breakpoints[$current_depth]; + + # Patch to break between ') {' if the paren list is broken. + # There is similar logic in set_continuation_breaks for + # non-broken lists. + if ( $token eq ')' + && $next_nonblank_block_type + && $interrupted_list[$current_depth] + && $next_nonblank_type eq '{' + && !$rOpts->{'opening-brace-always-on-right'} ) + { + set_forced_breakpoint($i); + } ## end if ( $token eq ')' && ... + +#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; + + # set breaks at commas if necessary + my ( $bp_count, $do_not_break_apart ) = + set_comma_breakpoints($current_depth); + + my $i_opening = $opening_structure_index_stack[$current_depth]; + my $saw_opening_structure = ( $i_opening >= 0 ); + + # this term is long if we had to break at interior commas.. + my $is_long_term = $bp_count > 0; + + # If this is a short container with one or more comma arrows, + # then we will mark it as a long term to open it if requested. + # $rOpts_comma_arrow_breakpoints = + # 0 - open only if comma precedes closing brace + # 1 - stable: except for one line blocks + # 2 - try to form 1 line blocks + # 3 - ignore => + # 4 - always open up if vt=0 + # 5 - stable: even for one line blocks if vt=0 + if ( !$is_long_term + && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/ + && $index_before_arrow[ $depth + 1 ] > 0 + && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } + ) + { + $is_long_term = $rOpts_comma_arrow_breakpoints == 4 + || ( $rOpts_comma_arrow_breakpoints == 0 + && $last_nonblank_token eq ',' ) + || ( $rOpts_comma_arrow_breakpoints == 5 + && $old_breakpoint_to_go[$i_opening] ); + } ## end if ( !$is_long_term &&...) + + # mark term as long if the length between opening and closing + # parens exceeds allowed line length + if ( !$is_long_term && $saw_opening_structure ) { + my $i_opening_minus = find_token_starting_list($i_opening); + + # Note: we have to allow for one extra space after a + # closing token so that we do not strand a comma or + # semicolon, hence the '>=' here (oneline.t) + # Note: we ignore left weld lengths here for best results + $is_long_term = + excess_line_length( $i_opening_minus, $i, 1 ) >= 0; + } ## end if ( !$is_long_term &&...) + + # We've set breaks after all comma-arrows. Now we have to + # undo them if this can be a one-line block + # (the only breakpoints set will be due to comma-arrows) + if ( + + # user doesn't require breaking after all comma-arrows + ( $rOpts_comma_arrow_breakpoints != 0 ) + && ( $rOpts_comma_arrow_breakpoints != 4 ) + + # and if the opening structure is in this batch + && $saw_opening_structure + + # and either on the same old line + && ( + $old_breakpoint_count_stack[$current_depth] == + $last_old_breakpoint_count + + # or user wants to form long blocks with arrows + || $rOpts_comma_arrow_breakpoints == 2 + ) + + # and we made some breakpoints between the opening and closing + && ( $breakpoint_undo_stack[$current_depth] < + $forced_breakpoint_undo_count ) + + # and this block is short enough to fit on one line + # Note: use < because need 1 more space for possible comma + && !$is_long_term + + ) + { + undo_forced_breakpoint_stack( + $breakpoint_undo_stack[$current_depth] ); + } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) + + # now see if we have any comma breakpoints left + my $has_comma_breakpoints = + ( $breakpoint_stack[$current_depth] != + $forced_breakpoint_count ); + + # update broken-sublist flag of the outer container + $has_broken_sublist[$depth] = + $has_broken_sublist[$depth] + || $has_broken_sublist[$current_depth] + || $is_long_term + || $has_comma_breakpoints; + +# Having come to the closing ')', '}', or ']', now we have to decide if we +# should 'open up' the structure by placing breaks at the opening and +# closing containers. This is a tricky decision. Here are some of the +# basic considerations: +# +# -If this is a BLOCK container, then any breakpoints will have already +# been set (and according to user preferences), so we need do nothing here. +# +# -If we have a comma-separated list for which we can align the list items, +# then we need to do so because otherwise the vertical aligner cannot +# currently do the alignment. +# +# -If this container does itself contain a container which has been broken +# open, then it should be broken open to properly show the structure. +# +# -If there is nothing to align, and no other reason to break apart, +# then do not do it. +# +# We will not break open the parens of a long but 'simple' logical expression. +# For example: +# +# This is an example of a simple logical expression and its formatting: +# +# if ( $bigwasteofspace1 && $bigwasteofspace2 +# || $bigwasteofspace3 && $bigwasteofspace4 ) +# +# Most people would prefer this than the 'spacey' version: +# +# if ( +# $bigwasteofspace1 && $bigwasteofspace2 +# || $bigwasteofspace3 && $bigwasteofspace4 +# ) +# +# To illustrate the rules for breaking logical expressions, consider: +# +# FULLY DENSE: +# if ( $opt_excl +# and ( exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc )) +# +# This is on the verge of being difficult to read. The current default is to +# open it up like this: +# +# DEFAULT: +# if ( +# $opt_excl +# and ( exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc ) +# ) +# +# This is a compromise which tries to avoid being too dense and to spacey. +# A more spaced version would be: +# +# SPACEY: +# if ( +# $opt_excl +# and ( +# exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc +# ) +# ) +# +# Some people might prefer the spacey version -- an option could be added. The +# innermost expression contains a long block '( exists $ids_... ')'. +# +# Here is how the logic goes: We will force a break at the 'or' that the +# innermost expression contains, but we will not break apart its opening and +# closing containers because (1) it contains no multi-line sub-containers itself, +# and (2) there is no alignment to be gained by breaking it open like this +# +# and ( +# exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc +# ) +# +# (although this looks perfectly ok and might be good for long expressions). The +# outer 'if' container, though, contains a broken sub-container, so it will be +# broken open to avoid too much density. Also, since it contains no 'or's, there +# will be a forced break at its 'and'. + + # set some flags telling something about this container.. + my $is_simple_logical_expression = 0; + if ( $item_count_stack[$current_depth] == 0 + && $saw_opening_structure + && $tokens_to_go[$i_opening] eq '(' + && $is_logical_container{ $container_type[$current_depth] } + ) + { + + # This seems to be a simple logical expression with + # no existing breakpoints. Set a flag to prevent + # opening it up. + if ( !$has_comma_breakpoints ) { + $is_simple_logical_expression = 1; + } + + # This seems to be a simple logical expression with + # breakpoints (broken sublists, for example). Break + # at all 'or's and '||'s. + else { + set_logical_breakpoints($current_depth); + } + } ## end if ( $item_count_stack...) + + if ( $is_long_term + && @{ $rfor_semicolon_list[$current_depth] } ) + { + set_for_semicolon_breakpoints($current_depth); + + # open up a long 'for' or 'foreach' container to allow + # leading term alignment unless -lp is used. + $has_comma_breakpoints = 1 + unless $rOpts_line_up_parentheses; + } ## end if ( $is_long_term && ...) + + if ( + + # breaks for code BLOCKS are handled at a higher level + !$block_type + + # we do not need to break at the top level of an 'if' + # type expression + && !$is_simple_logical_expression + + ## modification to keep ': (' containers vertically tight; + ## but probably better to let user set -vt=1 to avoid + ## inconsistency with other paren types + ## && ($container_type[$current_depth] ne ':') + + # otherwise, we require one of these reasons for breaking: + && ( + + # - this term has forced line breaks + $has_comma_breakpoints + + # - the opening container is separated from this batch + # for some reason (comment, blank line, code block) + # - this is a non-paren container spanning multiple lines + || !$saw_opening_structure + + # - this is a long block contained in another breakable + # container + || ( $is_long_term + && $container_environment_to_go[$i_opening] ne + 'BLOCK' ) + ) + ) + { + + # For -lp option, we must put a breakpoint before + # the token which has been identified as starting + # this indentation level. This is necessary for + # proper alignment. + if ( $rOpts_line_up_parentheses && $saw_opening_structure ) + { + my $item = $leading_spaces_to_go[ $i_opening + 1 ]; + if ( $i_opening + 1 < $max_index_to_go + && $types_to_go[ $i_opening + 1 ] eq 'b' ) + { + $item = $leading_spaces_to_go[ $i_opening + 2 ]; + } + if ( defined($item) ) { + my $i_start_2 = $item->get_starting_index(); + if ( + defined($i_start_2) + + # we are breaking after an opening brace, paren, + # so don't break before it too + && $i_start_2 ne $i_opening + ) + { + + # Only break for breakpoints at the same + # indentation level as the opening paren + my $test1 = $nesting_depth_to_go[$i_opening]; + my $test2 = $nesting_depth_to_go[$i_start_2]; + if ( $test2 == $test1 ) { + set_forced_breakpoint( $i_start_2 - 1 ); + } + } ## end if ( defined($i_start_2...)) + } ## end if ( defined($item) ) + } ## end if ( $rOpts_line_up_parentheses...) + + # break after opening structure. + # note: break before closing structure will be automatic + if ( $minimum_depth <= $current_depth ) { + + set_forced_breakpoint($i_opening) + unless ( $do_not_break_apart + || is_unbreakable_container($current_depth) ); + + # break at ',' of lower depth level before opening token + if ( $last_comma_index[$depth] ) { + set_forced_breakpoint( $last_comma_index[$depth] ); + } + + # break at '.' of lower depth level before opening token + if ( $last_dot_index[$depth] ) { + set_forced_breakpoint( $last_dot_index[$depth] ); + } + + # break before opening structure if preceded by another + # closing structure and a comma. This is normally + # done by the previous closing brace, but not + # if it was a one-line block. + if ( $i_opening > 2 ) { + my $i_prev = + ( $types_to_go[ $i_opening - 1 ] eq 'b' ) + ? $i_opening - 2 + : $i_opening - 1; + + if ( $types_to_go[$i_prev] eq ',' + && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) + { + set_forced_breakpoint($i_prev); + } + + # also break before something like ':(' or '?(' + # if appropriate. + elsif ( + $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) + { + my $token_prev = $tokens_to_go[$i_prev]; + if ( $want_break_before{$token_prev} ) { + set_forced_breakpoint($i_prev); + } + } ## end elsif ( $types_to_go[$i_prev...]) + } ## end if ( $i_opening > 2 ) + } ## end if ( $minimum_depth <=...) + + # break after comma following closing structure + if ( $next_type eq ',' ) { + set_forced_breakpoint( $i + 1 ); + } + + # break before an '=' following closing structure + if ( + $is_assignment{$next_nonblank_type} + && ( $breakpoint_stack[$current_depth] != + $forced_breakpoint_count ) + ) + { + set_forced_breakpoint($i); + } ## end if ( $is_assignment{$next_nonblank_type...}) + + # break at any comma before the opening structure Added + # for -lp, but seems to be good in general. It isn't + # obvious how far back to look; the '5' below seems to + # work well and will catch the comma in something like + # push @list, myfunc( $param, $param, .. + + my $icomma = $last_comma_index[$depth]; + if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { + unless ( $forced_breakpoint_to_go[$icomma] ) { + set_forced_breakpoint($icomma); + } + } + } # end logic to open up a container + + # Break open a logical container open if it was already open + elsif ($is_simple_logical_expression + && $has_old_logical_breakpoints[$current_depth] ) + { + set_logical_breakpoints($current_depth); + } + + # Handle long container which does not get opened up + elsif ($is_long_term) { + + # must set fake breakpoint to alert outer containers that + # they are complex + set_fake_breakpoint(); + } ## end elsif ($is_long_term) + + } ## end elsif ( $depth < $current_depth) + + #------------------------------------------------------------ + # Handle this token + #------------------------------------------------------------ + + $current_depth = $depth; + + # handle comma-arrow + if ( $type eq '=>' ) { + next if ( $last_nonblank_type eq '=>' ); + next if $rOpts_break_at_old_comma_breakpoints; + next if $rOpts_comma_arrow_breakpoints == 3; + $want_comma_break[$depth] = 1; + $index_before_arrow[$depth] = $i_last_nonblank_token; + next; + } ## end if ( $type eq '=>' ) + + elsif ( $type eq '.' ) { + $last_dot_index[$depth] = $i; + } + + # Turn off alignment if we are sure that this is not a list + # environment. To be safe, we will do this if we see certain + # non-list tokens, such as ';', and also the environment is + # not a list. Note that '=' could be in any of the = operators + # (lextest.t). We can't just use the reported environment + # because it can be incorrect in some cases. + elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) + && $container_environment_to_go[$i] ne 'LIST' ) + { + $dont_align[$depth] = 1; + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; + } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) + + # now just handle any commas + next unless ( $type eq ',' ); + + $last_dot_index[$depth] = undef; + $last_comma_index[$depth] = $i; + + # break here if this comma follows a '=>' + # but not if there is a side comment after the comma + if ( $want_comma_break[$depth] ) { + + if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { + if ($rOpts_comma_arrow_breakpoints) { + $want_comma_break[$depth] = 0; + next; + } + } + + set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); + + # break before the previous token if it looks safe + # Example of something that we will not try to break before: + # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, + # Also we don't want to break at a binary operator (like +): + # $c->createOval( + # $x + $R, $y + + # $R => $x - $R, + # $y - $R, -fill => 'black', + # ); + my $ibreak = $index_before_arrow[$depth] - 1; + if ( $ibreak > 0 + && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) + { + if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } + if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } + if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { + + # don't break pointer calls, such as the following: + # File::Spec->curdir => 1, + # (This is tokenized as adjacent 'w' tokens) + ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { + + # And don't break before a comma, as in the following: + # ( LONGER_THAN,=> 1, + # EIGHTY_CHARACTERS,=> 2, + # CAUSES_FORMATTING,=> 3, + # LIKE_THIS,=> 4, + # ); + # This example is for -tso but should be general rule + if ( $tokens_to_go[ $ibreak + 1 ] ne '->' + && $tokens_to_go[ $ibreak + 1 ] ne ',' ) + { + set_forced_breakpoint($ibreak); + } + } ## end if ( $types_to_go[$ibreak...]) + } ## end if ( $ibreak > 0 && $tokens_to_go...) + + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; + + # handle list which mixes '=>'s and ','s: + # treat any list items so far as an interrupted list + $interrupted_list[$depth] = 1; + next; + } ## end if ( $want_comma_break...) + + # break after all commas above starting depth + if ( $depth < $starting_depth && !$dont_align[$depth] ) { + set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); + next; + } + + # add this comma to the list.. + my $item_count = $item_count_stack[$depth]; + if ( $item_count == 0 ) { + + # but do not form a list with no opening structure + # for example: + + # open INFILE_COPY, ">$input_file_copy" + # or die ("very long message"); + + if ( ( $opening_structure_index_stack[$depth] < 0 ) + && $container_environment_to_go[$i] eq 'BLOCK' ) + { + $dont_align[$depth] = 1; + } + } ## end if ( $item_count == 0 ) + + $comma_index[$depth][$item_count] = $i; + ++$item_count_stack[$depth]; + if ( $last_nonblank_type =~ /^[iR\]]$/ ) { + $identifier_count_stack[$depth]++; + } + } ## end while ( ++$i <= $max_index_to_go) + + #------------------------------------------- + # end of loop over all tokens in this batch + #------------------------------------------- + + # set breaks for any unfinished lists .. + for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { + + $interrupted_list[$dd] = 1; + $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); + set_comma_breakpoints($dd); + set_logical_breakpoints($dd) + if ( $has_old_logical_breakpoints[$dd] ); + set_for_semicolon_breakpoints($dd); + + # break open container... + my $i_opening = $opening_structure_index_stack[$dd]; + set_forced_breakpoint($i_opening) + unless ( + is_unbreakable_container($dd) + + # Avoid a break which would place an isolated ' or " + # on a line + || ( $type eq 'Q' + && $i_opening >= $max_index_to_go - 2 + && $token =~ /^['"]$/ ) + ); + } ## end for ( my $dd = $current_depth...) + + # Return a flag indicating if the input file had some good breakpoints. + # This flag will be used to force a break in a line shorter than the + # allowed line length. + if ( $has_old_logical_breakpoints[$current_depth] ) { + $saw_good_breakpoint = 1; + } + + # A complex line with one break at an = has a good breakpoint. + # This is not complex ($total_depth_variation=0): + # $res1 + # = 10; + # + # This is complex ($total_depth_variation=6): + # $res2 = + # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); + elsif ($i_old_assignment_break + && $total_depth_variation > 4 + && $old_breakpoint_count == 1 ) + { + $saw_good_breakpoint = 1; + } ## end elsif ( $i_old_assignment_break...) + + return $saw_good_breakpoint; + } ## end sub scan_list +} # end scan_list + +sub find_token_starting_list { + + # When testing to see if a block will fit on one line, some + # previous token(s) may also need to be on the line; particularly + # if this is a sub call. So we will look back at least one + # token. NOTE: This isn't perfect, but not critical, because + # if we mis-identify a block, it will be wrapped and therefore + # fixed the next time it is formatted. + my $i_opening_paren = shift; + my $i_opening_minus = $i_opening_paren; + my $im1 = $i_opening_paren - 1; + my $im2 = $i_opening_paren - 2; + my $im3 = $i_opening_paren - 3; + my $typem1 = $types_to_go[$im1]; + my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; + + if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { + $i_opening_minus = $i_opening_paren; + } + elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { + $i_opening_minus = $im1 if $im1 >= 0; + + # walk back to improve length estimate + for ( my $j = $im1 ; $j >= 0 ; $j-- ) { + last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); + $i_opening_minus = $j; + } + if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } + } + elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } + elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { + $i_opening_minus = $im2; + } + return $i_opening_minus; +} + +{ # begin set_comma_breakpoints_do + + my %is_keyword_with_special_leading_term; + + BEGIN { + + # These keywords have prototypes which allow a special leading item + # followed by a list + my @q = + qw(formline grep kill map printf sprintf push chmod join pack unshift); + @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q); + } + + sub set_comma_breakpoints_do { + + # Given a list with some commas, set breakpoints at some of the + # commas, if necessary, to make it easy to read. This list is + # an example: + my ( + $depth, $i_opening_paren, $i_closing_paren, + $item_count, $identifier_count, $rcomma_index, + $next_nonblank_type, $list_type, $interrupted, + $rdo_not_break_apart, $must_break_open, + ) = @_; + + # nothing to do if no commas seen + return if ( $item_count < 1 ); + my $i_first_comma = $rcomma_index->[0]; + my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ]; + my $i_last_comma = $i_true_last_comma; + if ( $i_last_comma >= $max_index_to_go ) { + $i_last_comma = $rcomma_index->[ --$item_count - 1 ]; + return if ( $item_count < 1 ); + } + + #--------------------------------------------------------------- + # find lengths of all items in the list to calculate page layout + #--------------------------------------------------------------- + my $comma_count = $item_count; + my @item_lengths; + my @i_term_begin; + my @i_term_end; + my @i_term_comma; + my $i_prev_plus; + my @max_length = ( 0, 0 ); + my $first_term_length; + my $i = $i_opening_paren; + my $is_odd = 1; + + foreach my $j ( 0 .. $comma_count - 1 ) { + $is_odd = 1 - $is_odd; + $i_prev_plus = $i + 1; + $i = $rcomma_index->[$j]; + + my $i_term_end = + ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; + my $i_term_begin = + ( $types_to_go[$i_prev_plus] eq 'b' ) + ? $i_prev_plus + 1 + : $i_prev_plus; + push @i_term_begin, $i_term_begin; + push @i_term_end, $i_term_end; + push @i_term_comma, $i; + + # note: currently adding 2 to all lengths (for comma and space) + my $length = + 2 + token_sequence_length( $i_term_begin, $i_term_end ); + push @item_lengths, $length; + + if ( $j == 0 ) { + $first_term_length = $length; + } + else { + + if ( $length > $max_length[$is_odd] ) { + $max_length[$is_odd] = $length; + } + } + } + + # now we have to make a distinction between the comma count and item + # count, because the item count will be one greater than the comma + # count if the last item is not terminated with a comma + my $i_b = + ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) + ? $i_last_comma + 1 + : $i_last_comma; + my $i_e = + ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) + ? $i_closing_paren - 2 + : $i_closing_paren - 1; + my $i_effective_last_comma = $i_last_comma; + + my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); + + if ( $last_item_length > 0 ) { + + # add 2 to length because other lengths include a comma and a blank + $last_item_length += 2; + push @item_lengths, $last_item_length; + push @i_term_begin, $i_b + 1; + push @i_term_end, $i_e; + push @i_term_comma, undef; + + my $i_odd = $item_count % 2; + + if ( $last_item_length > $max_length[$i_odd] ) { + $max_length[$i_odd] = $last_item_length; + } + + $item_count++; + $i_effective_last_comma = $i_e + 1; + + if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { + $identifier_count++; + } + } + + #--------------------------------------------------------------- + # End of length calculations + #--------------------------------------------------------------- + + #--------------------------------------------------------------- + # Compound List Rule 1: + # Break at (almost) every comma for a list containing a broken + # sublist. This has higher priority than the Interrupted List + # Rule. + #--------------------------------------------------------------- + if ( $has_broken_sublist[$depth] ) { + + # Break at every comma except for a comma between two + # simple, small terms. This prevents long vertical + # columns of, say, just 0's. + my $small_length = 10; # 2 + actual maximum length wanted + + # We'll insert a break in long runs of small terms to + # allow alignment in uniform tables. + my $skipped_count = 0; + my $columns = table_columns_available($i_first_comma); + my $fields = int( $columns / $small_length ); + if ( $rOpts_maximum_fields_per_table + && $fields > $rOpts_maximum_fields_per_table ) + { + $fields = $rOpts_maximum_fields_per_table; + } + my $max_skipped_count = $fields - 1; + + my $is_simple_last_term = 0; + my $is_simple_next_term = 0; + foreach my $j ( 0 .. $item_count ) { + $is_simple_last_term = $is_simple_next_term; + $is_simple_next_term = 0; + if ( $j < $item_count + && $i_term_end[$j] == $i_term_begin[$j] + && $item_lengths[$j] <= $small_length ) + { + $is_simple_next_term = 1; + } + next if $j == 0; + if ( $is_simple_last_term + && $is_simple_next_term + && $skipped_count < $max_skipped_count ) + { + $skipped_count++; + } + else { + $skipped_count = 0; + my $i = $i_term_comma[ $j - 1 ]; + last unless defined $i; + set_forced_breakpoint($i); + } + } + + # always break at the last comma if this list is + # interrupted; we wouldn't want to leave a terminal '{', for + # example. + if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } + return; + } + +#my ( $a, $b, $c ) = caller(); +#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count +#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; +#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; + + #--------------------------------------------------------------- + # Interrupted List Rule: + # A list is forced to use old breakpoints if it was interrupted + # by side comments or blank lines, or requested by user. + #--------------------------------------------------------------- + if ( $rOpts_break_at_old_comma_breakpoints + || $interrupted + || $i_opening_paren < 0 ) + { + copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); + return; + } + + #--------------------------------------------------------------- + # Looks like a list of items. We have to look at it and size it up. + #--------------------------------------------------------------- + + my $opening_token = $tokens_to_go[$i_opening_paren]; + my $opening_environment = + $container_environment_to_go[$i_opening_paren]; + + #------------------------------------------------------------------- + # Return if this will fit on one line + #------------------------------------------------------------------- + + my $i_opening_minus = find_token_starting_list($i_opening_paren); + return + unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; + + #------------------------------------------------------------------- + # Now we know that this block spans multiple lines; we have to set + # at least one breakpoint -- real or fake -- as a signal to break + # open any outer containers. + #------------------------------------------------------------------- + set_fake_breakpoint(); + + # be sure we do not extend beyond the current list length + if ( $i_effective_last_comma >= $max_index_to_go ) { + $i_effective_last_comma = $max_index_to_go - 1; + } + + # Set a flag indicating if we need to break open to keep -lp + # items aligned. This is necessary if any of the list terms + # exceeds the available space after the '('. + my $need_lp_break_open = $must_break_open; + if ( $rOpts_line_up_parentheses && !$must_break_open ) { + my $columns_if_unbroken = + maximum_line_length($i_opening_minus) - + total_line_length( $i_opening_minus, $i_opening_paren ); + $need_lp_break_open = + ( $max_length[0] > $columns_if_unbroken ) + || ( $max_length[1] > $columns_if_unbroken ) + || ( $first_term_length > $columns_if_unbroken ); + } + + # Specify if the list must have an even number of fields or not. + # It is generally safest to assume an even number, because the + # list items might be a hash list. But if we can be sure that + # it is not a hash, then we can allow an odd number for more + # flexibility. + my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count + + if ( $identifier_count >= $item_count - 1 + || $is_assignment{$next_nonblank_type} + || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) + ) + { + $odd_or_even = 1; + } + + # do we have a long first term which should be + # left on a line by itself? + my $use_separate_first_term = ( + $odd_or_even == 1 # only if we can use 1 field/line + && $item_count > 3 # need several items + && $first_term_length > + 2 * $max_length[0] - 2 # need long first term + && $first_term_length > + 2 * $max_length[1] - 2 # need long first term + ); + + # or do we know from the type of list that the first term should + # be placed alone? + if ( !$use_separate_first_term ) { + if ( $is_keyword_with_special_leading_term{$list_type} ) { + $use_separate_first_term = 1; + + # should the container be broken open? + if ( $item_count < 3 ) { + if ( $i_first_comma - $i_opening_paren < 4 ) { + ${$rdo_not_break_apart} = 1; + } + } + elsif ($first_term_length < 20 + && $i_first_comma - $i_opening_paren < 4 ) + { + my $columns = table_columns_available($i_first_comma); + if ( $first_term_length < $columns ) { + ${$rdo_not_break_apart} = 1; + } + } + } + } + + # if so, + if ($use_separate_first_term) { + + # ..set a break and update starting values + $use_separate_first_term = 1; + set_forced_breakpoint($i_first_comma); + $i_opening_paren = $i_first_comma; + $i_first_comma = $rcomma_index->[1]; + $item_count--; + return if $comma_count == 1; + shift @item_lengths; + shift @i_term_begin; + shift @i_term_end; + shift @i_term_comma; + } + + # if not, update the metrics to include the first term + else { + if ( $first_term_length > $max_length[0] ) { + $max_length[0] = $first_term_length; + } + } + + # Field width parameters + my $pair_width = ( $max_length[0] + $max_length[1] ); + my $max_width = + ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; + + # Number of free columns across the page width for laying out tables + my $columns = table_columns_available($i_first_comma); + + # Estimated maximum number of fields which fit this space + # This will be our first guess + my $number_of_fields_max = + maximum_number_of_fields( $columns, $odd_or_even, $max_width, + $pair_width ); + my $number_of_fields = $number_of_fields_max; + + # Find the best-looking number of fields + # and make this our second guess if possible + my ( $number_of_fields_best, $ri_ragged_break_list, + $new_identifier_count ) + = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, + $max_width ); + + if ( $number_of_fields_best != 0 + && $number_of_fields_best < $number_of_fields_max ) + { + $number_of_fields = $number_of_fields_best; + } + + # ---------------------------------------------------------------------- + # If we are crowded and the -lp option is being used, try to + # undo some indentation + # ---------------------------------------------------------------------- + if ( + $rOpts_line_up_parentheses + && ( + $number_of_fields == 0 + || ( $number_of_fields == 1 + && $number_of_fields != $number_of_fields_best ) + ) + ) + { + my $available_spaces = get_available_spaces_to_go($i_first_comma); + if ( $available_spaces > 0 ) { + + my $spaces_wanted = $max_width - $columns; # for 1 field + + if ( $number_of_fields_best == 0 ) { + $number_of_fields_best = + get_maximum_fields_wanted( \@item_lengths ); + } + + if ( $number_of_fields_best != 1 ) { + my $spaces_wanted_2 = + 1 + $pair_width - $columns; # for 2 fields + if ( $available_spaces > $spaces_wanted_2 ) { + $spaces_wanted = $spaces_wanted_2; + } + } + + if ( $spaces_wanted > 0 ) { + my $deleted_spaces = + reduce_lp_indentation( $i_first_comma, $spaces_wanted ); + + # redo the math + if ( $deleted_spaces > 0 ) { + $columns = table_columns_available($i_first_comma); + $number_of_fields_max = + maximum_number_of_fields( $columns, $odd_or_even, + $max_width, $pair_width ); + $number_of_fields = $number_of_fields_max; + + if ( $number_of_fields_best == 1 + && $number_of_fields >= 1 ) + { + $number_of_fields = $number_of_fields_best; + } + } + } + } + } + + # try for one column if two won't work + if ( $number_of_fields <= 0 ) { + $number_of_fields = int( $columns / $max_width ); + } + + # The user can place an upper bound on the number of fields, + # which can be useful for doing maintenance on tables + if ( $rOpts_maximum_fields_per_table + && $number_of_fields > $rOpts_maximum_fields_per_table ) + { + $number_of_fields = $rOpts_maximum_fields_per_table; + } + + # How many columns (characters) and lines would this container take + # if no additional whitespace were added? + my $packed_columns = token_sequence_length( $i_opening_paren + 1, + $i_effective_last_comma + 1 ); + if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero + my $packed_lines = 1 + int( $packed_columns / $columns ); + + # are we an item contained in an outer list? + my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; + + if ( $number_of_fields <= 0 ) { + +# #--------------------------------------------------------------- +# # We're in trouble. We can't find a single field width that works. +# # There is no simple answer here; we may have a single long list +# # item, or many. +# #--------------------------------------------------------------- +# +# In many cases, it may be best to not force a break if there is just one +# comma, because the standard continuation break logic will do a better +# job without it. +# +# In the common case that all but one of the terms can fit +# on a single line, it may look better not to break open the +# containing parens. Consider, for example +# +# $color = +# join ( '/', +# sort { $color_value{$::a} <=> $color_value{$::b}; } +# keys %colors ); +# +# which will look like this with the container broken: +# +# $color = join ( +# '/', +# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors +# ); +# +# Here is an example of this rule for a long last term: +# +# log_message( 0, 256, 128, +# "Number of routes in adj-RIB-in to be considered: $peercount" ); +# +# And here is an example with a long first term: +# +# $s = sprintf( +# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", +# $r, $pu, $ps, $cu, $cs, $tt +# ) +# if $style eq 'all'; + + my $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; + my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; + my $long_first_term = + excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; + + # break at every comma ... + if ( + + # if requested by user or is best looking + $number_of_fields_best == 1 + + # or if this is a sublist of a larger list + || $in_hierarchical_list + + # or if multiple commas and we don't have a long first or last + # term + || ( $comma_count > 1 + && !( $long_last_term || $long_first_term ) ) + ) + { + foreach ( 0 .. $comma_count - 1 ) { + set_forced_breakpoint( $rcomma_index->[$_] ); + } + } + elsif ($long_last_term) { + + set_forced_breakpoint($i_last_comma); + ${$rdo_not_break_apart} = 1 unless $must_break_open; + } + elsif ($long_first_term) { + + set_forced_breakpoint($i_first_comma); + } + else { + + # let breaks be defined by default bond strength logic + } + return; + } + + # -------------------------------------------------------- + # We have a tentative field count that seems to work. + # How many lines will this require? + # -------------------------------------------------------- + my $formatted_lines = $item_count / ($number_of_fields); + if ( $formatted_lines != int $formatted_lines ) { + $formatted_lines = 1 + int $formatted_lines; + } + + # So far we've been trying to fill out to the right margin. But + # compact tables are easier to read, so let's see if we can use fewer + # fields without increasing the number of lines. + $number_of_fields = + compactify_table( $item_count, $number_of_fields, $formatted_lines, + $odd_or_even ); + + # How many spaces across the page will we fill? + my $columns_per_line = + ( int $number_of_fields / 2 ) * $pair_width + + ( $number_of_fields % 2 ) * $max_width; + + my $formatted_columns; + + if ( $number_of_fields > 1 ) { + $formatted_columns = + ( $pair_width * ( int( $item_count / 2 ) ) + + ( $item_count % 2 ) * $max_width ); + } + else { + $formatted_columns = $max_width * $item_count; + } + if ( $formatted_columns < $packed_columns ) { + $formatted_columns = $packed_columns; + } + + my $unused_columns = $formatted_columns - $packed_columns; + + # set some empirical parameters to help decide if we should try to + # align; high sparsity does not look good, especially with few lines + my $sparsity = ($unused_columns) / ($formatted_columns); + my $max_allowed_sparsity = + ( $item_count < 3 ) ? 0.1 + : ( $packed_lines == 1 ) ? 0.15 + : ( $packed_lines == 2 ) ? 0.4 + : 0.7; + + # Begin check for shortcut methods, which avoid treating a list + # as a table for relatively small parenthesized lists. These + # are usually easier to read if not formatted as tables. + if ( + $packed_lines <= 2 # probably can fit in 2 lines + && $item_count < 9 # doesn't have too many items + && $opening_environment eq 'BLOCK' # not a sub-container + && $opening_token eq '(' # is paren list + ) + { + + # Shortcut method 1: for -lp and just one comma: + # This is a no-brainer, just break at the comma. + if ( + $rOpts_line_up_parentheses # -lp + && $item_count == 2 # two items, one comma + && !$must_break_open + ) + { + my $i_break = $rcomma_index->[0]; + set_forced_breakpoint($i_break); + ${$rdo_not_break_apart} = 1; + set_non_alignment_flags( $comma_count, $rcomma_index ); + return; + + } + + # method 2 is for most small ragged lists which might look + # best if not displayed as a table. + if ( + ( $number_of_fields == 2 && $item_count == 3 ) + || ( + $new_identifier_count > 0 # isn't all quotes + && $sparsity > 0.15 + ) # would be fairly spaced gaps if aligned + ) + { + + my $break_count = set_ragged_breakpoints( \@i_term_comma, + $ri_ragged_break_list ); + ++$break_count if ($use_separate_first_term); + + # NOTE: we should really use the true break count here, + # which can be greater if there are large terms and + # little space, but usually this will work well enough. + unless ($must_break_open) { + + if ( $break_count <= 1 ) { + ${$rdo_not_break_apart} = 1; + } + elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) + { + ${$rdo_not_break_apart} = 1; + } + } + set_non_alignment_flags( $comma_count, $rcomma_index ); + return; + } + + } # end shortcut methods + + # debug stuff + + FORMATTER_DEBUG_FLAG_SPARSE && do { + print STDOUT +"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; + + }; + + #--------------------------------------------------------------- + # Compound List Rule 2: + # If this list is too long for one line, and it is an item of a + # larger list, then we must format it, regardless of sparsity + # (ian.t). One reason that we have to do this is to trigger + # Compound List Rule 1, above, which causes breaks at all commas of + # all outer lists. In this way, the structure will be properly + # displayed. + #--------------------------------------------------------------- + + # Decide if this list is too long for one line unless broken + my $total_columns = table_columns_available($i_opening_paren); + my $too_long = $packed_columns > $total_columns; + + # For a paren list, include the length of the token just before the + # '(' because this is likely a sub call, and we would have to + # include the sub name on the same line as the list. This is still + # imprecise, but not too bad. (steve.t) + if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { + + $too_long = excess_line_length( $i_opening_minus, + $i_effective_last_comma + 1 ) > 0; + } + + # FIXME: For an item after a '=>', try to include the length of the + # thing before the '=>'. This is crude and should be improved by + # actually looking back token by token. + if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { + my $i_opening_minus = $i_opening_paren - 4; + if ( $i_opening_minus >= 0 ) { + $too_long = excess_line_length( $i_opening_minus, + $i_effective_last_comma + 1 ) > 0; + } + } + + # Always break lists contained in '[' and '{' if too long for 1 line, + # and always break lists which are too long and part of a more complex + # structure. + my $must_break_open_container = $must_break_open + || ( $too_long + && ( $in_hierarchical_list || $opening_token ne '(' ) ); + +#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; + + #--------------------------------------------------------------- + # The main decision: + # Now decide if we will align the data into aligned columns. Do not + # attempt to align columns if this is a tiny table or it would be + # too spaced. It seems that the more packed lines we have, the + # sparser the list that can be allowed and still look ok. + #--------------------------------------------------------------- + + if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) + || ( $formatted_lines < 2 ) + || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) + ) + { + + #--------------------------------------------------------------- + # too sparse: would look ugly if aligned in a table; + #--------------------------------------------------------------- + + # use old breakpoints if this is a 'big' list + # FIXME: goal is to improve set_ragged_breakpoints so that + # this is not necessary. + if ( $packed_lines > 2 && $item_count > 10 ) { + write_logfile_entry("List sparse: using old breakpoints\n"); + copy_old_breakpoints( $i_first_comma, $i_last_comma ); + } + + # let the continuation logic handle it if 2 lines + else { + + my $break_count = set_ragged_breakpoints( \@i_term_comma, + $ri_ragged_break_list ); + ++$break_count if ($use_separate_first_term); + + unless ($must_break_open_container) { + if ( $break_count <= 1 ) { + ${$rdo_not_break_apart} = 1; + } + elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) + { + ${$rdo_not_break_apart} = 1; + } + } + set_non_alignment_flags( $comma_count, $rcomma_index ); + } + return; + } + + #--------------------------------------------------------------- + # go ahead and format as a table + #--------------------------------------------------------------- + write_logfile_entry( + "List: auto formatting with $number_of_fields fields/row\n"); + + my $j_first_break = + $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; + + for ( + my $j = $j_first_break ; + $j < $comma_count ; + $j += $number_of_fields + ) + { + my $i = $rcomma_index->[$j]; + set_forced_breakpoint($i); + } + return; + } +} + +sub set_non_alignment_flags { + + # set flag which indicates that these commas should not be + # aligned + my ( $comma_count, $rcomma_index ) = @_; + foreach ( 0 .. $comma_count - 1 ) { + $matching_token_to_go[ $rcomma_index->[$_] ] = 1; + } + return; +} + +sub study_list_complexity { + + # Look for complex tables which should be formatted with one term per line. + # Returns the following: + # + # \@i_ragged_break_list = list of good breakpoints to avoid lines + # which are hard to read + # $number_of_fields_best = suggested number of fields based on + # complexity; = 0 if any number may be used. + # + my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; + my $item_count = @{$ri_term_begin}; + my $complex_item_count = 0; + my $number_of_fields_best = $rOpts_maximum_fields_per_table; + my $i_max = @{$ritem_lengths} - 1; + ##my @item_complexity; + + my $i_last_last_break = -3; + my $i_last_break = -2; + my @i_ragged_break_list; + + my $definitely_complex = 30; + my $definitely_simple = 12; + my $quote_count = 0; + + for my $i ( 0 .. $i_max ) { + my $ib = $ri_term_begin->[$i]; + my $ie = $ri_term_end->[$i]; + + # define complexity: start with the actual term length + my $weighted_length = ( $ritem_lengths->[$i] - 2 ); + + ##TBD: join types here and check for variations + ##my $str=join "", @tokens_to_go[$ib..$ie]; + + my $is_quote = 0; + if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { + $is_quote = 1; + $quote_count++; + } + elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { + $quote_count++; + } + + if ( $ib eq $ie ) { + if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { + $complex_item_count++; + $weighted_length *= 2; + } + else { + } + } + else { + if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { + $complex_item_count++; + $weighted_length *= 2; + } + if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { + $weighted_length += 4; + } + } + + # add weight for extra tokens. + $weighted_length += 2 * ( $ie - $ib ); + +## my $BUB = join '', @tokens_to_go[$ib..$ie]; +## print "# COMPLEXITY:$weighted_length $BUB\n"; + +##push @item_complexity, $weighted_length; + + # now mark a ragged break after this item it if it is 'long and + # complex': + if ( $weighted_length >= $definitely_complex ) { + + # if we broke after the previous term + # then break before it too + if ( $i_last_break == $i - 1 + && $i > 1 + && $i_last_last_break != $i - 2 ) + { + + ## FIXME: don't strand a small term + pop @i_ragged_break_list; + push @i_ragged_break_list, $i - 2; + push @i_ragged_break_list, $i - 1; + } + + push @i_ragged_break_list, $i; + $i_last_last_break = $i_last_break; + $i_last_break = $i; + } + + # don't break before a small last term -- it will + # not look good on a line by itself. + elsif ($i == $i_max + && $i_last_break == $i - 1 + && $weighted_length <= $definitely_simple ) + { + pop @i_ragged_break_list; + } + } + + my $identifier_count = $i_max + 1 - $quote_count; + + # Need more tuning here.. + if ( $max_width > 12 + && $complex_item_count > $item_count / 2 + && $number_of_fields_best != 2 ) + { + $number_of_fields_best = 1; + } + + return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); +} + +sub get_maximum_fields_wanted { + + # Not all tables look good with more than one field of items. + # This routine looks at a table and decides if it should be + # formatted with just one field or not. + # This coding is still under development. + my ($ritem_lengths) = @_; + + my $number_of_fields_best = 0; + + # For just a few items, we tentatively assume just 1 field. + my $item_count = @{$ritem_lengths}; + if ( $item_count <= 5 ) { + $number_of_fields_best = 1; + } + + # For larger tables, look at it both ways and see what looks best + else { + + my $is_odd = 1; + my @max_length = ( 0, 0 ); + my @last_length_2 = ( undef, undef ); + my @first_length_2 = ( undef, undef ); + my $last_length = undef; + my $total_variation_1 = 0; + my $total_variation_2 = 0; + my @total_variation_2 = ( 0, 0 ); + + foreach my $j ( 0 .. $item_count - 1 ) { + + $is_odd = 1 - $is_odd; + my $length = $ritem_lengths->[$j]; + if ( $length > $max_length[$is_odd] ) { + $max_length[$is_odd] = $length; + } + + if ( defined($last_length) ) { + my $dl = abs( $length - $last_length ); + $total_variation_1 += $dl; + } + $last_length = $length; + + my $ll = $last_length_2[$is_odd]; + if ( defined($ll) ) { + my $dl = abs( $length - $ll ); + $total_variation_2[$is_odd] += $dl; + } + else { + $first_length_2[$is_odd] = $length; + } + $last_length_2[$is_odd] = $length; + } + $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; + + my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; + unless ( $total_variation_2 < $factor * $total_variation_1 ) { + $number_of_fields_best = 1; + } + } + return ($number_of_fields_best); +} + +sub table_columns_available { + my $i_first_comma = shift; + my $columns = + maximum_line_length($i_first_comma) - + leading_spaces_to_go($i_first_comma); + + # Patch: the vertical formatter does not line up lines whose lengths + # exactly equal the available line length because of allowances + # that must be made for side comments. Therefore, the number of + # available columns is reduced by 1 character. + $columns -= 1; + return $columns; +} + +sub maximum_number_of_fields { + + # how many fields will fit in the available space? + my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; + my $max_pairs = int( $columns / $pair_width ); + my $number_of_fields = $max_pairs * 2; + if ( $odd_or_even == 1 + && $max_pairs * $pair_width + $max_width <= $columns ) + { + $number_of_fields++; + } + return $number_of_fields; +} + +sub compactify_table { + + # given a table with a certain number of fields and a certain number + # of lines, see if reducing the number of fields will make it look + # better. + my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; + if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { + my $min_fields; + + for ( + $min_fields = $number_of_fields ; + $min_fields >= $odd_or_even + && $min_fields * $formatted_lines >= $item_count ; + $min_fields -= $odd_or_even + ) + { + $number_of_fields = $min_fields; + } + } + return $number_of_fields; +} + +sub set_ragged_breakpoints { + + # Set breakpoints in a list that cannot be formatted nicely as a + # table. + my ( $ri_term_comma, $ri_ragged_break_list ) = @_; + + my $break_count = 0; + foreach ( @{$ri_ragged_break_list} ) { + my $j = $ri_term_comma->[$_]; + if ($j) { + set_forced_breakpoint($j); + $break_count++; + } + } + return $break_count; +} + +sub copy_old_breakpoints { + my ( $i_first_comma, $i_last_comma ) = @_; + for my $i ( $i_first_comma .. $i_last_comma ) { + if ( $old_breakpoint_to_go[$i] ) { + set_forced_breakpoint($i); + } + } + return; +} + +sub set_nobreaks { + my ( $i, $j ) = @_; + if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { + + FORMATTER_DEBUG_FLAG_NOBREAK && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; + }; + + @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); + } + + # shouldn't happen; non-critical error + else { + FORMATTER_DEBUG_FLAG_NOBREAK && do { + my ( $a, $b, $c ) = caller(); + print STDOUT + "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; + }; + } + return; +} + +sub set_fake_breakpoint { + + # Just bump up the breakpoint count as a signal that there are breaks. + # This is useful if we have breaks but may want to postpone deciding where + # to make them. + $forced_breakpoint_count++; + return; +} + +sub set_forced_breakpoint { + my $i = shift; + + return unless defined $i && $i >= 0; + + # no breaks between welded tokens + return if ( weld_len_right_to_go($i) ); + + # when called with certain tokens, use bond strengths to decide + # if we break before or after it + my $token = $tokens_to_go[$i]; + + if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { + if ( $want_break_before{$token} && $i >= 0 ) { $i-- } + } + + # breaks are forced before 'if' and 'unless' + elsif ( $is_if_unless{$token} ) { $i-- } + + if ( $i >= 0 && $i <= $max_index_to_go ) { + my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; + + FORMATTER_DEBUG_FLAG_FORCE && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; + }; + + if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { + $forced_breakpoint_to_go[$i_nonblank] = 1; + + if ( $i_nonblank > $index_max_forced_break ) { + $index_max_forced_break = $i_nonblank; + } + $forced_breakpoint_count++; + $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = + $i_nonblank; + + # if we break at an opening container..break at the closing + if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { + set_closing_breakpoint($i_nonblank); + } + } + } + return; +} + +sub clear_breakpoint_undo_stack { + $forced_breakpoint_undo_count = 0; + return; +} + +sub undo_forced_breakpoint_stack { + + my $i_start = shift; + if ( $i_start < 0 ) { + $i_start = 0; + my ( $a, $b, $c ) = caller(); + warning( +"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " + ); + } + + while ( $forced_breakpoint_undo_count > $i_start ) { + my $i = + $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; + if ( $i >= 0 && $i <= $max_index_to_go ) { + $forced_breakpoint_to_go[$i] = 0; + $forced_breakpoint_count--; + + FORMATTER_DEBUG_FLAG_UNDOBP && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; + }; + } + + # shouldn't happen, but not a critical error + else { + FORMATTER_DEBUG_FLAG_UNDOBP && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; + }; + } + } + return; +} + +{ # begin recombine_breakpoints + + my %is_amp_amp; + my %is_ternary; + my %is_math_op; + my %is_plus_minus; + my %is_mult_div; + + BEGIN { + + my @q; + @q = qw( && || ); + @is_amp_amp{@q} = (1) x scalar(@q); + + @q = qw( ? : ); + @is_ternary{@q} = (1) x scalar(@q); + + @q = qw( + - * / ); + @is_math_op{@q} = (1) x scalar(@q); + + @q = qw( + - ); + @is_plus_minus{@q} = (1) x scalar(@q); + + @q = qw( * / ); + @is_mult_div{@q} = (1) x scalar(@q); + } + + sub DUMP_BREAKPOINTS { + + # Debug routine to dump current breakpoints...not normally called + # We are given indexes to the current lines: + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + my ( $ri_beg, $ri_end, $msg ) = @_; + print STDERR "----Dumping breakpoints from: $msg----\n"; + for my $n ( 0 .. @{$ri_end} - 1 ) { + my $ibeg = $ri_beg->[$n]; + my $iend = $ri_end->[$n]; + my $text = ""; + foreach my $i ( $ibeg .. $iend ) { + $text .= $tokens_to_go[$i]; + } + print STDERR "$n ($ibeg:$iend) $text\n"; + } + print STDERR "----\n"; + return; + } + + sub delete_one_line_semicolons { + + my ( $self, $ri_beg, $ri_end ) = @_; + my $rLL = $self->{rLL}; + my $K_opening_container = $self->{K_opening_container}; + + # Walk down the lines of this batch and delete any semicolons + # terminating one-line blocks; + my $nmax = @{$ri_end} - 1; + + foreach my $n ( 0 .. $nmax ) { + my $i_beg = $ri_beg->[$n]; + my $i_e = $ri_end->[$n]; + my $K_beg = $K_to_go[$i_beg]; + my $K_e = $K_to_go[$i_e]; + my $K_end = $K_e; + my $type_end = $rLL->[$K_end]->[_TYPE_]; + if ( $type_end eq '#' ) { + $K_end = $self->K_previous_nonblank($K_end); + if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; } + } + + # we are looking for a line ending in closing brace + next + unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' ); + + # ...and preceded by a semicolon on the same line + my $K_semicolon = $self->K_previous_nonblank($K_end); + my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg ); + next if ( $i_semicolon <= $i_beg ); + next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' ); + + # safety check - shouldn't happen + if ( $types_to_go[$i_semicolon] ne ';' ) { + Fault("unexpected type looking for semicolon, ignoring"); + next; + } + + # ... with the corresponding opening brace on the same line + my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_]; + my $K_opening = $K_opening_container->{$type_sequence}; + my $i_opening = $i_beg + ( $K_opening - $K_beg ); + next if ( $i_opening < $i_beg ); + + # ... and only one semicolon between these braces + my $semicolon_count = 0; + foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) { + if ( $rLL->[$K]->[_TYPE_] eq ';' ) { + $semicolon_count++; + last; + } + } + next if ($semicolon_count); + + # ...ok, then make the semicolon invisible + $tokens_to_go[$i_semicolon] = ""; + } + return; + } + + sub unmask_phantom_semicolons { + + my ( $self, $ri_beg, $ri_end ) = @_; + + # Walk down the lines of this batch and unmask any invisible line-ending + # semicolons. They were placed by sub respace_tokens but we only now + # know if we actually need them. + + my $nmax = @{$ri_end} - 1; + foreach my $n ( 0 .. $nmax ) { + + my $i = $ri_end->[$n]; + if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) { + + $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;'; + + my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] ); + note_added_semicolon($line_number); + } + } + return; + } + + sub recombine_breakpoints { + + # sub set_continuation_breaks is very liberal in setting line breaks + # for long lines, always setting breaks at good breakpoints, even + # when that creates small lines. Sometimes small line fragments + # are produced which would look better if they were combined. + # That's the task of this routine. + # + # We are given indexes to the current lines: + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + my ( $ri_beg, $ri_end ) = @_; + + # Make a list of all good joining tokens between the lines + # n-1 and n. + my @joint; + my $nmax = @{$ri_end} - 1; + for my $n ( 1 .. $nmax ) { + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + + my ( $itok, $itokp, $itokm ); + + foreach my $itest ( $iend_1, $ibeg_2 ) { + my $type = $types_to_go[$itest]; + if ( $is_math_op{$type} + || $is_amp_amp{$type} + || $is_assignment{$type} + || $type eq ':' ) + { + $itok = $itest; + } + } + $joint[$n] = [$itok]; + } + + my $more_to_do = 1; + + # We keep looping over all of the lines of this batch + # until there are no more possible recombinations + my $nmax_last = @{$ri_end}; + my $reverse = 0; + while ($more_to_do) { + my $n_best = 0; + my $bs_best; + my $nmax = @{$ri_end} - 1; + + # Safety check for infinite loop + unless ( $nmax < $nmax_last ) { + + # Shouldn't happen because splice below decreases nmax on each + # pass. + Fault("Program bug-infinite loop in recombine breakpoints\n"); + } + $nmax_last = $nmax; + $more_to_do = 0; + my $skip_Section_3; + my $leading_amp_count = 0; + my $this_line_is_semicolon_terminated; + + # loop over all remaining lines in this batch + for my $iter ( 1 .. $nmax ) { + + # alternating sweep direction gives symmetric results + # for recombining lines which exceed the line length + # such as eval {{{{.... }}}} + my $n; + if ($reverse) { $n = 1 + $nmax - $iter; } + else { $n = $iter } + + #---------------------------------------------------------- + # If we join the current pair of lines, + # line $n-1 will become the left part of the joined line + # line $n will become the right part of the joined line + # + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # We want to decide if we should remove the line break + # between the tokens at $iend_1 and $ibeg_2 + # + # We will apply a number of ad-hoc tests to see if joining + # here will look ok. The code will just issue a 'next' + # command if the join doesn't look good. If we get through + # the gauntlet of tests, the lines will be recombined. + #---------------------------------------------------------- + # + # beginning and ending tokens of the lines we are working on + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + my $ibeg_nmax = $ri_beg->[$nmax]; + + # combined line cannot be too long + my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 ); + next if ( $excess > 0 ); + + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + + # terminal token of line 2 if any side comment is ignored: + my $iend_2t = $iend_2; + my $type_iend_2t = $type_iend_2; + + # some beginning indexes of other lines, which may not exist + my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; + my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; + my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; + + my $bs_tweak = 0; + + #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - + # $nesting_depth_to_go[$ibeg_1] ); + + FORMATTER_DEBUG_FLAG_RECOMBINE && do { + print STDERR +"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; + }; + + # If line $n is the last line, we set some flags and + # do any special checks for it + if ( $n == $nmax ) { + + # a terminal '{' should stay where it is + # unless preceded by a fat comma + next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' ); + + if ( $type_iend_2 eq '#' + && $iend_2 - $ibeg_2 >= 2 + && $types_to_go[ $iend_2 - 1 ] eq 'b' ) + { + $iend_2t = $iend_2 - 2; + $type_iend_2t = $types_to_go[$iend_2t]; + } + + $this_line_is_semicolon_terminated = $type_iend_2t eq ';'; + } + + #---------------------------------------------------------- + # Recombine Section 0: + # Examine the special token joining this line pair, if any. + # Put as many tests in this section to avoid duplicate code and + # to make formatting independent of whether breaks are to the + # left or right of an operator. + #---------------------------------------------------------- + + my ($itok) = @{ $joint[$n] }; + if ($itok) { + + # FIXME: Patch - may not be necessary + my $iend_1 = + $type_iend_1 eq 'b' + ? $iend_1 - 1 + : $iend_1; + + my $iend_2 = + $type_iend_2 eq 'b' + ? $iend_2 - 1 + : $iend_2; + ## END PATCH + + my $type = $types_to_go[$itok]; + + if ( $type eq ':' ) { + + # do not join at a colon unless it disobeys the break request + if ( $itok eq $iend_1 ) { + next unless $want_break_before{$type}; + } + else { + $leading_amp_count++; + next if $want_break_before{$type}; + } + } ## end if ':' + + # handle math operators + - * / + elsif ( $is_math_op{$type} ) { + + # Combine these lines if this line is a single + # number, or if it is a short term with same + # operator as the previous line. For example, in + # the following code we will combine all of the + # short terms $A, $B, $C, $D, $E, $F, together + # instead of leaving them one per line: + # my $time = + # $A * $B * $C * $D * $E * $F * + # ( 2. * $eps * $sigma * $area ) * + # ( 1. / $tcold**3 - 1. / $thot**3 ); + + # This can be important in math-intensive code. + + my $good_combo; + + my $itokp = min( $inext_to_go[$itok], $iend_2 ); + my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); + my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); + my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); + + # check for a number on the right + if ( $types_to_go[$itokp] eq 'n' ) { + + # ok if nothing else on right + if ( $itokp == $iend_2 ) { + $good_combo = 1; + } + else { + + # look one more token to right.. + # okay if math operator or some termination + $good_combo = + ( ( $itokpp == $iend_2 ) + && $is_math_op{ $types_to_go[$itokpp] } ) + || $types_to_go[$itokpp] =~ /^[#,;]$/; + } + } + + # check for a number on the left + if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { + + # okay if nothing else to left + if ( $itokm == $ibeg_1 ) { + $good_combo = 1; + } + + # otherwise look one more token to left + else { + + # okay if math operator, comma, or assignment + $good_combo = ( $itokmm == $ibeg_1 ) + && ( $is_math_op{ $types_to_go[$itokmm] } + || $types_to_go[$itokmm] =~ /^[,]$/ + || $is_assignment{ $types_to_go[$itokmm] } + ); + } + } + + # look for a single short token either side of the + # operator + if ( !$good_combo ) { + + # Slight adjustment factor to make results + # independent of break before or after operator in + # long summed lists. (An operator and a space make + # two spaces). + my $two = ( $itok eq $iend_1 ) ? 2 : 0; + + $good_combo = + + # numbers or id's on both sides of this joint + $types_to_go[$itokp] =~ /^[in]$/ + && $types_to_go[$itokm] =~ /^[in]$/ + + # one of the two lines must be short: + && ( + ( + # no more than 2 nonblank tokens right of + # joint + $itokpp == $iend_2 + + # short + && token_sequence_length( $itokp, $iend_2 ) + < $two + + $rOpts_short_concatenation_item_length + ) + || ( + # no more than 2 nonblank tokens left of + # joint + $itokmm == $ibeg_1 + + # short + && token_sequence_length( $ibeg_1, $itokm ) + < 2 - $two + + $rOpts_short_concatenation_item_length + ) + + ) + + # keep pure terms; don't mix +- with */ + && !( + $is_plus_minus{$type} + && ( $is_mult_div{ $types_to_go[$itokmm] } + || $is_mult_div{ $types_to_go[$itokpp] } ) + ) + && !( + $is_mult_div{$type} + && ( $is_plus_minus{ $types_to_go[$itokmm] } + || $is_plus_minus{ $types_to_go[$itokpp] } ) + ) + + ; + } + + # it is also good to combine if we can reduce to 2 lines + if ( !$good_combo ) { + + # index on other line where same token would be in a + # long chain. + my $iother = + ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; + + $good_combo = + $n == 2 + && $n == $nmax + && $types_to_go[$iother] ne $type; + } + + next unless ($good_combo); + + } ## end math + + elsif ( $is_amp_amp{$type} ) { + ##TBD + } ## end &&, || + + elsif ( $is_assignment{$type} ) { + ##TBD + } ## end assignment + } + + #---------------------------------------------------------- + # Recombine Section 1: + # Join welded nested containers immediately + #---------------------------------------------------------- + if ( weld_len_right_to_go($iend_1) + || weld_len_left_to_go($ibeg_2) ) + { + $n_best = $n; + + # Old coding alternated sweep direction: no longer needed + # $reverse = 1 - $reverse; + last; + } + $reverse = 0; + + #---------------------------------------------------------- + # Recombine Section 2: + # Examine token at $iend_1 (right end of first line of pair) + #---------------------------------------------------------- + + # an isolated '}' may join with a ';' terminated segment + if ( $type_iend_1 eq '}' ) { + + # Check for cases where combining a semicolon terminated + # statement with a previous isolated closing paren will + # allow the combined line to be outdented. This is + # generally a good move. For example, we can join up + # the last two lines here: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) + # = stat($file); + # + # to get: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) = stat($file); + # + # which makes the parens line up. + # + # Another example, from Joe Matarazzo, probably looks best + # with the 'or' clause appended to the trailing paren: + # $self->some_method( + # PARAM1 => 'foo', + # PARAM2 => 'bar' + # ) or die "Some_method didn't work"; + # + # But we do not want to do this for something like the -lp + # option where the paren is not outdentable because the + # trailing clause will be far to the right. + # + # The logic here is synchronized with the logic in sub + # sub set_adjusted_indentation, which actually does + # the outdenting. + # + $skip_Section_3 ||= $this_line_is_semicolon_terminated + + # only one token on last line + && $ibeg_1 == $iend_1 + + # must be structural paren + && $tokens_to_go[$iend_1] eq ')' + + # style must allow outdenting, + && !$closing_token_indentation{')'} + + # only leading '&&', '||', and ':' if no others seen + # (but note: our count made below could be wrong + # due to intervening comments) + && ( $leading_amp_count == 0 + || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) + + # but leading colons probably line up with a + # previous colon or question (count could be wrong). + && $type_ibeg_2 ne ':' + + # only one step in depth allowed. this line must not + # begin with a ')' itself. + && ( $nesting_depth_to_go[$iend_1] == + $nesting_depth_to_go[$iend_2] + 1 ); + + # YVES patch 2 of 2: + # Allow cuddled eval chains, like this: + # eval { + # #STUFF; + # 1; # return true + # } or do { + # #handle error + # }; + # This patch works together with a patch in + # setting adjusted indentation (where the closing eval + # brace is outdented if possible). + # The problem is that an 'eval' block has continuation + # indentation and it looks better to undo it in some + # cases. If we do not use this patch we would get: + # eval { + # #STUFF; + # 1; # return true + # } + # or do { + # #handle error + # }; + # The alternative, for uncuddled style, is to create + # a patch in set_adjusted_indentation which undoes + # the indentation of a leading line like 'or do {'. + # This doesn't work well with -icb through + if ( + $block_type_to_go[$iend_1] eq 'eval' + && !$rOpts->{'line-up-parentheses'} + && !$rOpts->{'indent-closing-brace'} + && $tokens_to_go[$iend_2] eq '{' + && ( + ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ ) + || ( $type_ibeg_2 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_2] } ) + || $is_if_unless{ $tokens_to_go[$ibeg_2] } + ) + ) + { + $skip_Section_3 ||= 1; + } + + next + unless ( + $skip_Section_3 + + # handle '.' and '?' specially below + || ( $type_ibeg_2 =~ /^[\.\?]$/ ) + ); + } + + elsif ( $type_iend_1 eq '{' ) { + + # YVES + # honor breaks at opening brace + # Added to prevent recombining something like this: + # } || eval { package main; + next if $forced_breakpoint_to_go[$iend_1]; + } + + # do not recombine lines with ending &&, ||, + elsif ( $is_amp_amp{$type_iend_1} ) { + next unless $want_break_before{$type_iend_1}; + } + + # Identify and recombine a broken ?/: chain + elsif ( $type_iend_1 eq '?' ) { + + # Do not recombine different levels + next + if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); + + # do not recombine unless next line ends in : + next unless $type_iend_2 eq ':'; + } + + # for lines ending in a comma... + elsif ( $type_iend_1 eq ',' ) { + + # Do not recombine at comma which is following the + # input bias. + # TODO: might be best to make a special flag + next if ( $old_breakpoint_to_go[$iend_1] ); + + # an isolated '},' may join with an identifier + ';' + # this is useful for the class of a 'bless' statement (bless.t) + if ( $type_ibeg_1 eq '}' + && $type_ibeg_2 eq 'i' ) + { + next + unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) + && ( $iend_2 == ( $ibeg_2 + 1 ) ) + && $this_line_is_semicolon_terminated ); + + # override breakpoint + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # but otherwise .. + else { + + # do not recombine after a comma unless this will leave + # just 1 more line + next unless ( $n + 1 >= $nmax ); + + # do not recombine if there is a change in indentation depth + next + if ( + $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); + + # do not recombine a "complex expression" after a + # comma. "complex" means no parens. + my $saw_paren; + foreach my $ii ( $ibeg_2 .. $iend_2 ) { + if ( $tokens_to_go[$ii] eq '(' ) { + $saw_paren = 1; + last; + } + } + next if $saw_paren; + } + } + + # opening paren.. + elsif ( $type_iend_1 eq '(' ) { + + # No longer doing this + } + + elsif ( $type_iend_1 eq ')' ) { + + # No longer doing this + } + + # keep a terminal for-semicolon + elsif ( $type_iend_1 eq 'f' ) { + next; + } + + # if '=' at end of line ... + elsif ( $is_assignment{$type_iend_1} ) { + + # keep break after = if it was in input stream + # this helps prevent 'blinkers' + next if $old_breakpoint_to_go[$iend_1] + + # don't strand an isolated '=' + && $iend_1 != $ibeg_1; + + my $is_short_quote = + ( $type_ibeg_2 eq 'Q' + && $ibeg_2 == $iend_2 + && token_sequence_length( $ibeg_2, $ibeg_2 ) < + $rOpts_short_concatenation_item_length ); + my $is_ternary = + ( $type_ibeg_1 eq '?' + && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); + + # always join an isolated '=', a short quote, or if this + # will put ?/: at start of adjacent lines + if ( $ibeg_1 != $iend_1 + && !$is_short_quote + && !$is_ternary ) + { + next + unless ( + ( + + # unless we can reduce this to two lines + $nmax < $n + 2 + + # or three lines, the last with a leading semicolon + || ( $nmax == $n + 2 + && $types_to_go[$ibeg_nmax] eq ';' ) + + # or the next line ends with a here doc + || $type_iend_2 eq 'h' + + # or the next line ends in an open paren or brace + # and the break hasn't been forced [dima.t] + || ( !$forced_breakpoint_to_go[$iend_1] + && $type_iend_2 eq '{' ) + ) + + # do not recombine if the two lines might align well + # this is a very approximate test for this + && ( + + # RT#127633 - the leading tokens are not operators + ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] ) + + # or they are different + || ( $ibeg_3 >= 0 + && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) + ) + ); + + if ( + + # Recombine if we can make two lines + $nmax >= $n + 2 + + # -lp users often prefer this: + # my $title = function($env, $env, $sysarea, + # "bubba Borrower Entry"); + # so we will recombine if -lp is used we have + # ending comma + && ( !$rOpts_line_up_parentheses + || $type_iend_2 ne ',' ) + ) + { + + # otherwise, scan the rhs line up to last token for + # complexity. Note that we are not counting the last + # token in case it is an opening paren. + my $tv = 0; + my $depth = $nesting_depth_to_go[$ibeg_2]; + foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 1 ); + } + $depth = $nesting_depth_to_go[$i]; + } + + # ok to recombine if no level changes before last token + if ( $tv > 0 ) { + + # otherwise, do not recombine if more than two + # level changes. + next if ( $tv > 1 ); + + # check total complexity of the two adjacent lines + # that will occur if we do this join + my $istop = + ( $n < $nmax ) + ? $ri_end->[ $n + 1 ] + : $iend_2; + foreach my $i ( $iend_2 .. $istop ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 2 ); + } + $depth = $nesting_depth_to_go[$i]; + } + + # do not recombine if total is more than 2 level changes + next if ( $tv > 2 ); + } + } + } + + unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { + $forced_breakpoint_to_go[$iend_1] = 0; + } + } + + # for keywords.. + elsif ( $type_iend_1 eq 'k' ) { + + # make major control keywords stand out + # (recombine.t) + next + if ( + + #/^(last|next|redo|return)$/ + $is_last_next_redo_return{ $tokens_to_go[$iend_1] } + + # but only if followed by multiple lines + && $n < $nmax + ); + + if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { + next + unless $want_break_before{ $tokens_to_go[$iend_1] }; + } + } + + #---------------------------------------------------------- + # Recombine Section 3: + # Examine token at $ibeg_2 (left end of second line of pair) + #---------------------------------------------------------- + + # join lines identified above as capable of + # causing an outdented line with leading closing paren + # Note that we are skipping the rest of this section + # and the rest of the loop to do the join + if ($skip_Section_3) { + $forced_breakpoint_to_go[$iend_1] = 0; + $n_best = $n; + last; + } + + # handle lines with leading &&, || + elsif ( $is_amp_amp{$type_ibeg_2} ) { + + $leading_amp_count++; + + # ok to recombine if it follows a ? or : + # and is followed by an open paren.. + my $ok = + ( $is_ternary{$type_ibeg_1} + && $tokens_to_go[$iend_2] eq '(' ) + + # or is followed by a ? or : at same depth + # + # We are looking for something like this. We can + # recombine the && line with the line above to make the + # structure more clear: + # return + # exists $G->{Attr}->{V} + # && exists $G->{Attr}->{V}->{$u} + # ? %{ $G->{Attr}->{V}->{$u} } + # : (); + # + # We should probably leave something like this alone: + # return + # exists $G->{Attr}->{E} + # && exists $G->{Attr}->{E}->{$u} + # && exists $G->{Attr}->{E}->{$u}->{$v} + # ? %{ $G->{Attr}->{E}->{$u}->{$v} } + # : (); + # so that we either have all of the &&'s (or ||'s) + # on one line, as in the first example, or break at + # each one as in the second example. However, it + # sometimes makes things worse to check for this because + # it prevents multiple recombinations. So this is not done. + || ( $ibeg_3 >= 0 + && $is_ternary{ $types_to_go[$ibeg_3] } + && $nesting_depth_to_go[$ibeg_3] == + $nesting_depth_to_go[$ibeg_2] ); + + next if !$ok && $want_break_before{$type_ibeg_2}; + $forced_breakpoint_to_go[$iend_1] = 0; + + # tweak the bond strength to give this joint priority + # over ? and : + $bs_tweak = 0.25; + } + + # Identify and recombine a broken ?/: chain + elsif ( $type_ibeg_2 eq '?' ) { + + # Do not recombine different levels + my $lev = $levels_to_go[$ibeg_2]; + next if ( $lev ne $levels_to_go[$ibeg_1] ); + + # Do not recombine a '?' if either next line or + # previous line does not start with a ':'. The reasons + # are that (1) no alignment of the ? will be possible + # and (2) the expression is somewhat complex, so the + # '?' is harder to see in the interior of the line. + my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; + my $precedes_colon = + $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; + next unless ( $follows_colon || $precedes_colon ); + + # we will always combining a ? line following a : line + if ( !$follows_colon ) { + + # ...otherwise recombine only if it looks like a chain. + # we will just look at a few nearby lines to see if + # this looks like a chain. + my $local_count = 0; + foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { + $local_count++ + if $ii >= 0 + && $types_to_go[$ii] eq ':' + && $levels_to_go[$ii] == $lev; + } + next unless ( $local_count > 1 ); + } + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # do not recombine lines with leading '.' + elsif ( $type_ibeg_2 eq '.' ) { + my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); + next + unless ( + + # ... unless there is just one and we can reduce + # this to two lines if we do. For example, this + # + # + # $bodyA .= + # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' + # + # looks better than this: + # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' + # . '$args .= $pat;' + + ( + $n == 2 + && $n == $nmax + && $type_ibeg_1 ne $type_ibeg_2 + ) + + # ... or this would strand a short quote , like this + # . "some long quote" + # . "\n"; + + || ( $types_to_go[$i_next_nonblank] eq 'Q' + && $i_next_nonblank >= $iend_2 - 1 + && $token_lengths_to_go[$i_next_nonblank] < + $rOpts_short_concatenation_item_length ) + ); + } + + # handle leading keyword.. + elsif ( $type_ibeg_2 eq 'k' ) { + + # handle leading "or" + if ( $tokens_to_go[$ibeg_2] eq 'or' ) { + next + unless ( + $this_line_is_semicolon_terminated + && ( + + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + # important: only combine a very simple or + # statement because the step below may have + # combined a trailing 'and' with this or, + # and we do not want to then combine + # everything together + && ( $iend_2 - $ibeg_2 <= 7 ) + ) + ); + + #X: RT #81854 + $forced_breakpoint_to_go[$iend_1] = 0 + unless $old_breakpoint_to_go[$iend_1]; + } + + # handle leading 'and' + elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { + + # Decide if we will combine a single terminal 'and' + # after an 'if' or 'unless'. + + # This looks best with the 'and' on the same + # line as the 'if': + # + # $a = 1 + # if $seconds and $nu < 2; + # + # But this looks better as shown: + # + # $a = 1 + # if !$this->{Parents}{$_} + # or $this->{Parents}{$_} eq $_; + # + next + unless ( + $this_line_is_semicolon_terminated + && ( + + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } + || $tokens_to_go[$ibeg_1] eq 'or' ) + ) + ); + } + + # handle leading "if" and "unless" + elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { + + # FIXME: This is still experimental..may not be too useful + next + unless ( + $this_line_is_semicolon_terminated + + # previous line begins with 'and' or 'or' + && $type_ibeg_1 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_1] } + + ); + } + + # handle all other leading keywords + else { + + # keywords look best at start of lines, + # but combine things like "1 while" + unless ( $is_assignment{$type_iend_1} ) { + next + if ( ( $type_iend_1 ne 'k' ) + && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); + } + } + } + + # similar treatment of && and || as above for 'and' and 'or': + # NOTE: This block of code is currently bypassed because + # of a previous block but is retained for possible future use. + elsif ( $is_amp_amp{$type_ibeg_2} ) { + + # maybe looking at something like: + # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; + + next + unless ( + $this_line_is_semicolon_terminated + + # previous line begins with an 'if' or 'unless' keyword + && $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + ); + } + + # handle line with leading = or similar + elsif ( $is_assignment{$type_ibeg_2} ) { + next unless ( $n == 1 || $n == $nmax ); + next if $old_breakpoint_to_go[$iend_1]; + next + unless ( + + # unless we can reduce this to two lines + $nmax == 2 + + # or three lines, the last with a leading semicolon + || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) + + # or the next line ends with a here doc + || $type_iend_2 eq 'h' + + # or this is a short line ending in ; + || ( $n == $nmax && $this_line_is_semicolon_terminated ) + ); + $forced_breakpoint_to_go[$iend_1] = 0; + } + + #---------------------------------------------------------- + # Recombine Section 4: + # Combine the lines if we arrive here and it is possible + #---------------------------------------------------------- + + # honor hard breakpoints + next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); + + my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; + + # Require a few extra spaces before recombining lines if we are + # at an old breakpoint unless this is a simple list or terminal + # line. The goal is to avoid oscillating between two + # quasi-stable end states. For example this snippet caused + # problems: +## my $this = +## bless { +## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" +## }, +## $type; + next + if ( $old_breakpoint_to_go[$iend_1] + && !$this_line_is_semicolon_terminated + && $n < $nmax + && $excess + 4 > 0 + && $type_iend_2 ne ',' ); + + # do not recombine if we would skip in indentation levels + if ( $n < $nmax ) { + my $if_next = $ri_beg->[ $n + 1 ]; + next + if ( + $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] + && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] + + # but an isolated 'if (' is undesirable + && !( + $n == 1 + && $iend_1 - $ibeg_1 <= 2 + && $type_ibeg_1 eq 'k' + && $tokens_to_go[$ibeg_1] eq 'if' + && $tokens_to_go[$iend_1] ne '(' + ) + ); + } + + # honor no-break's + next if ( $bs >= NO_BREAK - 1 ); + + # remember the pair with the greatest bond strength + if ( !$n_best ) { + $n_best = $n; + $bs_best = $bs; + } + else { + + if ( $bs > $bs_best ) { + $n_best = $n; + $bs_best = $bs; + } + } + } + + # recombine the pair with the greatest bond strength + if ($n_best) { + splice @{$ri_beg}, $n_best, 1; + splice @{$ri_end}, $n_best - 1, 1; + splice @joint, $n_best, 1; + + # keep going if we are still making progress + $more_to_do++; + } + } + return ( $ri_beg, $ri_end ); + } +} # end recombine_breakpoints + +sub break_all_chain_tokens { + + # scan the current breakpoints looking for breaks at certain "chain + # operators" (. : && || + etc) which often occur repeatedly in a long + # statement. If we see a break at any one, break at all similar tokens + # within the same container. + # + my ( $ri_left, $ri_right ) = @_; + + my %saw_chain_type; + my %left_chain_type; + my %right_chain_type; + my %interior_chain_type; + my $nmax = @{$ri_right} - 1; + + # scan the left and right end tokens of all lines + my $count = 0; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + my $typel = $types_to_go[$il]; + my $typer = $types_to_go[$ir]; + $typel = '+' if ( $typel eq '-' ); # treat + and - the same + $typer = '+' if ( $typer eq '-' ); + $typel = '*' if ( $typel eq '/' ); # treat * and / the same + $typer = '*' if ( $typer eq '/' ); + my $tokenl = $tokens_to_go[$il]; + my $tokenr = $tokens_to_go[$ir]; + + if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { + next if ( $typel eq '?' ); + push @{ $left_chain_type{$typel} }, $il; + $saw_chain_type{$typel} = 1; + $count++; + } + if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { + next if ( $typer eq '?' ); + push @{ $right_chain_type{$typer} }, $ir; + $saw_chain_type{$typer} = 1; + $count++; + } + } + return unless $count; + + # now look for any interior tokens of the same types + $count = 0; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + foreach my $i ( $il + 1 .. $ir - 1 ) { + my $type = $types_to_go[$i]; + $type = '+' if ( $type eq '-' ); + $type = '*' if ( $type eq '/' ); + if ( $saw_chain_type{$type} ) { + push @{ $interior_chain_type{$type} }, $i; + $count++; + } + } + } + return unless $count; + + # now make a list of all new break points + my @insert_list; + + # loop over all chain types + foreach my $type ( keys %saw_chain_type ) { + + # quit if just ONE continuation line with leading . For example-- + # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' + # . $contents; + last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); + + # loop over all interior chain tokens + foreach my $itest ( @{ $interior_chain_type{$type} } ) { + + # loop over all left end tokens of same type + if ( $left_chain_type{$type} ) { + next if $nobreak_to_go[ $itest - 1 ]; + foreach my $i ( @{ $left_chain_type{$type} } ) { + next unless in_same_container( $i, $itest ); + push @insert_list, $itest - 1; + + # Break at matching ? if this : is at a different level. + # For example, the ? before $THRf_DEAD in the following + # should get a break if its : gets a break. + # + # my $flags = + # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE + # : ( $_ & 4 ) ? $THRf_R_DETACHED + # : $THRf_R_JOINABLE; + if ( $type eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( $i_question > 0 ) { + push @insert_list, $i_question - 1; + } + } + last; + } + } + + # loop over all right end tokens of same type + if ( $right_chain_type{$type} ) { + next if $nobreak_to_go[$itest]; + foreach my $i ( @{ $right_chain_type{$type} } ) { + next unless in_same_container( $i, $itest ); + push @insert_list, $itest; + + # break at matching ? if this : is at a different level + if ( $type eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( $i_question >= 0 ) { + push @insert_list, $i_question; + } + } + last; + } + } + } + } + + # insert any new break points + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + return; +} + +sub break_equals { + + # Look for assignment operators that could use a breakpoint. + # For example, in the following snippet + # + # $HOME = $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # we could break at the = to get this, which is a little nicer: + # $HOME = + # $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # The logic here follows the logic in set_logical_padding, which + # will add the padding in the second line to improve alignment. + # + my ( $ri_left, $ri_right ) = @_; + my $nmax = @{$ri_right} - 1; + return unless ( $nmax >= 2 ); + + # scan the left ends of first two lines + my $tokbeg = ""; + my $depth_beg; + for my $n ( 1 .. 2 ) { + my $il = $ri_left->[$n]; + my $typel = $types_to_go[$il]; + my $tokenl = $tokens_to_go[$il]; + + my $has_leading_op = ( $tokenl =~ /^\w/ ) + ? $is_chain_operator{$tokenl} # + - * / : ? && || + : $is_chain_operator{$typel}; # and, or + return unless ($has_leading_op); + if ( $n > 1 ) { + return + unless ( $tokenl eq $tokbeg + && $nesting_depth_to_go[$il] eq $depth_beg ); + } + $tokbeg = $tokenl; + $depth_beg = $nesting_depth_to_go[$il]; + } + + # now look for any interior tokens of the same types + my $il = $ri_left->[0]; + my $ir = $ri_right->[0]; + + # now make a list of all new break points + my @insert_list; + for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { + my $type = $types_to_go[$i]; + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + if ( $want_break_before{$type} ) { + push @insert_list, $i - 1; + } + else { + push @insert_list, $i; + } + } + } + + # Break after a 'return' followed by a chain of operators + # return ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + # To give: + # return + # ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + my $i = 0; + if ( $types_to_go[$i] eq 'k' + && $tokens_to_go[$i] eq 'return' + && $ir > $il + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + push @insert_list, $i; + } + + return unless (@insert_list); + + # One final check... + # scan second and third lines and be sure there are no assignments + # we want to avoid breaking at an = to make something like this: + # unless ( $icon = + # $html_icons{"$type-$state"} + # or $icon = $html_icons{$type} + # or $icon = $html_icons{$state} ) + for my $n ( 1 .. 2 ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + foreach my $i ( $il + 1 .. $ir ) { + my $type = $types_to_go[$i]; + return + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ); + } + } + + # ok, insert any new break point + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + return; +} + +sub insert_final_breaks { + + my ( $ri_left, $ri_right ) = @_; + + my $nmax = @{$ri_right} - 1; + + # scan the left and right end tokens of all lines + my $count = 0; + my $i_first_colon = -1; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + my $typel = $types_to_go[$il]; + my $typer = $types_to_go[$ir]; + return if ( $typel eq '?' ); + return if ( $typer eq '?' ); + if ( $typel eq ':' ) { $i_first_colon = $il; last; } + elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } + } + + # For long ternary chains, + # if the first : we see has its # ? is in the interior + # of a preceding line, then see if there are any good + # breakpoints before the ?. + if ( $i_first_colon > 0 ) { + my $i_question = $mate_index_to_go[$i_first_colon]; + if ( $i_question > 0 ) { + my @insert_list; + for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { + my $token = $tokens_to_go[$ii]; + my $type = $types_to_go[$ii]; + + # For now, a good break is either a comma or, + # in a long chain, a 'return'. + # Patch for RT #126633: added the $nmax>1 check to avoid + # breaking after a return for a simple ternary. For longer + # chains the break after return allows vertical alignment, so + # it is still done. So perltidy -wba='?' will not break + # immediately after the return in the following statement: + # sub x { + # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' : + # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'; + # } + if ( + ( + $type eq ',' + || $type eq 'k' && ( $nmax > 1 && $token eq 'return' ) + ) + && in_same_container( $ii, $i_question ) + ) + { + push @insert_list, $ii; + last; + } + +## # For now, a good break is either a comma or a 'return'. +## if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) +## && in_same_container( $ii, $i_question ) ) +## { +## push @insert_list, $ii; +## last; +## } + } + + # insert any new break points + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + } + } + return; +} + +sub in_same_container { + + # check to see if tokens at i1 and i2 are in the + # same container, and not separated by a comma, ? or : + # FIXME: this can be written more efficiently now + my ( $i1, $i2 ) = @_; + my $type = $types_to_go[$i1]; + my $depth = $nesting_depth_to_go[$i1]; + return unless ( $nesting_depth_to_go[$i2] == $depth ); + if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } + + ########################################################### + # This is potentially a very slow routine and not critical. + # For safety just give up for large differences. + # See test file 'infinite_loop.txt' + # TODO: replace this loop with a data structure + ########################################################### + return if ( $i2 - $i1 > 200 ); + + foreach my $i ( $i1 + 1 .. $i2 - 1 ) { + next if ( $nesting_depth_to_go[$i] > $depth ); + return if ( $nesting_depth_to_go[$i] < $depth ); + + my $tok = $tokens_to_go[$i]; + $tok = ',' if $tok eq '=>'; # treat => same as , + + # Example: we would not want to break at any of these .'s + # : "$str" + if ( $type ne ':' ) { + return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; + } + else { + return if ( $tok =~ /^[\,]$/ ); + } + } + return 1; +} + +sub set_continuation_breaks { + + # Define an array of indexes for inserting newline characters to + # keep the line lengths below the maximum desired length. There is + # an implied break after the last token, so it need not be included. + + # Method: + # This routine is part of series of routines which adjust line + # lengths. It is only called if a statement is longer than the + # maximum line length, or if a preliminary scanning located + # desirable break points. Sub scan_list has already looked at + # these tokens and set breakpoints (in array + # $forced_breakpoint_to_go[$i]) where it wants breaks (for example + # after commas, after opening parens, and before closing parens). + # This routine will honor these breakpoints and also add additional + # breakpoints as necessary to keep the line length below the maximum + # requested. It bases its decision on where the 'bond strength' is + # lowest. + + # Output: returns references to the arrays: + # @i_first + # @i_last + # which contain the indexes $i of the first and last tokens on each + # line. + + # In addition, the array: + # $forced_breakpoint_to_go[$i] + # may be updated to be =1 for any index $i after which there must be + # a break. This signals later routines not to undo the breakpoint. + + my $saw_good_break = shift; + my @i_first = (); # the first index to output + my @i_last = (); # the last index to output + my @i_colon_breaks = (); # needed to decide if we have to break at ?'s + if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } + + set_bond_strengths(); + + my $imin = 0; + my $imax = $max_index_to_go; + if ( $types_to_go[$imin] eq 'b' ) { $imin++ } + if ( $types_to_go[$imax] eq 'b' ) { $imax-- } + my $i_begin = $imin; # index for starting next iteration + + my $leading_spaces = leading_spaces_to_go($imin); + my $line_count = 0; + my $last_break_strength = NO_BREAK; + my $i_last_break = -1; + my $max_bias = 0.001; + my $tiny_bias = 0.0001; + my $leading_alignment_token = ""; + my $leading_alignment_type = ""; + + # see if any ?/:'s are in order + my $colons_in_order = 1; + my $last_tok = ""; + my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ]; + my $colon_count = @colon_list; + foreach (@colon_list) { + if ( $_ eq $last_tok ) { $colons_in_order = 0; last } + $last_tok = $_; + } + + # This is a sufficient but not necessary condition for colon chain + my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); + + #------------------------------------------------------- + # BEGINNING of main loop to set continuation breakpoints + # Keep iterating until we reach the end + #------------------------------------------------------- + while ( $i_begin <= $imax ) { + my $lowest_strength = NO_BREAK; + my $starting_sum = $summed_lengths_to_go[$i_begin]; + my $i_lowest = -1; + my $i_test = -1; + my $lowest_next_token = ''; + my $lowest_next_type = 'b'; + my $i_lowest_next_nonblank = -1; + + #------------------------------------------------------- + # BEGINNING of inner loop to find the best next breakpoint + #------------------------------------------------------- + for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { + my $type = $types_to_go[$i_test]; + my $token = $tokens_to_go[$i_test]; + my $next_type = $types_to_go[ $i_test + 1 ]; + my $next_token = $tokens_to_go[ $i_test + 1 ]; + my $i_next_nonblank = $inext_to_go[$i_test]; + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; + my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; + my $strength = $bond_strength_to_go[$i_test]; + my $maximum_line_length = maximum_line_length($i_begin); + + # use old breaks as a tie-breaker. For example to + # prevent blinkers with -pbp in this code: + +##@keywords{ +## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/} +## = (); + + # At the same time try to prevent a leading * in this code + # with the default formatting: + # +## return +## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 ) +## * ( $x**( $a - 1 ) ) +## * ( ( 1 - $x )**( $b - 1 ) ); + + # reduce strength a bit to break ties at an old breakpoint ... + if ( + $old_breakpoint_to_go[$i_test] + + # which is a 'good' breakpoint, meaning ... + # we don't want to break before it + && !$want_break_before{$type} + + # and either we want to break before the next token + # or the next token is not short (i.e. not a '*', '/' etc.) + && $i_next_nonblank <= $imax + && ( $want_break_before{$next_nonblank_type} + || $token_lengths_to_go[$i_next_nonblank] > 2 + || $next_nonblank_type =~ /^[\,\(\[\{L]$/ ) + ) + { + $strength -= $tiny_bias; + } + + # otherwise increase strength a bit if this token would be at the + # maximum line length. This is necessary to avoid blinking + # in the above example when the -iob flag is added. + else { + my $len = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum; + if ( $len >= $maximum_line_length ) { + $strength += $tiny_bias; + } + } + + my $must_break = 0; + + # Force an immediate break at certain operators + # with lower level than the start of the line, + # unless we've already seen a better break. + # + ############################################## + # Note on an issue with a preceding ? + ############################################## + # We don't include a ? in the above list, but there may + # be a break at a previous ? if the line is long. + # Because of this we do not want to force a break if + # there is a previous ? on this line. For now the best way + # to do this is to not break if we have seen a lower strength + # point, which is probably a ?. + # + # Example of unwanted breaks we are avoiding at a '.' following a ? + # from pod2html using perltidy -gnu: + # ) + # ? "\n<A NAME=\"" + # . $value + # . "\">\n$text</A>\n" + # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; + if ( + ( + $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ + || ( $next_nonblank_type eq 'k' + && $next_nonblank_token =~ /^(and|or)$/ ) + ) + && ( $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_next_nonblank] ) + && ( $strength <= $lowest_strength ) + ) + { + set_forced_breakpoint($i_next_nonblank); + } + + if ( + + # Try to put a break where requested by scan_list + $forced_breakpoint_to_go[$i_test] + + # break between ) { in a continued line so that the '{' can + # be outdented + # See similar logic in scan_list which catches instances + # where a line is just something like ') {'. We have to + # be careful because the corresponding block keyword might + # not be on the first line, such as 'for' here: + # + # eval { + # for ("a") { + # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } + # } + # }; + # + || ( + $line_count + && ( $token eq ')' ) + && ( $next_nonblank_type eq '{' ) + && ($next_nonblank_block_type) + && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) + + # RT #104427: Dont break before opening sub brace because + # sub block breaks handled at higher level, unless + # it looks like the preceeding list is long and broken + && !( + $next_nonblank_block_type =~ /^sub\b/ + && ( $nesting_depth_to_go[$i_begin] == + $nesting_depth_to_go[$i_next_nonblank] ) + ) + + && !$rOpts->{'opening-brace-always-on-right'} + ) + + # There is an implied forced break at a terminal opening brace + || ( ( $type eq '{' ) && ( $i_test == $imax ) ) + ) + { + + # Forced breakpoints must sometimes be overridden, for example + # because of a side comment causing a NO_BREAK. It is easier + # to catch this here than when they are set. + if ( $strength < NO_BREAK - 1 ) { + $strength = $lowest_strength - $tiny_bias; + $must_break = 1; + } + } + + # quit if a break here would put a good terminal token on + # the next line and we already have a possible break + if ( + !$must_break + && ( $next_nonblank_type =~ /^[\;\,]$/ ) + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_next_nonblank + 1 ] - + $starting_sum + ) > $maximum_line_length + ) + ) + { + last if ( $i_lowest >= 0 ); + } + + # Avoid a break which would strand a single punctuation + # token. For example, we do not want to strand a leading + # '.' which is followed by a long quoted string. + # But note that we do want to do this with -extrude (l=1) + # so please test any changes to this code on -extrude. + if ( + !$must_break + && ( $i_test == $i_begin ) + && ( $i_test < $imax ) + && ( $token eq $type ) + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum + ) < $maximum_line_length + ) + ) + { + $i_test = min( $imax, $inext_to_go[$i_test] ); + redo; + } + + if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) + { + + # break at previous best break if it would have produced + # a leading alignment of certain common tokens, and it + # is different from the latest candidate break + last + if ($leading_alignment_type); + + # Force at least one breakpoint if old code had good + # break It is only called if a breakpoint is required or + # desired. This will probably need some adjustments + # over time. A goal is to try to be sure that, if a new + # side comment is introduced into formatted text, then + # the same breakpoints will occur. scbreak.t + last + if ( + $i_test == $imax # we are at the end + && !$forced_breakpoint_count # + && $saw_good_break # old line had good break + && $type =~ /^[#;\{]$/ # and this line ends in + # ';' or side comment + && $i_last_break < 0 # and we haven't made a break + && $i_lowest >= 0 # and we saw a possible break + && $i_lowest < $imax - 1 # (but not just before this ;) + && $strength - $lowest_strength < 0.5 * WEAK # and it's good + ); + + # Do not skip past an important break point in a short final + # segment. For example, without this check we would miss the + # break at the final / in the following code: + # + # $depth_stop = + # ( $tau * $mass_pellet * $q_0 * + # ( 1. - exp( -$t_stop / $tau ) ) - + # 4. * $pi * $factor * $k_ice * + # ( $t_melt - $t_ice ) * + # $r_pellet * + # $t_stop ) / + # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); + # + if ( $line_count > 2 + && $i_lowest < $i_test + && $i_test > $imax - 2 + && $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_lowest] + && $lowest_strength < $last_break_strength - .5 * WEAK ) + { + # Make this break for math operators for now + my $ir = $inext_to_go[$i_lowest]; + my $il = $iprev_to_go[$ir]; + last + if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ + || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ); + } + + # Update the minimum bond strength location + $lowest_strength = $strength; + $i_lowest = $i_test; + $lowest_next_token = $next_nonblank_token; + $lowest_next_type = $next_nonblank_type; + $i_lowest_next_nonblank = $i_next_nonblank; + last if $must_break; + + # set flags to remember if a break here will produce a + # leading alignment of certain common tokens + if ( $line_count > 0 + && $i_test < $imax + && ( $lowest_strength - $last_break_strength <= $max_bias ) + ) + { + my $i_last_end = $iprev_to_go[$i_begin]; + my $tok_beg = $tokens_to_go[$i_begin]; + my $type_beg = $types_to_go[$i_begin]; + if ( + + # check for leading alignment of certain tokens + ( + $tok_beg eq $next_nonblank_token + && $is_chain_operator{$tok_beg} + && ( $type_beg eq 'k' + || $type_beg eq $tok_beg ) + && $nesting_depth_to_go[$i_begin] >= + $nesting_depth_to_go[$i_next_nonblank] + ) + + || ( $tokens_to_go[$i_last_end] eq $token + && $is_chain_operator{$token} + && ( $type eq 'k' || $type eq $token ) + && $nesting_depth_to_go[$i_last_end] >= + $nesting_depth_to_go[$i_test] ) + ) + { + $leading_alignment_token = $next_nonblank_token; + $leading_alignment_type = $next_nonblank_type; + } + } + } + + my $too_long = ( $i_test >= $imax ); + if ( !$too_long ) { + my $next_length = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 2 ] - + $starting_sum; + $too_long = $next_length > $maximum_line_length; + + # To prevent blinkers we will avoid leaving a token exactly at + # the line length limit unless it is the last token or one of + # several "good" types. + # + # The following code was a blinker with -pbp before this + # modification: +## $last_nonblank_token eq '(' +## && $is_indirect_object_taker{ $paren_type +## [$paren_depth] } + # The issue causing the problem is that if the + # term [$paren_depth] gets broken across a line then + # the whitespace routine doesn't see both opening and closing + # brackets and will format like '[ $paren_depth ]'. This + # leads to an oscillation in length depending if we break + # before the closing bracket or not. + if ( !$too_long + && $i_test + 1 < $imax + && $next_nonblank_type !~ /^[,\}\]\)R]$/ ) + { + $too_long = $next_length >= $maximum_line_length; + } + } + + FORMATTER_DEBUG_FLAG_BREAK + && do { + my $ltok = $token; + my $rtok = $next_nonblank_token ? $next_nonblank_token : ""; + my $i_testp2 = $i_test + 2; + if ( $i_testp2 > $max_index_to_go + 1 ) { + $i_testp2 = $max_index_to_go + 1; + } + if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } + if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } + print STDOUT +"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; + }; + + # allow one extra terminal token after exceeding line length + # if it would strand this token. + if ( $rOpts_fuzzy_line_length + && $too_long + && $i_lowest == $i_test + && $token_lengths_to_go[$i_test] > 1 + && $next_nonblank_type =~ /^[\;\,]$/ ) + { + $too_long = 0; + } + + last + if ( + ( $i_test == $imax ) # we're done if no more tokens, + || ( + ( $i_lowest >= 0 ) # or no more space and we have a break + && $too_long + ) + ); + } + + #------------------------------------------------------- + # END of inner loop to find the best next breakpoint + # Now decide exactly where to put the breakpoint + #------------------------------------------------------- + + # it's always ok to break at imax if no other break was found + if ( $i_lowest < 0 ) { $i_lowest = $imax } + + # semi-final index calculation + my $i_next_nonblank = $inext_to_go[$i_lowest]; + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; + my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + + #------------------------------------------------------- + # ?/: rule 1 : if a break here will separate a '?' on this + # line from its closing ':', then break at the '?' instead. + #------------------------------------------------------- + foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { + next unless ( $tokens_to_go[$i] eq '?' ); + + # do not break if probable sequence of ?/: statements + next if ($is_colon_chain); + + # do not break if statement is broken by side comment + next + if ( + $tokens_to_go[$max_index_to_go] eq '#' + && terminal_type( \@types_to_go, \@block_type_to_go, 0, + $max_index_to_go ) !~ /^[\;\}]$/ + ); + + # no break needed if matching : is also on the line + next + if ( $mate_index_to_go[$i] >= 0 + && $mate_index_to_go[$i] <= $i_next_nonblank ); + + $i_lowest = $i; + if ( $want_break_before{'?'} ) { $i_lowest-- } + last; + } + + #------------------------------------------------------- + # END of inner loop to find the best next breakpoint: + # Break the line after the token with index i=$i_lowest + #------------------------------------------------------- + + # final index calculation + $i_next_nonblank = $inext_to_go[$i_lowest]; + $next_nonblank_type = $types_to_go[$i_next_nonblank]; + $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + + FORMATTER_DEBUG_FLAG_BREAK + && print STDOUT + "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; + + #------------------------------------------------------- + # ?/: rule 2 : if we break at a '?', then break at its ':' + # + # Note: this rule is also in sub scan_list to handle a break + # at the start and end of a line (in case breaks are dictated + # by side comments). + #------------------------------------------------------- + if ( $next_nonblank_type eq '?' ) { + set_closing_breakpoint($i_next_nonblank); + } + elsif ( $types_to_go[$i_lowest] eq '?' ) { + set_closing_breakpoint($i_lowest); + } + + #------------------------------------------------------- + # ?/: rule 3 : if we break at a ':' then we save + # its location for further work below. We may need to go + # back and break at its '?'. + #------------------------------------------------------- + if ( $next_nonblank_type eq ':' ) { + push @i_colon_breaks, $i_next_nonblank; + } + elsif ( $types_to_go[$i_lowest] eq ':' ) { + push @i_colon_breaks, $i_lowest; + } + + # here we should set breaks for all '?'/':' pairs which are + # separated by this line + + $line_count++; + + # save this line segment, after trimming blanks at the ends + push( @i_first, + ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); + push( @i_last, + ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); + + # set a forced breakpoint at a container opening, if necessary, to + # signal a break at a closing container. Excepting '(' for now. + if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ + && !$forced_breakpoint_to_go[$i_lowest] ) + { + set_closing_breakpoint($i_lowest); + } + + # get ready to go again + $i_begin = $i_lowest + 1; + $last_break_strength = $lowest_strength; + $i_last_break = $i_lowest; + $leading_alignment_token = ""; + $leading_alignment_type = ""; + $lowest_next_token = ''; + $lowest_next_type = 'b'; + + if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { + $i_begin++; + } + + # update indentation size + if ( $i_begin <= $imax ) { + $leading_spaces = leading_spaces_to_go($i_begin); + } + } + + #------------------------------------------------------- + # END of main loop to set continuation breakpoints + # Now go back and make any necessary corrections + #------------------------------------------------------- + + #------------------------------------------------------- + # ?/: rule 4 -- if we broke at a ':', then break at + # corresponding '?' unless this is a chain of ?: expressions + #------------------------------------------------------- + if (@i_colon_breaks) { + + # using a simple method for deciding if we are in a ?/: chain -- + # this is a chain if it has multiple ?/: pairs all in order; + # otherwise not. + # Note that if line starts in a ':' we count that above as a break + my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); + + unless ($is_chain) { + my @insert_list = (); + foreach (@i_colon_breaks) { + my $i_question = $mate_index_to_go[$_]; + if ( $i_question >= 0 ) { + if ( $want_break_before{'?'} ) { + $i_question = $iprev_to_go[$i_question]; + } + + if ( $i_question >= 0 ) { + push @insert_list, $i_question; + } + } + insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); + } + } + } + return ( \@i_first, \@i_last, $colon_count ); +} + +sub insert_additional_breaks { + + # this routine will add line breaks at requested locations after + # sub set_continuation_breaks has made preliminary breaks. + + my ( $ri_break_list, $ri_first, $ri_last ) = @_; + my $i_f; + my $i_l; + my $line_number = 0; + foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) { + + $i_f = $ri_first->[$line_number]; + $i_l = $ri_last->[$line_number]; + while ( $i_break_left >= $i_l ) { + $line_number++; + + # shouldn't happen unless caller passes bad indexes + if ( $line_number >= @{$ri_last} ) { + warning( +"Non-fatal program bug: couldn't set break at $i_break_left\n" + ); + report_definite_bug(); + return; + } + $i_f = $ri_first->[$line_number]; + $i_l = $ri_last->[$line_number]; + } + + # Do not leave a blank at the end of a line; back up if necessary + if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } + + my $i_break_right = $inext_to_go[$i_break_left]; + if ( $i_break_left >= $i_f + && $i_break_left < $i_l + && $i_break_right > $i_f + && $i_break_right <= $i_l ) + { + splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) ); + splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) ); + } + } + return; +} + +sub set_closing_breakpoint { + + # set a breakpoint at a matching closing token + # at present, this is only used to break at a ':' which matches a '?' + my $i_break = shift; + + if ( $mate_index_to_go[$i_break] >= 0 ) { + + # CAUTION: infinite recursion possible here: + # set_closing_breakpoint calls set_forced_breakpoint, and + # set_forced_breakpoint call set_closing_breakpoint + # ( test files attrib.t, BasicLyx.pm.html). + # Don't reduce the '2' in the statement below + if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { + + # break before } ] and ), but sub set_forced_breakpoint will decide + # to break before or after a ? and : + my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; + set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); + } + } + else { + my $type_sequence = $type_sequence_to_go[$i_break]; + if ($type_sequence) { + my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; + $postponed_breakpoint{$type_sequence} = 1; + } + } + return; +} + +sub compare_indentation_levels { + + # check to see if output line tabbing agrees with input line + # this can be very useful for debugging a script which has an extra + # or missing brace + my ( $guessed_indentation_level, $structural_indentation_level ) = @_; + if ( $guessed_indentation_level ne $structural_indentation_level ) { + $last_tabbing_disagreement = $input_line_number; + + if ($in_tabbing_disagreement) { + } + else { + $tabbing_disagreement_count++; + + if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { + write_logfile_entry( +"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" + ); + } + $in_tabbing_disagreement = $input_line_number; + $first_tabbing_disagreement = $in_tabbing_disagreement + unless ($first_tabbing_disagreement); + } + } + else { + + if ($in_tabbing_disagreement) { + + if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { + write_logfile_entry( +"End indentation disagreement from input line $in_tabbing_disagreement\n" + ); + + if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { + write_logfile_entry( + "No further tabbing disagreements will be noted\n"); + } + } + $in_tabbing_disagreement = 0; + } + } + return; +} +1; + diff --git a/lib/Perl/Tidy/HtmlWriter.pm b/lib/Perl/Tidy/HtmlWriter.pm new file mode 100644 index 0000000..0d82978 --- /dev/null +++ b/lib/Perl/Tidy/HtmlWriter.pm @@ -0,0 +1,1466 @@ +##################################################################### +# +# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html +# +##################################################################### + +package Perl::Tidy::HtmlWriter; +use strict; +use warnings; +our $VERSION = '20190601'; + +use File::Basename; + +# class variables +use vars qw{ + %html_color + %html_bold + %html_italic + %token_short_names + %short_to_long_names + $rOpts + $css_filename + $css_linkname + $missing_html_entities + $missing_pod_html +}; + +# replace unsafe characters with HTML entity representation if HTML::Entities +# is available +#{ eval "use HTML::Entities"; $missing_html_entities = $@; } + +BEGIN { + if ( !eval { require HTML::Entities; 1 } ) { + $missing_html_entities = $@ ? $@ : 1; + } + if ( !eval { require Pod::Html; 1 } ) { + $missing_pod_html = $@ ? $@ : 1; + } +} + +sub new { + + my ( $class, $input_file, $html_file, $extension, $html_toc_extension, + $html_src_extension ) + = @_; + + my $html_file_opened = 0; + my $html_fh; + ( $html_fh, my $html_filename ) = + Perl::Tidy::streamhandle( $html_file, 'w' ); + unless ($html_fh) { + Perl::Tidy::Warn("can't open $html_file: $!\n"); + return; + } + $html_file_opened = 1; + + if ( !$input_file || $input_file eq '-' || ref($input_file) ) { + $input_file = "NONAME"; + } + + # write the table of contents to a string + my $toc_string; + my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); + + my $html_pre_fh; + my @pre_string_stack; + if ( $rOpts->{'html-pre-only'} ) { + + # pre section goes directly to the output stream + $html_pre_fh = $html_fh; + $html_pre_fh->print( <<"PRE_END"); +
+PRE_END
+    }
+    else {
+
+        # pre section go out to a temporary string
+        my $pre_string;
+        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
+        push @pre_string_stack, \$pre_string;
+    }
+
+    # pod text gets diverted if the 'pod2html' is used
+    my $html_pod_fh;
+    my $pod_string;
+    if ( $rOpts->{'pod2html'} ) {
+        if ( $rOpts->{'html-pre-only'} ) {
+            undef $rOpts->{'pod2html'};
+        }
+        else {
+            ##eval "use Pod::Html";
+            #if ($@) {
+            if ($missing_pod_html) {
+                Perl::Tidy::Warn(
+"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"
+                );
+                undef $rOpts->{'pod2html'};
+            }
+            else {
+                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
+            }
+        }
+    }
+
+    my $toc_filename;
+    my $src_filename;
+    if ( $rOpts->{'frames'} ) {
+        unless ($extension) {
+            Perl::Tidy::Warn(
+"cannot use frames without a specified output extension; ignoring -frm\n"
+            );
+            undef $rOpts->{'frames'};
+        }
+        else {
+            $toc_filename = $input_file . $html_toc_extension . $extension;
+            $src_filename = $input_file . $html_src_extension . $extension;
+        }
+    }
+
+    # ----------------------------------------------------------
+    # Output is now directed as follows:
+    # html_toc_fh <-- table of contents items
+    # html_pre_fh <-- the 
 section of formatted code, except:
+    # html_pod_fh <-- pod goes here with the pod2html option
+    # ----------------------------------------------------------
+
+    my $title = $rOpts->{'title'};
+    unless ($title) {
+        ( $title, my $path ) = fileparse($input_file);
+    }
+    my $toc_item_count = 0;
+    my $in_toc_package = "";
+    my $last_level     = 0;
+    return bless {
+        _input_file        => $input_file,          # name of input file
+        _title             => $title,               # title, unescaped
+        _html_file         => $html_file,           # name of .html output file
+        _toc_filename      => $toc_filename,        # for frames option
+        _src_filename      => $src_filename,        # for frames option
+        _html_file_opened  => $html_file_opened,    # a flag
+        _html_fh           => $html_fh,             # the output stream
+        _html_pre_fh       => $html_pre_fh,         # pre section goes here
+        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
+        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
+        _rpod_string       => \$pod_string,         # string holding pod
+        _pod_cut_count     => 0,                    # how many =cut's?
+        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
+        _rtoc_string       => \$toc_string,         # string holding toc
+        _rtoc_item_count   => \$toc_item_count,     # how many toc items
+        _rin_toc_package   => \$in_toc_package,     # package name
+        _rtoc_name_count   => {},                   # hash to track unique names
+        _rpackage_stack    => [],                   # stack to check for package
+                                                    # name changes
+        _rlast_level       => \$last_level,         # brace indentation level
+    }, $class;
+}
+
+sub close_object {
+    my ($object) = @_;
+
+    # returns true if close works, false if not
+    # failure probably means there is no close method
+    return eval { $object->close(); 1 };
+}
+
+sub add_toc_item {
+
+    # Add an item to the html table of contents.
+    # This is called even if no table of contents is written,
+    # because we still want to put the anchors in the 
 text.
+    # We are given an anchor name and its type; types are:
+    #      'package', 'sub', '__END__', '__DATA__', 'EOF'
+    # There must be an 'EOF' call at the end to wrap things up.
+    my ( $self, $name, $type ) = @_;
+    my $html_toc_fh     = $self->{_html_toc_fh};
+    my $html_pre_fh     = $self->{_html_pre_fh};
+    my $rtoc_name_count = $self->{_rtoc_name_count};
+    my $rtoc_item_count = $self->{_rtoc_item_count};
+    my $rlast_level     = $self->{_rlast_level};
+    my $rin_toc_package = $self->{_rin_toc_package};
+    my $rpackage_stack  = $self->{_rpackage_stack};
+
+    # packages contain sublists of subs, so to avoid errors all package
+    # items are written and finished with the following routines
+    my $end_package_list = sub {
+        if ( ${$rin_toc_package} ) {
+            $html_toc_fh->print("\n\n");
+            ${$rin_toc_package} = "";
+        }
+    };
+
+    my $start_package_list = sub {
+        my ( $unique_name, $package ) = @_;
+        if ( ${$rin_toc_package} ) { $end_package_list->() }
+        $html_toc_fh->print(<package $package
+
    +EOM + ${$rin_toc_package} = $package; + }; + + # start the table of contents on the first item + unless ( ${$rtoc_item_count} ) { + + # but just quit if we hit EOF without any other entries + # in this case, there will be no toc + return if ( $type eq 'EOF' ); + $html_toc_fh->print( <<"TOC_END"); + +
      +TOC_END + } + ${$rtoc_item_count}++; + + # make a unique anchor name for this location: + # - packages get a 'package-' prefix + # - subs use their names + my $unique_name = $name; + if ( $type eq 'package' ) { $unique_name = "package-$name" } + + # append '-1', '-2', etc if necessary to make unique; this will + # be unique because subs and packages cannot have a '-' + if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { + $unique_name .= "-$count"; + } + + # - all names get terminal '-' if pod2html is used, to avoid + # conflicts with anchor names created by pod2html + if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } + + # start/stop lists of subs + if ( $type eq 'sub' ) { + my $package = $rpackage_stack->[ ${$rlast_level} ]; + unless ($package) { $package = 'main' } + + # if we're already in a package/sub list, be sure its the right + # package or else close it + if ( ${$rin_toc_package} && ${$rin_toc_package} ne $package ) { + $end_package_list->(); + } + + # start a package/sub list if necessary + unless ( ${$rin_toc_package} ) { + $start_package_list->( $unique_name, $package ); + } + } + + # now write an entry in the toc for this item + if ( $type eq 'package' ) { + $start_package_list->( $unique_name, $name ); + } + elsif ( $type eq 'sub' ) { + $html_toc_fh->print("
    • $name
    • \n"); + } + else { + $end_package_list->(); + $html_toc_fh->print("
    • $name
    • \n"); + } + + # write the anchor in the
       section
      +    $html_pre_fh->print("");
      +
      +    # end the table of contents, if any, on the end of file
      +    if ( $type eq 'EOF' ) {
      +        $html_toc_fh->print( <<"TOC_END");
      +
    + +TOC_END + } + return; +} + +BEGIN { + + # This is the official list of tokens which may be identified by the + # user. Long names are used as getopt keys. Short names are + # convenient short abbreviations for specifying input. Short names + # somewhat resemble token type characters, but are often different + # because they may only be alphanumeric, to allow command line + # input. Also, note that because of case insensitivity of html, + # this table must be in a single case only (I've chosen to use all + # lower case). + # When adding NEW_TOKENS: update this hash table + # short names => long names + %short_to_long_names = ( + 'n' => 'numeric', + 'p' => 'paren', + 'q' => 'quote', + 's' => 'structure', + 'c' => 'comment', + 'v' => 'v-string', + 'cm' => 'comma', + 'w' => 'bareword', + 'co' => 'colon', + 'pu' => 'punctuation', + 'i' => 'identifier', + 'j' => 'label', + 'h' => 'here-doc-target', + 'hh' => 'here-doc-text', + 'k' => 'keyword', + 'sc' => 'semicolon', + 'm' => 'subroutine', + 'pd' => 'pod-text', + ); + + # Now we have to map actual token types into one of the above short + # names; any token types not mapped will get 'punctuation' + # properties. + + # The values of this hash table correspond to the keys of the + # previous hash table. + # The keys of this hash table are token types and can be seen + # by running with --dump-token-types (-dtt). + + # When adding NEW_TOKENS: update this hash table + # $type => $short_name + %token_short_names = ( + '#' => 'c', + 'n' => 'n', + 'v' => 'v', + 'k' => 'k', + 'F' => 'k', + 'Q' => 'q', + 'q' => 'q', + 'J' => 'j', + 'j' => 'j', + 'h' => 'h', + 'H' => 'hh', + 'w' => 'w', + ',' => 'cm', + '=>' => 'cm', + ';' => 'sc', + ':' => 'co', + 'f' => 'sc', + '(' => 'p', + ')' => 'p', + 'M' => 'm', + 'P' => 'pd', + 'A' => 'co', + ); + + # These token types will all be called identifiers for now + # FIXME: could separate user defined modules as separate type + my @identifier = qw< i t U C Y Z G :: CORE::>; + @token_short_names{@identifier} = ('i') x scalar(@identifier); + + # These token types will be called 'structure' + my @structure = qw< { } >; + @token_short_names{@structure} = ('s') x scalar(@structure); + + # OLD NOTES: save for reference + # Any of these could be added later if it would be useful. + # For now, they will by default become punctuation + # my @list = qw< L R [ ] >; + # @token_long_names{@list} = ('non-structure') x scalar(@list); + # + # my @list = qw" + # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm + # "; + # @token_long_names{@list} = ('math') x scalar(@list); + # + # my @list = qw" & &= ~ ~= ^ ^= | |= "; + # @token_long_names{@list} = ('bit') x scalar(@list); + # + # my @list = qw" == != < > <= <=> "; + # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); + # + # my @list = qw" && || ! &&= ||= //= "; + # @token_long_names{@list} = ('logical') x scalar(@list); + # + # my @list = qw" . .= =~ !~ x x= "; + # @token_long_names{@list} = ('string-operators') x scalar(@list); + # + # # Incomplete.. + # my @list = qw" .. -> <> ... \ ? "; + # @token_long_names{@list} = ('misc-operators') x scalar(@list); + +} + +sub make_getopt_long_names { + my ( $class, $rgetopt_names ) = @_; + while ( my ( $short_name, $name ) = each %short_to_long_names ) { + push @{$rgetopt_names}, "html-color-$name=s"; + push @{$rgetopt_names}, "html-italic-$name!"; + push @{$rgetopt_names}, "html-bold-$name!"; + } + push @{$rgetopt_names}, "html-color-background=s"; + push @{$rgetopt_names}, "html-linked-style-sheet=s"; + push @{$rgetopt_names}, "nohtml-style-sheets"; + push @{$rgetopt_names}, "html-pre-only"; + push @{$rgetopt_names}, "html-line-numbers"; + push @{$rgetopt_names}, "html-entities!"; + push @{$rgetopt_names}, "stylesheet"; + push @{$rgetopt_names}, "html-table-of-contents!"; + push @{$rgetopt_names}, "pod2html!"; + push @{$rgetopt_names}, "frames!"; + push @{$rgetopt_names}, "html-toc-extension=s"; + push @{$rgetopt_names}, "html-src-extension=s"; + + # Pod::Html parameters: + push @{$rgetopt_names}, "backlink=s"; + push @{$rgetopt_names}, "cachedir=s"; + push @{$rgetopt_names}, "htmlroot=s"; + push @{$rgetopt_names}, "libpods=s"; + push @{$rgetopt_names}, "podpath=s"; + push @{$rgetopt_names}, "podroot=s"; + push @{$rgetopt_names}, "title=s"; + + # Pod::Html parameters with leading 'pod' which will be removed + # before the call to Pod::Html + push @{$rgetopt_names}, "podquiet!"; + push @{$rgetopt_names}, "podverbose!"; + push @{$rgetopt_names}, "podrecurse!"; + push @{$rgetopt_names}, "podflush"; + push @{$rgetopt_names}, "podheader!"; + push @{$rgetopt_names}, "podindex!"; + return; +} + +sub make_abbreviated_names { + + # We're appending things like this to the expansion list: + # 'hcc' => [qw(html-color-comment)], + # 'hck' => [qw(html-color-keyword)], + # etc + my ( $class, $rexpansion ) = @_; + + # abbreviations for color/bold/italic properties + while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { + ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; + ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; + ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; + ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; + ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; + } + + # abbreviations for all other html options + ${$rexpansion}{"hcbg"} = ["html-color-background"]; + ${$rexpansion}{"pre"} = ["html-pre-only"]; + ${$rexpansion}{"toc"} = ["html-table-of-contents"]; + ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; + ${$rexpansion}{"nnn"} = ["html-line-numbers"]; + ${$rexpansion}{"hent"} = ["html-entities"]; + ${$rexpansion}{"nhent"} = ["nohtml-entities"]; + ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; + ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; + ${$rexpansion}{"ss"} = ["stylesheet"]; + ${$rexpansion}{"pod"} = ["pod2html"]; + ${$rexpansion}{"npod"} = ["nopod2html"]; + ${$rexpansion}{"frm"} = ["frames"]; + ${$rexpansion}{"nfrm"} = ["noframes"]; + ${$rexpansion}{"text"} = ["html-toc-extension"]; + ${$rexpansion}{"sext"} = ["html-src-extension"]; + return; +} + +sub check_options { + + # This will be called once after options have been parsed + # Note that we are defining the package variable $rOpts here: + ( my $class, $rOpts ) = @_; + + # X11 color names for default settings that seemed to look ok + # (these color names are only used for programming clarity; the hex + # numbers are actually written) + use constant ForestGreen => "#228B22"; + use constant SaddleBrown => "#8B4513"; + use constant magenta4 => "#8B008B"; + use constant IndianRed3 => "#CD5555"; + use constant DeepSkyBlue4 => "#00688B"; + use constant MediumOrchid3 => "#B452CD"; + use constant black => "#000000"; + use constant white => "#FFFFFF"; + use constant red => "#FF0000"; + + # set default color, bold, italic properties + # anything not listed here will be given the default (punctuation) color -- + # these types currently not listed and get default: ws pu s sc cm co p + # When adding NEW_TOKENS: add an entry here if you don't want defaults + + # set_default_properties( $short_name, default_color, bold?, italic? ); + set_default_properties( 'c', ForestGreen, 0, 0 ); + set_default_properties( 'pd', ForestGreen, 0, 1 ); + set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown + set_default_properties( 'q', IndianRed3, 0, 0 ); + set_default_properties( 'hh', IndianRed3, 0, 1 ); + set_default_properties( 'h', IndianRed3, 1, 0 ); + set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); + set_default_properties( 'w', black, 0, 0 ); + set_default_properties( 'n', MediumOrchid3, 0, 0 ); + set_default_properties( 'v', MediumOrchid3, 0, 0 ); + set_default_properties( 'j', IndianRed3, 1, 0 ); + set_default_properties( 'm', red, 1, 0 ); + + set_default_color( 'html-color-background', white ); + set_default_color( 'html-color-punctuation', black ); + + # setup property lookup tables for tokens based on their short names + # every token type has a short name, and will use these tables + # to do the html markup + while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { + $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; + $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; + $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; + } + + # write style sheet to STDOUT and die if requested + if ( defined( $rOpts->{'stylesheet'} ) ) { + write_style_sheet_file('-'); + Perl::Tidy::Exit(0); + } + + # make sure user gives a file name after -css + if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { + $css_linkname = $rOpts->{'html-linked-style-sheet'}; + if ( $css_linkname =~ /^-/ ) { + Perl::Tidy::Die("You must specify a valid filename after -css\n"); + } + } + + # check for conflict + if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { + $rOpts->{'nohtml-style-sheets'} = 0; + warning("You can't specify both -css and -nss; -nss ignored\n"); + } + + # write a style sheet file if necessary + if ($css_linkname) { + + # if the selected filename exists, don't write, because user may + # have done some work by hand to create it; use backup name instead + # Also, this will avoid a potential disaster in which the user + # forgets to specify the style sheet, like this: + # perltidy -html -css myfile1.pl myfile2.pl + # This would cause myfile1.pl to parsed as the style sheet by GetOpts + my $css_filename = $css_linkname; + unless ( -e $css_filename ) { + write_style_sheet_file($css_filename); + } + } + $missing_html_entities = 1 unless $rOpts->{'html-entities'}; + return; +} + +sub write_style_sheet_file { + + my $css_filename = shift; + my $fh; + unless ( $fh = IO::File->new("> $css_filename") ) { + Perl::Tidy::Die("can't open $css_filename: $!\n"); + } + write_style_sheet_data($fh); + close_object($fh); + return; +} + +sub write_style_sheet_data { + + # write the style sheet data to an open file handle + my $fh = shift; + + my $bg_color = $rOpts->{'html-color-background'}; + my $text_color = $rOpts->{'html-color-punctuation'}; + + # pre-bgcolor is new, and may not be defined + my $pre_bg_color = $rOpts->{'html-pre-color-background'}; + $pre_bg_color = $bg_color unless $pre_bg_color; + + $fh->print(<<"EOM"); +/* default style sheet generated by perltidy */ +body {background: $bg_color; color: $text_color} +pre { color: $text_color; + background: $pre_bg_color; + font-family: courier; + } + +EOM + + foreach my $short_name ( sort keys %short_to_long_names ) { + my $long_name = $short_to_long_names{$short_name}; + + my $abbrev = '.' . $short_name; + if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment + my $color = $html_color{$short_name}; + if ( !defined($color) ) { $color = $text_color } + $fh->print("$abbrev \{ color: $color;"); + + if ( $html_bold{$short_name} ) { + $fh->print(" font-weight:bold;"); + } + + if ( $html_italic{$short_name} ) { + $fh->print(" font-style:italic;"); + } + $fh->print("} /* $long_name */\n"); + } + return; +} + +sub set_default_color { + + # make sure that options hash $rOpts->{$key} contains a valid color + my ( $key, $color ) = @_; + if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } + $rOpts->{$key} = check_RGB($color); + return; +} + +sub check_RGB { + + # if color is a 6 digit hex RGB value, prepend a #, otherwise + # assume that it is a valid ascii color name + my ($color) = @_; + if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } + return $color; +} + +sub set_default_properties { + my ( $short_name, $color, $bold, $italic ) = @_; + + set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); + my $key; + $key = "html-bold-$short_to_long_names{$short_name}"; + $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; + $key = "html-italic-$short_to_long_names{$short_name}"; + $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; + return; +} + +sub pod_to_html { + + # Use Pod::Html to process the pod and make the page + # then merge the perltidy code sections into it. + # return 1 if success, 0 otherwise + my ( $self, $pod_string, $css_string, $toc_string, $rpre_string_stack ) = + @_; + my $input_file = $self->{_input_file}; + my $title = $self->{_title}; + my $success_flag = 0; + + # don't try to use pod2html if no pod + unless ($pod_string) { + return $success_flag; + } + + # Pod::Html requires a real temporary filename + my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile(); + unless ($fh_tmp) { + Perl::Tidy::Warn( + "unable to open temporary file $tmpfile; cannot use pod2html\n"); + return $success_flag; + } + + #------------------------------------------------------------------ + # Warning: a temporary file is open; we have to clean up if + # things go bad. From here on all returns should be by going to + # RETURN so that the temporary file gets unlinked. + #------------------------------------------------------------------ + + # write the pod text to the temporary file + $fh_tmp->print($pod_string); + $fh_tmp->close(); + + # Hand off the pod to pod2html. + # Note that we can use the same temporary filename for input and output + # because of the way pod2html works. + { + + my @args; + push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; + + # Flags with string args: + # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", + # "podpath=s", "podroot=s" + # Note: -css=s is handled by perltidy itself + foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) + { + if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } + } + + # Toggle switches; these have extra leading 'pod' + # "header!", "index!", "recurse!", "quiet!", "verbose!" + foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) { + my $kwd = $kw; # allows us to strip 'pod' + if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } + elsif ( defined( $rOpts->{$kw} ) ) { + $kwd =~ s/^pod//; + push @args, "--no$kwd"; + } + } + + # "flush", + my $kw = 'podflush'; + if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } + + # Must clean up if pod2html dies (it can); + # Be careful not to overwrite callers __DIE__ routine + local $SIG{__DIE__} = sub { + unlink $tmpfile if -e $tmpfile; + Perl::Tidy::Die( $_[0] ); + }; + + pod2html(@args); + } + $fh_tmp = IO::File->new( $tmpfile, 'r' ); + unless ($fh_tmp) { + + # this error shouldn't happen ... we just used this filename + Perl::Tidy::Warn( + "unable to open temporary file $tmpfile; cannot use pod2html\n"); + goto RETURN; + } + + my $html_fh = $self->{_html_fh}; + my @toc; + my $in_toc; + my $ul_level = 0; + my $no_print; + + # This routine will write the html selectively and store the toc + my $html_print = sub { + foreach (@_) { + $html_fh->print($_) unless ($no_print); + if ($in_toc) { push @toc, $_ } + } + }; + + # loop over lines of html output from pod2html and merge in + # the necessary perltidy html sections + my ( $saw_body, $saw_index, $saw_body_end ); + + my $timestamp = ""; + if ( $rOpts->{'timestamp'} ) { + my $date = localtime; + $timestamp = "on $date"; + } + while ( my $line = $fh_tmp->getline() ) { + + if ( $line =~ /^\s*\s*$/i ) { + ##my $date = localtime; + ##$html_print->("\n"); + $html_print->("\n"); + $html_print->($line); + } + + # Copy the perltidy css, if any, after tag + elsif ( $line =~ /^\s*\s*$/i ) { + $saw_body = 1; + $html_print->($css_string) if $css_string; + $html_print->($line); + + # add a top anchor and heading + $html_print->("\n"); + $title = escape_html($title); + $html_print->("

    $title

    \n"); + } + + # check for start of index, old pod2html + # before Pod::Html VERSION 1.15_02 it is delimited by comments as: + # + #
      + # ... + #
    + # + # + elsif ( $line =~ /^\s*\s*$/i ) { + $in_toc = 'INDEX'; + + # when frames are used, an extra table of contents in the + # contents panel is confusing, so don't print it + $no_print = $rOpts->{'frames'} + || !$rOpts->{'html-table-of-contents'}; + $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; + $html_print->($line); + } + + # check for start of index, new pod2html + # After Pod::Html VERSION 1.15_02 it is delimited as: + #
      + # ... + #
    + elsif ( $line =~ /^\s*/i ) { + $in_toc = 'UL'; + $ul_level = 1; + + # when frames are used, an extra table of contents in the + # contents panel is confusing, so don't print it + $no_print = $rOpts->{'frames'} + || !$rOpts->{'html-table-of-contents'}; + $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; + $html_print->($line); + } + + # Check for end of index, old pod2html + elsif ( $line =~ /^\s*\s*$/i ) { + $saw_index = 1; + $html_print->($line); + + # Copy the perltidy toc, if any, after the Pod::Html toc + if ($toc_string) { + $html_print->("
    \n") if $rOpts->{'frames'}; + $html_print->("

    Code Index:

    \n"); + ##my @toc = map { $_ .= "\n" } split /\n/, $toc_string; + my @toc = map { $_ . "\n" } split /\n/, $toc_string; + $html_print->(@toc); + } + $in_toc = ""; + $no_print = 0; + } + + # must track
      depth level for new pod2html + elsif ( $line =~ /\s*
        \s*$/i && $in_toc eq 'UL' ) { + $ul_level++; + $html_print->($line); + } + + # Check for end of index, for new pod2html + elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) { + $ul_level--; + $html_print->($line); + + # Copy the perltidy toc, if any, after the Pod::Html toc + if ( $ul_level <= 0 ) { + $saw_index = 1; + if ($toc_string) { + $html_print->("
        \n") if $rOpts->{'frames'}; + $html_print->("

        Code Index:

        \n"); + ##my @toc = map { $_ .= "\n" } split /\n/, $toc_string; + my @toc = map { $_ . "\n" } split /\n/, $toc_string; + $html_print->(@toc); + } + $in_toc = ""; + $ul_level = 0; + $no_print = 0; + } + } + + # Copy one perltidy section after each marker + elsif ( $line =~ /^(.*)(.*)$/ ) { + $line = $2; + $html_print->($1) if $1; + + # Intermingle code and pod sections if we saw multiple =cut's. + if ( $self->{_pod_cut_count} > 1 ) { + my $rpre_string = shift( @{$rpre_string_stack} ); + if ( ${$rpre_string} ) { + $html_print->('
        ');
        +                    $html_print->( ${$rpre_string} );
        +                    $html_print->('
        '); + } + else { + + # shouldn't happen: we stored a string before writing + # each marker. + Perl::Tidy::Warn( +"Problem merging html stream with pod2html; order may be wrong\n" + ); + } + $html_print->($line); + } + + # If didn't see multiple =cut lines, we'll put the pod out first + # and then the code, because it's less confusing. + else { + + # since we are not intermixing code and pod, we don't need + # or want any
        lines which separated pod and code + $html_print->($line) unless ( $line =~ /^\s*
        \s*$/i ); + } + } + + # Copy any remaining code section before the tag + elsif ( $line =~ /^\s*<\/body>\s*$/i ) { + $saw_body_end = 1; + if ( @{$rpre_string_stack} ) { + unless ( $self->{_pod_cut_count} > 1 ) { + $html_print->('
        '); + } + while ( my $rpre_string = shift( @{$rpre_string_stack} ) ) { + $html_print->('
        ');
        +                    $html_print->( ${$rpre_string} );
        +                    $html_print->('
        '); + } + } + $html_print->($line); + } + else { + $html_print->($line); + } + } + + $success_flag = 1; + unless ($saw_body) { + Perl::Tidy::Warn("Did not see in pod2html output\n"); + $success_flag = 0; + } + unless ($saw_body_end) { + Perl::Tidy::Warn("Did not see in pod2html output\n"); + $success_flag = 0; + } + unless ($saw_index) { + Perl::Tidy::Warn("Did not find INDEX END in pod2html output\n"); + $success_flag = 0; + } + + RETURN: + close_object($html_fh); + + # note that we have to unlink tmpfile before making frames + # because the tmpfile may be one of the names used for frames + if ( -e $tmpfile ) { + unless ( unlink($tmpfile) ) { + Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n"); + $success_flag = 0; + } + } + + if ( $success_flag && $rOpts->{'frames'} ) { + $self->make_frame( \@toc ); + } + return $success_flag; +} + +sub make_frame { + + # Make a frame with table of contents in the left panel + # and the text in the right panel. + # On entry: + # $html_filename contains the no-frames html output + # $rtoc is a reference to an array with the table of contents + my ( $self, $rtoc ) = @_; + my $input_file = $self->{_input_file}; + my $html_filename = $self->{_html_file}; + my $toc_filename = $self->{_toc_filename}; + my $src_filename = $self->{_src_filename}; + my $title = $self->{_title}; + $title = escape_html($title); + + # FUTURE input parameter: + my $top_basename = ""; + + # We need to produce 3 html files: + # 1. - the table of contents + # 2. - the contents (source code) itself + # 3. - the frame which contains them + + # get basenames for relative links + my ( $toc_basename, $toc_path ) = fileparse($toc_filename); + my ( $src_basename, $src_path ) = fileparse($src_filename); + + # 1. Make the table of contents panel, with appropriate changes + # to the anchor names + my $src_frame_name = 'SRC'; + my $first_anchor = + write_toc_html( $title, $toc_filename, $src_basename, $rtoc, + $src_frame_name ); + + # 2. The current .html filename is renamed to be the contents panel + rename( $html_filename, $src_filename ) + or Perl::Tidy::Die("Cannot rename $html_filename to $src_filename:$!\n"); + + # 3. Then use the original html filename for the frame + write_frame_html( + $title, $html_filename, $top_basename, + $toc_basename, $src_basename, $src_frame_name + ); + return; +} + +sub write_toc_html { + + # write a separate html table of contents file for frames + my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; + my $fh = IO::File->new( $toc_filename, 'w' ) + or Perl::Tidy::Die("Cannot open $toc_filename:$!\n"); + $fh->print(< + +$title + + +

        $title

        +EOM + + my $first_anchor = + change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); + $fh->print( join "", @{$rtoc} ); + + $fh->print(< + +EOM + + return; +} + +sub write_frame_html { + + # write an html file to be the table of contents frame + my ( + $title, $frame_filename, $top_basename, + $toc_basename, $src_basename, $src_frame_name + ) = @_; + + my $fh = IO::File->new( $frame_filename, 'w' ) + or Perl::Tidy::Die("Cannot open $toc_basename:$!\n"); + + $fh->print(< + + + +$title + +EOM + + # two left panels, one right, if master index file + if ($top_basename) { + $fh->print(< + + + + +EOM + } + + # one left panels, one right, if no master index file + else { + $fh->print(< + +EOM + } + $fh->print(< + +<body> +<p>If you see this message, you are using a non-frame-capable web client.</p> +<p>This document contains:</p> +<ul> +<li><a href="$toc_basename">A table of contents</a></li> +<li><a href="$src_basename">The source code</a></li> +</ul> +</body> + + + +EOM + return; +} + +sub change_anchor_names { + + # add a filename and target to anchors + # also return the first anchor + my ( $rlines, $filename, $target ) = @_; + my $first_anchor; + foreach my $line ( @{$rlines} ) { + + # We're looking for lines like this: + #
      • SYNOPSIS
      • + # ---- - -------- ----------------- + # $1 $4 $5 + if ( $line =~ /^(.*)]*>(.*)$/i ) { + my $pre = $1; + my $name = $4; + my $post = $5; + my $href = "$filename#$name"; + $line = "$pre$post\n"; + unless ($first_anchor) { $first_anchor = $href } + } + } + return $first_anchor; +} + +sub close_html_file { + my $self = shift; + return unless $self->{_html_file_opened}; + + my $html_fh = $self->{_html_fh}; + my $rtoc_string = $self->{_rtoc_string}; + + # There are 3 basic paths to html output... + + # --------------------------------- + # Path 1: finish up if in -pre mode + # --------------------------------- + if ( $rOpts->{'html-pre-only'} ) { + $html_fh->print( <<"PRE_END"); +
+PRE_END + close_object($html_fh); + return; + } + + # Finish the index + $self->add_toc_item( 'EOF', 'EOF' ); + + my $rpre_string_stack = $self->{_rpre_string_stack}; + + # Patch to darken the
 background color in case of pod2html and
+    # interleaved code/documentation.  Otherwise, the distinction
+    # between code and documentation is blurred.
+    if (   $rOpts->{pod2html}
+        && $self->{_pod_cut_count} >= 1
+        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
+    {
+        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
+    }
+
+    # put the css or its link into a string, if used
+    my $css_string;
+    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
+
+    # use css linked to another file
+    if ( $rOpts->{'html-linked-style-sheet'} ) {
+        $fh_css->print(
+            qq());
+    }
+
+    # use css embedded in this file
+    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
+        $fh_css->print( <<'ENDCSS');
+
+ENDCSS
+    }
+
+    # -----------------------------------------------------------
+    # path 2: use pod2html if requested
+    #         If we fail for some reason, continue on to path 3
+    # -----------------------------------------------------------
+    if ( $rOpts->{'pod2html'} ) {
+        my $rpod_string = $self->{_rpod_string};
+        $self->pod_to_html(
+            ${$rpod_string}, $css_string,
+            ${$rtoc_string}, $rpre_string_stack
+        ) && return;
+    }
+
+    # --------------------------------------------------
+    # path 3: write code in html, with pod only in italics
+    # --------------------------------------------------
+    my $input_file = $self->{_input_file};
+    my $title      = escape_html($input_file);
+    my $timestamp  = "";
+    if ( $rOpts->{'timestamp'} ) {
+        my $date = localtime;
+        $timestamp = "on $date";
+    }
+    $html_fh->print( <<"HTML_START");
+
+
+
+
+$title
+HTML_START
+
+    # output the css, if used
+    if ($css_string) {
+        $html_fh->print($css_string);
+        $html_fh->print( <<"ENDCSS");
+
+
+ENDCSS
+    }
+    else {
+
+        $html_fh->print( <<"HTML_START");
+
+{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
+HTML_START
+    }
+
+    $html_fh->print("\n");
+    $html_fh->print( <<"EOM");
+

$title

+EOM + + # copy the table of contents + if ( ${$rtoc_string} + && !$rOpts->{'frames'} + && $rOpts->{'html-table-of-contents'} ) + { + $html_fh->print( ${$rtoc_string} ); + } + + # copy the pre section(s) + my $fname_comment = $input_file; + $fname_comment =~ s/--+/-/g; # protect HTML comment tags + $html_fh->print( <<"END_PRE"); +
+ +
+END_PRE
+
+    foreach my $rpre_string ( @{$rpre_string_stack} ) {
+        $html_fh->print( ${$rpre_string} );
+    }
+
+    # and finish the html page
+    $html_fh->print( <<"HTML_END");
+
+ + +HTML_END + close_object($html_fh); + + if ( $rOpts->{'frames'} ) { + ##my @toc = map { $_ .= "\n" } split /\n/, ${$rtoc_string}; + my @toc = map { $_ . "\n" } split /\n/, ${$rtoc_string}; + $self->make_frame( \@toc ); + } + return; +} + +sub markup_tokens { + my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_; + my ( @colored_tokens, $type, $token, $level ); + my $rlast_level = $self->{_rlast_level}; + my $rpackage_stack = $self->{_rpackage_stack}; + + for ( my $j = 0 ; $j < @{$rtoken_type} ; $j++ ) { + $type = $rtoken_type->[$j]; + $token = $rtokens->[$j]; + $level = $rlevels->[$j]; + $level = 0 if ( $level < 0 ); + + #------------------------------------------------------- + # Update the package stack. The package stack is needed to keep + # the toc correct because some packages may be declared within + # blocks and go out of scope when we leave the block. + #------------------------------------------------------- + if ( $level > ${$rlast_level} ) { + unless ( $rpackage_stack->[ $level - 1 ] ) { + $rpackage_stack->[ $level - 1 ] = 'main'; + } + $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; + } + elsif ( $level < ${$rlast_level} ) { + my $package = $rpackage_stack->[$level]; + unless ($package) { $package = 'main' } + + # if we change packages due to a nesting change, we + # have to make an entry in the toc + if ( $package ne $rpackage_stack->[ $level + 1 ] ) { + $self->add_toc_item( $package, 'package' ); + } + } + ${$rlast_level} = $level; + + #------------------------------------------------------- + # Intercept a sub name here; split it + # into keyword 'sub' and sub name; and add an + # entry in the toc + #------------------------------------------------------- + if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { + $token = $self->markup_html_element( $1, 'k' ); + push @colored_tokens, $token; + $token = $2; + $type = 'M'; + + # but don't include sub declarations in the toc; + # these wlll have leading token types 'i;' + my $signature = join "", @{$rtoken_type}; + unless ( $signature =~ /^i;/ ) { + my $subname = $token; + $subname =~ s/[\s\(].*$//; # remove any attributes and prototype + $self->add_toc_item( $subname, 'sub' ); + } + } + + #------------------------------------------------------- + # Intercept a package name here; split it + # into keyword 'package' and name; add to the toc, + # and update the package stack + #------------------------------------------------------- + if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { + $token = $self->markup_html_element( $1, 'k' ); + push @colored_tokens, $token; + $token = $2; + $type = 'i'; + $self->add_toc_item( "$token", 'package' ); + $rpackage_stack->[$level] = $token; + } + + $token = $self->markup_html_element( $token, $type ); + push @colored_tokens, $token; + } + return ( \@colored_tokens ); +} + +sub markup_html_element { + my ( $self, $token, $type ) = @_; + + return $token if ( $type eq 'b' ); # skip a blank token + return $token if ( $token =~ /^\s*$/ ); # skip a blank line + $token = escape_html($token); + + # get the short abbreviation for this token type + my $short_name = $token_short_names{$type}; + if ( !defined($short_name) ) { + $short_name = "pu"; # punctuation is default + } + + # handle style sheets.. + if ( !$rOpts->{'nohtml-style-sheets'} ) { + if ( $short_name ne 'pu' ) { + $token = qq() . $token . ""; + } + } + + # handle no style sheets.. + else { + my $color = $html_color{$short_name}; + + if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { + $token = qq() . $token . ""; + } + if ( $html_italic{$short_name} ) { $token = "$token" } + if ( $html_bold{$short_name} ) { $token = "$token" } + } + return $token; +} + +sub escape_html { + + my $token = shift; + if ($missing_html_entities) { + $token =~ s/\&/&/g; + $token =~ s/\/>/g; + $token =~ s/\"/"/g; + } + else { + HTML::Entities::encode_entities($token); + } + return $token; +} + +sub finish_formatting { + + # called after last line + my $self = shift; + $self->close_html_file(); + return; +} + +sub write_line { + + my ( $self, $line_of_tokens ) = @_; + return unless $self->{_html_file_opened}; + my $html_pre_fh = $self->{_html_pre_fh}; + my $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; + my $line_number = $line_of_tokens->{_line_number}; + chomp $input_line; + + # markup line of code.. + my $html_line; + if ( $line_type eq 'CODE' ) { + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + my $rlevels = $line_of_tokens->{_rlevels}; + + if ( $input_line =~ /(^\s*)/ ) { + $html_line = $1; + } + else { + $html_line = ""; + } + my ($rcolored_tokens) = + $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); + $html_line .= join '', @{$rcolored_tokens}; + } + + # markup line of non-code.. + else { + my $line_character; + if ( $line_type eq 'HERE' ) { $line_character = 'H' } + elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } + elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } + elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } + elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } + elsif ( $line_type eq 'END_START' ) { + $line_character = 'k'; + $self->add_toc_item( '__END__', '__END__' ); + } + elsif ( $line_type eq 'DATA_START' ) { + $line_character = 'k'; + $self->add_toc_item( '__DATA__', '__DATA__' ); + } + elsif ( $line_type =~ /^POD/ ) { + $line_character = 'P'; + if ( $rOpts->{'pod2html'} ) { + my $html_pod_fh = $self->{_html_pod_fh}; + if ( $line_type eq 'POD_START' ) { + + my $rpre_string_stack = $self->{_rpre_string_stack}; + my $rpre_string = $rpre_string_stack->[-1]; + + # if we have written any non-blank lines to the + # current pre section, start writing to a new output + # string + if ( ${$rpre_string} =~ /\S/ ) { + my $pre_string; + $html_pre_fh = + Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); + $self->{_html_pre_fh} = $html_pre_fh; + push @{$rpre_string_stack}, \$pre_string; + + # leave a marker in the pod stream so we know + # where to put the pre section we just + # finished. + my $for_html = '=for html'; # don't confuse pod utils + $html_pod_fh->print(< + +EOM + } + + # otherwise, just clear the current string and start + # over + else { + ${$rpre_string} = ""; + $html_pod_fh->print("\n"); + } + } + $html_pod_fh->print( $input_line . "\n" ); + if ( $line_type eq 'POD_END' ) { + $self->{_pod_cut_count}++; + $html_pod_fh->print("\n"); + } + return; + } + } + else { $line_character = 'Q' } + $html_line = $self->markup_html_element( $input_line, $line_character ); + } + + # add the line number if requested + if ( $rOpts->{'html-line-numbers'} ) { + my $extra_space = + ( $line_number < 10 ) ? " " + : ( $line_number < 100 ) ? " " + : ( $line_number < 1000 ) ? " " + : ""; + $html_line = $extra_space . $line_number . " " . $html_line; + } + + # write the line + $html_pre_fh->print("$html_line\n"); + return; +} +1; + diff --git a/lib/Perl/Tidy/IOScalar.pm b/lib/Perl/Tidy/IOScalar.pm new file mode 100644 index 0000000..2bfb07f --- /dev/null +++ b/lib/Perl/Tidy/IOScalar.pm @@ -0,0 +1,87 @@ +##################################################################### +# +# This is a stripped down version of IO::Scalar +# Given a reference to a scalar, it supplies either: +# a getline method which reads lines (mode='r'), or +# a print method which reads lines (mode='w') +# +##################################################################### +package Perl::Tidy::IOScalar; +use strict; +use warnings; +use Carp; +our $VERSION = '20190601'; + +sub new { + my ( $package, $rscalar, $mode ) = @_; + my $ref = ref $rscalar; + if ( $ref ne 'SCALAR' ) { + confess <[1]; + if ( $mode ne 'r' ) { + confess <[2]++; + return $self->[0]->[$i]; +} + +sub print { + my ( $self, $msg ) = @_; + my $mode = $self->[1]; + if ( $mode ne 'w' ) { + confess <[0] } .= $msg; + return; +} +sub close { return } +1; + diff --git a/lib/Perl/Tidy/IOScalarArray.pm b/lib/Perl/Tidy/IOScalarArray.pm new file mode 100644 index 0000000..118f159 --- /dev/null +++ b/lib/Perl/Tidy/IOScalarArray.pm @@ -0,0 +1,76 @@ +##################################################################### +# +# This is a stripped down version of IO::ScalarArray +# Given a reference to an array, it supplies either: +# a getline method which reads lines (mode='r'), or +# a print method which reads lines (mode='w') +# +# NOTE: this routine assumes that there aren't any embedded +# newlines within any of the array elements. There are no checks +# for that. +# +##################################################################### +package Perl::Tidy::IOScalarArray; +use strict; +use warnings; +use Carp; +our $VERSION = '20190601'; + +sub new { + my ( $package, $rarray, $mode ) = @_; + my $ref = ref $rarray; + if ( $ref ne 'ARRAY' ) { + confess <[1]; + if ( $mode ne 'r' ) { + confess <[2]++; + return $self->[0]->[$i]; +} + +sub print { + my ( $self, $msg ) = @_; + my $mode = $self->[1]; + if ( $mode ne 'w' ) { + confess <[0] }, $msg; + return; +} +sub close { return } +1; + diff --git a/lib/Perl/Tidy/IndentationItem.pm b/lib/Perl/Tidy/IndentationItem.pm new file mode 100644 index 0000000..b0edd0a --- /dev/null +++ b/lib/Perl/Tidy/IndentationItem.pm @@ -0,0 +1,254 @@ +##################################################################### +# +# the Perl::Tidy::IndentationItem class supplies items which contain +# how much whitespace should be used at the start of a line +# +##################################################################### + +package Perl::Tidy::IndentationItem; +use strict; +use warnings; +our $VERSION = '20190601'; + +sub new { + + # Create an 'indentation_item' which describes one level of leading + # whitespace when the '-lp' indentation is used. + my ( + $class, $spaces, $level, + $ci_level, $available_spaces, $index, + $gnu_sequence_number, $align_paren, $stack_depth, + $starting_index, + ) = @_; + + my $closed = -1; + my $arrow_count = 0; + my $comma_count = 0; + my $have_child = 0; + my $want_right_spaces = 0; + my $marked = 0; + + # DEFINITIONS: + # spaces => # total leading white spaces + # level => # the indentation 'level' + # ci_level => # the 'continuation level' + # available_spaces => # how many left spaces available + # # for this level + # closed => # index where we saw closing '}' + # comma_count => # how many commas at this level? + # sequence_number => # output batch number + # index => # index in output batch list + # have_child => # any dependents? + # recoverable_spaces => # how many spaces to the right + # # we would like to move to get + # # alignment (negative if left) + # align_paren => # do we want to try to align + # # with an opening structure? + # marked => # if visited by corrector logic + # stack_depth => # indentation nesting depth + # starting_index => # first token index of this level + # arrow_count => # how many =>'s + + return bless { + _spaces => $spaces, + _level => $level, + _ci_level => $ci_level, + _available_spaces => $available_spaces, + _closed => $closed, + _comma_count => $comma_count, + _sequence_number => $gnu_sequence_number, + _index => $index, + _have_child => $have_child, + _recoverable_spaces => $want_right_spaces, + _align_paren => $align_paren, + _marked => $marked, + _stack_depth => $stack_depth, + _starting_index => $starting_index, + _arrow_count => $arrow_count, + }, $class; +} + +sub permanently_decrease_available_spaces { + + # make a permanent reduction in the available indentation spaces + # at one indentation item. NOTE: if there are child nodes, their + # total SPACES must be reduced by the caller. + + my ( $item, $spaces_needed ) = @_; + my $available_spaces = $item->get_available_spaces(); + my $deleted_spaces = + ( $available_spaces > $spaces_needed ) + ? $spaces_needed + : $available_spaces; + $item->decrease_available_spaces($deleted_spaces); + $item->decrease_SPACES($deleted_spaces); + $item->set_recoverable_spaces(0); + + return $deleted_spaces; +} + +sub tentatively_decrease_available_spaces { + + # We are asked to tentatively delete $spaces_needed of indentation + # for a indentation item. We may want to undo this later. NOTE: if + # there are child nodes, their total SPACES must be reduced by the + # caller. + my ( $item, $spaces_needed ) = @_; + my $available_spaces = $item->get_available_spaces(); + my $deleted_spaces = + ( $available_spaces > $spaces_needed ) + ? $spaces_needed + : $available_spaces; + $item->decrease_available_spaces($deleted_spaces); + $item->decrease_SPACES($deleted_spaces); + $item->increase_recoverable_spaces($deleted_spaces); + return $deleted_spaces; +} + +sub get_stack_depth { + my $self = shift; + return $self->{_stack_depth}; +} + +sub get_spaces { + my $self = shift; + return $self->{_spaces}; +} + +sub get_marked { + my $self = shift; + return $self->{_marked}; +} + +sub set_marked { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_marked} = $value; + } + return $self->{_marked}; +} + +sub get_available_spaces { + my $self = shift; + return $self->{_available_spaces}; +} + +sub decrease_SPACES { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_spaces} -= $value; + } + return $self->{_spaces}; +} + +sub decrease_available_spaces { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_available_spaces} -= $value; + } + return $self->{_available_spaces}; +} + +sub get_align_paren { + my $self = shift; + return $self->{_align_paren}; +} + +sub get_recoverable_spaces { + my $self = shift; + return $self->{_recoverable_spaces}; +} + +sub set_recoverable_spaces { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_recoverable_spaces} = $value; + } + return $self->{_recoverable_spaces}; +} + +sub increase_recoverable_spaces { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_recoverable_spaces} += $value; + } + return $self->{_recoverable_spaces}; +} + +sub get_ci_level { + my $self = shift; + return $self->{_ci_level}; +} + +sub get_level { + my $self = shift; + return $self->{_level}; +} + +sub get_sequence_number { + my $self = shift; + return $self->{_sequence_number}; +} + +sub get_index { + my $self = shift; + return $self->{_index}; +} + +sub get_starting_index { + my $self = shift; + return $self->{_starting_index}; +} + +sub set_have_child { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_have_child} = $value; + } + return $self->{_have_child}; +} + +sub get_have_child { + my $self = shift; + return $self->{_have_child}; +} + +sub set_arrow_count { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_arrow_count} = $value; + } + return $self->{_arrow_count}; +} + +sub get_arrow_count { + my $self = shift; + return $self->{_arrow_count}; +} + +sub set_comma_count { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_comma_count} = $value; + } + return $self->{_comma_count}; +} + +sub get_comma_count { + my $self = shift; + return $self->{_comma_count}; +} + +sub set_closed { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_closed} = $value; + } + return $self->{_closed}; +} + +sub get_closed { + my $self = shift; + return $self->{_closed}; +} +1; diff --git a/lib/Perl/Tidy/LineBuffer.pm b/lib/Perl/Tidy/LineBuffer.pm new file mode 100644 index 0000000..ade5b2c --- /dev/null +++ b/lib/Perl/Tidy/LineBuffer.pm @@ -0,0 +1,57 @@ +##################################################################### +# +# The Perl::Tidy::LineBuffer class supplies a 'get_line()' +# method for returning the next line to be parsed, as well as a +# 'peek_ahead()' method +# +# The input parameter is an object with a 'get_line()' method +# which returns the next line to be parsed +# +##################################################################### + +package Perl::Tidy::LineBuffer; +use strict; +use warnings; +our $VERSION = '20190601'; + +sub new { + + my ( $class, $line_source_object ) = @_; + + return bless { + _line_source_object => $line_source_object, + _rlookahead_buffer => [], + }, $class; +} + +sub peek_ahead { + my ( $self, $buffer_index ) = @_; + my $line = undef; + my $line_source_object = $self->{_line_source_object}; + my $rlookahead_buffer = $self->{_rlookahead_buffer}; + if ( $buffer_index < scalar( @{$rlookahead_buffer} ) ) { + $line = $rlookahead_buffer->[$buffer_index]; + } + else { + $line = $line_source_object->get_line(); + push( @{$rlookahead_buffer}, $line ); + } + return $line; +} + +sub get_line { + my $self = shift; + my $line = undef; + my $line_source_object = $self->{_line_source_object}; + my $rlookahead_buffer = $self->{_rlookahead_buffer}; + + if ( scalar( @{$rlookahead_buffer} ) ) { + $line = shift @{$rlookahead_buffer}; + } + else { + $line = $line_source_object->get_line(); + } + return $line; +} +1; + diff --git a/lib/Perl/Tidy/LineSink.pm b/lib/Perl/Tidy/LineSink.pm new file mode 100644 index 0000000..98bea19 --- /dev/null +++ b/lib/Perl/Tidy/LineSink.pm @@ -0,0 +1,145 @@ +##################################################################### +# +# the Perl::Tidy::LineSink class supplies a write_line method for +# actual file writing +# +##################################################################### + +package Perl::Tidy::LineSink; +use strict; +use warnings; +our $VERSION = '20190601'; + +sub new { + + my ( $class, $output_file, $tee_file, $line_separator, $rOpts, + $rpending_logfile_message, $binmode ) + = @_; + my $fh = undef; + my $fh_tee = undef; + + my $output_file_open = 0; + + if ( $rOpts->{'format'} eq 'tidy' ) { + ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); + unless ($fh) { Perl::Tidy::Die("Cannot write to output stream\n"); } + $output_file_open = 1; + if ($binmode) { + if ( $rOpts->{'character-encoding'} + && $rOpts->{'character-encoding'} eq 'utf8' ) + { + if ( ref($fh) eq 'IO::File' ) { + $fh->binmode(":raw:encoding(UTF-8)"); + } + elsif ( $output_file eq '-' ) { + binmode STDOUT, ":raw:encoding(UTF-8)"; + } + } + + # Patch for RT 122030 + elsif ( ref($fh) eq 'IO::File' ) { $fh->binmode(); } + + elsif ( $output_file eq '-' ) { binmode STDOUT } + } + } + + # in order to check output syntax when standard output is used, + # or when it is an object, we have to make a copy of the file + if ( $output_file eq '-' || ref $output_file ) { + if ( $rOpts->{'check-syntax'} ) { + + # Turning off syntax check when standard output is used. + # The reason is that temporary files cause problems on + # on many systems. + $rOpts->{'check-syntax'} = 0; + ${$rpending_logfile_message} .= < $fh, + _fh_tee => $fh_tee, + _output_file => $output_file, + _output_file_open => $output_file_open, + _tee_flag => 0, + _tee_file => $tee_file, + _tee_file_opened => 0, + _line_separator => $line_separator, + _binmode => $binmode, + }, $class; +} + +sub write_line { + + my ( $self, $line ) = @_; + my $fh = $self->{_fh}; + + my $output_file_open = $self->{_output_file_open}; + chomp $line; + $line .= $self->{_line_separator}; + + $fh->print($line) if ( $self->{_output_file_open} ); + + if ( $self->{_tee_flag} ) { + unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } + my $fh_tee = $self->{_fh_tee}; + print $fh_tee $line; + } + return; +} + +sub tee_on { + my $self = shift; + $self->{_tee_flag} = 1; + return; +} + +sub tee_off { + my $self = shift; + $self->{_tee_flag} = 0; + return; +} + +sub really_open_tee_file { + my $self = shift; + my $tee_file = $self->{_tee_file}; + my $fh_tee; + $fh_tee = IO::File->new(">$tee_file") + or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n"); + binmode $fh_tee if $self->{_binmode}; + $self->{_tee_file_opened} = 1; + $self->{_fh_tee} = $fh_tee; + return; +} + +sub close_output_file { + my $self = shift; + + # Only close physical files, not STDOUT and other objects + my $output_file = $self->{_output_file}; + if ( $output_file ne '-' && !ref $output_file ) { + eval { $self->{_fh}->close() } if $self->{_output_file_open}; + } + $self->close_tee_file(); + return; +} + +sub close_tee_file { + my $self = shift; + + # Only close physical files, not STDOUT and other objects + if ( $self->{_tee_file_opened} ) { + my $tee_file = $self->{_tee_file}; + if ( $tee_file ne '-' && !ref $tee_file ) { + eval { $self->{_fh_tee}->close() }; + $self->{_tee_file_opened} = 0; + } + } + return; +} + +1; + diff --git a/lib/Perl/Tidy/LineSource.pm b/lib/Perl/Tidy/LineSource.pm new file mode 100644 index 0000000..5d4ec98 --- /dev/null +++ b/lib/Perl/Tidy/LineSource.pm @@ -0,0 +1,89 @@ +##################################################################### +# +# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method +# which returns the next line to be parsed +# +##################################################################### + +package Perl::Tidy::LineSource; +use strict; +use warnings; +our $VERSION = '20190601'; + +sub new { + + my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; + + my $input_line_ending; + if ( $rOpts->{'preserve-line-endings'} ) { + $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); + } + + ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); + return unless $fh; + + # in order to check output syntax when standard output is used, + # or when it is an object, we have to make a copy of the file + if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) + { + + # Turning off syntax check when input output is used. + # The reason is that temporary files cause problems on + # on many systems. + $rOpts->{'check-syntax'} = 0; + + ${$rpending_logfile_message} .= < $fh, + _filename => $input_file, + _input_line_ending => $input_line_ending, + _rinput_buffer => [], + _started => 0, + }, $class; +} + +sub close_input_file { + my $self = shift; + + # Only close physical files, not STDIN and other objects + my $filename = $self->{_filename}; + if ( $filename ne '-' && !ref $filename ) { + eval { $self->{_fh}->close() }; + } + return; +} + +sub get_line { + my $self = shift; + my $line = undef; + my $fh = $self->{_fh}; + my $rinput_buffer = $self->{_rinput_buffer}; + + if ( scalar( @{$rinput_buffer} ) ) { + $line = shift @{$rinput_buffer}; + } + else { + $line = $fh->getline(); + + # patch to read raw mac files under unix, dos + # see if the first line has embedded \r's + if ( $line && !$self->{_started} ) { + if ( $line =~ /[\015][^\015\012]/ ) { + + # found one -- break the line up and store in a buffer + @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line; + my $count = @{$rinput_buffer}; + $line = shift @{$rinput_buffer}; + } + $self->{_started}++; + } + } + return $line; +} +1; + diff --git a/lib/Perl/Tidy/Logger.pm b/lib/Perl/Tidy/Logger.pm new file mode 100644 index 0000000..547a635 --- /dev/null +++ b/lib/Perl/Tidy/Logger.pm @@ -0,0 +1,467 @@ +##################################################################### +# +# The Perl::Tidy::Logger class writes the .LOG and .ERR files +# +##################################################################### + +package Perl::Tidy::Logger; +use strict; +use warnings; +our $VERSION = '20190601'; + +sub new { + + my ( $class, $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude ) = + @_; + + my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef; + + # remove any old error output file if we might write a new one + unless ( $fh_warnings || ref($warning_file) ) { + if ( -e $warning_file ) { + unlink($warning_file) + or Perl::Tidy::Die( + "couldn't unlink warning file $warning_file: $!\n"); + } + } + + my $logfile_gap = + defined( $rOpts->{'logfile-gap'} ) + ? $rOpts->{'logfile-gap'} + : 50; + if ( $logfile_gap == 0 ) { $logfile_gap = 1 } + + return bless { + _log_file => $log_file, + _logfile_gap => $logfile_gap, + _rOpts => $rOpts, + _fh_warnings => $fh_warnings, + _last_input_line_written => 0, + _at_end_of_file => 0, + _use_prefix => 1, + _block_log_output => 0, + _line_of_tokens => undef, + _output_line_number => undef, + _wrote_line_information_string => 0, + _wrote_column_headings => 0, + _warning_file => $warning_file, + _warning_count => 0, + _complaint_count => 0, + _saw_code_bug => -1, # -1=no 0=maybe 1=for sure + _saw_brace_error => 0, + _saw_extrude => $saw_extrude, + _output_array => [], + }, $class; +} + +sub get_warning_count { + my $self = shift; + return $self->{_warning_count}; +} + +sub get_use_prefix { + my $self = shift; + return $self->{_use_prefix}; +} + +sub block_log_output { + my $self = shift; + $self->{_block_log_output} = 1; + return; +} + +sub unblock_log_output { + my $self = shift; + $self->{_block_log_output} = 0; + return; +} + +sub interrupt_logfile { + my $self = shift; + $self->{_use_prefix} = 0; + $self->warning("\n"); + $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); + return; +} + +sub resume_logfile { + my $self = shift; + $self->write_logfile_entry( '#' x 60 . "\n" ); + $self->{_use_prefix} = 1; + return; +} + +sub we_are_at_the_last_line { + my $self = shift; + unless ( $self->{_wrote_line_information_string} ) { + $self->write_logfile_entry("Last line\n\n"); + } + $self->{_at_end_of_file} = 1; + return; +} + +# record some stuff in case we go down in flames +sub black_box { + my ( $self, $line_of_tokens, $output_line_number ) = @_; + my $input_line = $line_of_tokens->{_line_text}; + my $input_line_number = $line_of_tokens->{_line_number}; + + # save line information in case we have to write a logfile message + $self->{_line_of_tokens} = $line_of_tokens; + $self->{_output_line_number} = $output_line_number; + $self->{_wrote_line_information_string} = 0; + + my $last_input_line_written = $self->{_last_input_line_written}; + my $rOpts = $self->{_rOpts}; + if ( + ( + ( $input_line_number - $last_input_line_written ) >= + $self->{_logfile_gap} + ) + || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) + ) + { + my $structural_indentation_level = $line_of_tokens->{_level_0}; + $structural_indentation_level = 0 + if ( $structural_indentation_level < 0 ); + $self->{_last_input_line_written} = $input_line_number; + ( my $out_str = $input_line ) =~ s/^\s*//; + chomp $out_str; + + $out_str = ( '.' x $structural_indentation_level ) . $out_str; + + if ( length($out_str) > 35 ) { + $out_str = substr( $out_str, 0, 35 ) . " ...."; + } + $self->logfile_output( "", "$out_str\n" ); + } + return; +} + +sub write_logfile_entry { + + my ( $self, @msg ) = @_; + + # add leading >>> to avoid confusing error messages and code + $self->logfile_output( ">>>", "@msg" ); + return; +} + +sub write_column_headings { + my $self = shift; + + $self->{_wrote_column_headings} = 1; + my $routput_array = $self->{_output_array}; + push @{$routput_array}, <>>) +lines levels i k (code begins with one '.' per indent level) +------ ----- - - -------- ------------------------------------------- +EOM + return; +} + +sub make_line_information_string { + + # make columns of information when a logfile message needs to go out + my $self = shift; + my $line_of_tokens = $self->{_line_of_tokens}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $line_information_string = ""; + if ($input_line_number) { + + my $output_line_number = $self->{_output_line_number}; + my $brace_depth = $line_of_tokens->{_curly_brace_depth}; + my $paren_depth = $line_of_tokens->{_paren_depth}; + my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; + my $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + ##my $rtoken_array = $line_of_tokens->{_rtoken_array}; + + my $structural_indentation_level = $line_of_tokens->{_level_0}; + + $self->write_column_headings() unless $self->{_wrote_column_headings}; + + # keep logfile columns aligned for scripts up to 999 lines; + # for longer scripts it doesn't really matter + my $extra_space = ""; + $extra_space .= + ( $input_line_number < 10 ) ? " " + : ( $input_line_number < 100 ) ? " " + : ""; + $extra_space .= + ( $output_line_number < 10 ) ? " " + : ( $output_line_number < 100 ) ? " " + : ""; + + # there are 2 possible nesting strings: + # the original which looks like this: (0 [1 {2 + # the new one, which looks like this: {{[ + # the new one is easier to read, and shows the order, but + # could be arbitrarily long, so we use it unless it is too long + my $nesting_string = + "($paren_depth [$square_bracket_depth {$brace_depth"; + my $nesting_string_new = $line_of_tokens->{_nesting_tokens_0}; + my $ci_level = $line_of_tokens->{_ci_level_0}; + if ( $ci_level > 9 ) { $ci_level = '*' } + my $bk = ( $line_of_tokens->{_nesting_blocks_0} =~ /1$/ ) ? '1' : '0'; + + if ( length($nesting_string_new) <= 8 ) { + $nesting_string = + $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); + } + $line_information_string = +"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; + } + return $line_information_string; +} + +sub logfile_output { + my ( $self, $prompt, $msg ) = @_; + return if ( $self->{_block_log_output} ); + + my $routput_array = $self->{_output_array}; + if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { + push @{$routput_array}, "$msg"; + } + else { + my $line_information_string = $self->make_line_information_string(); + $self->{_wrote_line_information_string} = 1; + + if ($line_information_string) { + push @{$routput_array}, "$line_information_string $prompt$msg"; + } + else { + push @{$routput_array}, "$msg"; + } + } + return; +} + +sub get_saw_brace_error { + my $self = shift; + return $self->{_saw_brace_error}; +} + +sub increment_brace_error { + my $self = shift; + $self->{_saw_brace_error}++; + return; +} + +sub brace_warning { + my ( $self, $msg ) = @_; + + #use constant BRACE_WARNING_LIMIT => 10; + my $BRACE_WARNING_LIMIT = 10; + my $saw_brace_error = $self->{_saw_brace_error}; + + if ( $saw_brace_error < $BRACE_WARNING_LIMIT ) { + $self->warning($msg); + } + $saw_brace_error++; + $self->{_saw_brace_error} = $saw_brace_error; + + if ( $saw_brace_error == $BRACE_WARNING_LIMIT ) { + $self->warning("No further warnings of this type will be given\n"); + } + return; +} + +sub complain { + + # handle non-critical warning messages based on input flag + my ( $self, $msg ) = @_; + my $rOpts = $self->{_rOpts}; + + # these appear in .ERR output only if -w flag is used + if ( $rOpts->{'warning-output'} ) { + $self->warning($msg); + } + + # otherwise, they go to the .LOG file + else { + $self->{_complaint_count}++; + $self->write_logfile_entry($msg); + } + return; +} + +sub warning { + + # report errors to .ERR file (or stdout) + my ( $self, $msg ) = @_; + + #use constant WARNING_LIMIT => 50; + my $WARNING_LIMIT = 50; + + my $rOpts = $self->{_rOpts}; + unless ( $rOpts->{'quiet'} ) { + + my $warning_count = $self->{_warning_count}; + my $fh_warnings = $self->{_fh_warnings}; + if ( !$fh_warnings ) { + my $warning_file = $self->{_warning_file}; + ( $fh_warnings, my $filename ) = + Perl::Tidy::streamhandle( $warning_file, 'w' ); + $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n"); + Perl::Tidy::Warn("## Please see file $filename\n") + unless ref($warning_file); + $self->{_fh_warnings} = $fh_warnings; + $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n"); + } + + if ( $warning_count < $WARNING_LIMIT ) { + if ( $self->get_use_prefix() > 0 ) { + my $input_line_number = + Perl::Tidy::Tokenizer::get_input_line_number(); + if ( !defined($input_line_number) ) { $input_line_number = -1 } + $fh_warnings->print("$input_line_number:\t$msg"); + $self->write_logfile_entry("WARNING: $msg"); + } + else { + $fh_warnings->print($msg); + $self->write_logfile_entry($msg); + } + } + $warning_count++; + $self->{_warning_count} = $warning_count; + + if ( $warning_count == $WARNING_LIMIT ) { + $fh_warnings->print("No further warnings will be given\n"); + } + } + return; +} + +# programming bug codes: +# -1 = no bug +# 0 = maybe, not sure. +# 1 = definitely +sub report_possible_bug { + my $self = shift; + my $saw_code_bug = $self->{_saw_code_bug}; + $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; + return; +} + +sub report_definite_bug { + my $self = shift; + $self->{_saw_code_bug} = 1; + return; +} + +sub ask_user_for_bug_report { + + my ( $self, $infile_syntax_ok, $formatter ) = @_; + my $saw_code_bug = $self->{_saw_code_bug}; + if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { + $self->warning(<{_saw_extrude} ) { + $self->warning(<warning(<get_added_semicolon_count(); + }; + if ( $added_semicolon_count > 0 ) { + $self->warning(<{_rOpts}; + my $warning_count = $self->{_warning_count}; + my $saw_code_bug = $self->{_saw_code_bug}; + + my $save_logfile = + ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) + || $saw_code_bug == 1 + || $rOpts->{'logfile'}; + my $log_file = $self->{_log_file}; + if ($warning_count) { + if ($save_logfile) { + $self->block_log_output(); # avoid echoing this to the logfile + $self->warning( + "The logfile $log_file may contain useful information\n"); + $self->unblock_log_output(); + } + + if ( $self->{_complaint_count} > 0 ) { + $self->warning( +"To see $self->{_complaint_count} non-critical warnings rerun with -w\n" + ); + } + + if ( $self->{_saw_brace_error} + && ( $self->{_logfile_gap} > 1 || !$save_logfile ) ) + { + $self->warning("To save a full .LOG file rerun with -g\n"); + } + } + $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); + + if ($save_logfile) { + my $log_file = $self->{_log_file}; + my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); + if ($fh) { + my $routput_array = $self->{_output_array}; + foreach ( @{$routput_array} ) { $fh->print($_) } + if ( $log_file ne '-' && !ref $log_file ) { + eval { $fh->close() }; + } + } + } + return; +} +1; + diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm new file mode 100644 index 0000000..c7bc6ff --- /dev/null +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -0,0 +1,7892 @@ +######################################################################## +# +# the Perl::Tidy::Tokenizer package is essentially a filter which +# reads lines of perl source code from a source object and provides +# corresponding tokenized lines through its get_line() method. Lines +# flow from the source_object to the caller like this: +# +# source_object --> LineBuffer_object --> Tokenizer --> calling routine +# get_line() get_line() get_line() line_of_tokens +# +# The source object can be any object with a get_line() method which +# supplies one line (a character string) perl call. +# The LineBuffer object is created by the Tokenizer. +# The Tokenizer returns a reference to a data structure 'line_of_tokens' +# containing one tokenized line for each call to its get_line() method. +# +# WARNING: This is not a real class yet. Only one tokenizer my be used. +# +######################################################################## + +package Perl::Tidy::Tokenizer; +use strict; +use warnings; +our $VERSION = '20190601'; + +use Perl::Tidy::LineBuffer; + +BEGIN { + + # Caution: these debug flags produce a lot of output + # They should all be 0 except when debugging small scripts + + use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; + use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; + use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; + use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; + use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; + + my $debug_warning = sub { + print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n"; + }; + + TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); + TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); + TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); + TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); + TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); + +} + +use Carp; + +# PACKAGE VARIABLES for processing an entire FILE. +use vars qw{ + $tokenizer_self + + $last_nonblank_token + $last_nonblank_type + $last_nonblank_block_type + $statement_type + $in_attribute_list + $current_package + $context + + %is_constant + %is_user_function + %user_function_prototype + %is_block_function + %is_block_list_function + %saw_function_definition + + $brace_depth + $paren_depth + $square_bracket_depth + + @current_depth + @total_depth + $total_depth + @nesting_sequence_number + @current_sequence_number + @paren_type + @paren_semicolon_count + @paren_structural_type + @brace_type + @brace_structural_type + @brace_context + @brace_package + @square_bracket_type + @square_bracket_structural_type + @depth_array + @nested_ternary_flag + @nested_statement_type + @starting_line_of_current_depth +}; + +# GLOBAL CONSTANTS for routines in this package +use vars qw{ + %is_indirect_object_taker + %is_block_operator + %expecting_operator_token + %expecting_operator_types + %expecting_term_types + %expecting_term_token + %is_digraph + %is_file_test_operator + %is_trigraph + %is_tetragraph + %is_valid_token_type + %is_keyword + %is_code_block_token + %really_want_term + @opening_brace_names + @closing_brace_names + %is_keyword_taking_list + %is_keyword_taking_optional_args + %is_q_qq_qw_qx_qr_s_y_tr_m +}; + +# possible values of operator_expected() +use constant TERM => -1; +use constant UNKNOWN => 0; +use constant OPERATOR => 1; + +# possible values of context +use constant SCALAR_CONTEXT => -1; +use constant UNKNOWN_CONTEXT => 0; +use constant LIST_CONTEXT => 1; + +# Maximum number of little messages; probably need not be changed. +use constant MAX_NAG_MESSAGES => 6; + +{ + + # methods to count instances + my $_count = 0; + sub get_count { return $_count; } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } +} + +sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; +} + +sub new { + + my ( $class, @args ) = @_; + + # Note: 'tabs' and 'indent_columns' are temporary and should be + # removed asap + my %defaults = ( + source_object => undef, + debugger_object => undef, + diagnostics_object => undef, + logger_object => undef, + starting_level => undef, + indent_columns => 4, + tabsize => 8, + look_for_hash_bang => 0, + trim_qw => 1, + look_for_autoloader => 1, + look_for_selfloader => 1, + starting_line_number => 1, + extended_syntax => 0, + ); + my %args = ( %defaults, @args ); + + # we are given an object with a get_line() method to supply source lines + my $source_object = $args{source_object}; + + # we create another object with a get_line() and peek_ahead() method + my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); + + # Tokenizer state data is as follows: + # _rhere_target_list reference to list of here-doc targets + # _here_doc_target the target string for a here document + # _here_quote_character the type of here-doc quoting (" ' ` or none) + # to determine if interpolation is done + # _quote_target character we seek if chasing a quote + # _line_start_quote line where we started looking for a long quote + # _in_here_doc flag indicating if we are in a here-doc + # _in_pod flag set if we are in pod documentation + # _in_error flag set if we saw severe error (binary in script) + # _in_data flag set if we are in __DATA__ section + # _in_end flag set if we are in __END__ section + # _in_format flag set if we are in a format description + # _in_attribute_list flag telling if we are looking for attributes + # _in_quote flag telling if we are chasing a quote + # _starting_level indentation level of first line + # _line_buffer_object object with get_line() method to supply source code + # _diagnostics_object place to write debugging information + # _unexpected_error_count error count used to limit output + # _lower_case_labels_at line numbers where lower case labels seen + # _hit_bug program bug detected + $tokenizer_self = { + _rhere_target_list => [], + _in_here_doc => 0, + _here_doc_target => "", + _here_quote_character => "", + _in_data => 0, + _in_end => 0, + _in_format => 0, + _in_error => 0, + _in_pod => 0, + _in_attribute_list => 0, + _in_quote => 0, + _quote_target => "", + _line_start_quote => -1, + _starting_level => $args{starting_level}, + _know_starting_level => defined( $args{starting_level} ), + _tabsize => $args{tabsize}, + _indent_columns => $args{indent_columns}, + _look_for_hash_bang => $args{look_for_hash_bang}, + _trim_qw => $args{trim_qw}, + _continuation_indentation => $args{continuation_indentation}, + _outdent_labels => $args{outdent_labels}, + _last_line_number => $args{starting_line_number} - 1, + _saw_perl_dash_P => 0, + _saw_perl_dash_w => 0, + _saw_use_strict => 0, + _saw_v_string => 0, + _hit_bug => 0, + _look_for_autoloader => $args{look_for_autoloader}, + _look_for_selfloader => $args{look_for_selfloader}, + _saw_autoloader => 0, + _saw_selfloader => 0, + _saw_hash_bang => 0, + _saw_end => 0, + _saw_data => 0, + _saw_negative_indentation => 0, + _started_tokenizing => 0, + _line_buffer_object => $line_buffer_object, + _debugger_object => $args{debugger_object}, + _diagnostics_object => $args{diagnostics_object}, + _logger_object => $args{logger_object}, + _unexpected_error_count => 0, + _started_looking_for_here_target_at => 0, + _nearly_matched_here_target_at => undef, + _line_text => "", + _rlower_case_labels_at => undef, + _extended_syntax => $args{extended_syntax}, + }; + + prepare_for_a_new_file(); + find_starting_indentation_level(); + + bless $tokenizer_self, $class; + + # This is not a full class yet, so die if an attempt is made to + # create more than one object. + + if ( _increment_count() > 1 ) { + confess +"Attempt to create more than 1 object in $class, which is not a true class yet\n"; + } + + return $tokenizer_self; + +} + +# interface to Perl::Tidy::Logger routines +sub warning { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->warning($msg); + } + return; +} + +sub complain { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->complain($msg); + } + return; +} + +sub write_logfile_entry { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->write_logfile_entry($msg); + } + return; +} + +sub interrupt_logfile { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->interrupt_logfile(); + } + return; +} + +sub resume_logfile { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->resume_logfile(); + } + return; +} + +sub increment_brace_error { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->increment_brace_error(); + } + return; +} + +sub report_definite_bug { + $tokenizer_self->{_hit_bug} = 1; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->report_definite_bug(); + } + return; +} + +sub brace_warning { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->brace_warning($msg); + } + return; +} + +sub get_saw_brace_error { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + return $logger_object->get_saw_brace_error(); + } + else { + return 0; + } +} + +# interface to Perl::Tidy::Diagnostics routines +sub write_diagnostics { + my $msg = shift; + if ( $tokenizer_self->{_diagnostics_object} ) { + $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg); + } + return; +} + +sub report_tokenization_errors { + + my $self = shift; + my $severe_error = $self->{_in_error}; + + my $level = get_indentation_level(); + if ( $level != $tokenizer_self->{_starting_level} ) { + warning("final indentation level: $level\n"); + } + + check_final_nesting_depths(); + + if ( $tokenizer_self->{_look_for_hash_bang} + && !$tokenizer_self->{_saw_hash_bang} ) + { + warning( + "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); + } + + if ( $tokenizer_self->{_in_format} ) { + warning("hit EOF while in format description\n"); + } + + if ( $tokenizer_self->{_in_pod} ) { + + # Just write log entry if this is after __END__ or __DATA__ + # because this happens to often, and it is not likely to be + # a parsing error. + if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { + write_logfile_entry( +"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" + ); + } + + else { + complain( +"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" + ); + } + + } + + if ( $tokenizer_self->{_in_here_doc} ) { + $severe_error = 1; + my $here_doc_target = $tokenizer_self->{_here_doc_target}; + my $started_looking_for_here_target_at = + $tokenizer_self->{_started_looking_for_here_target_at}; + if ($here_doc_target) { + warning( +"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" + ); + } + else { + warning( +"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" + ); + } + my $nearly_matched_here_target_at = + $tokenizer_self->{_nearly_matched_here_target_at}; + if ($nearly_matched_here_target_at) { + warning( +"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" + ); + } + } + + if ( $tokenizer_self->{_in_quote} ) { + $severe_error = 1; + my $line_start_quote = $tokenizer_self->{_line_start_quote}; + my $quote_target = $tokenizer_self->{_quote_target}; + my $what = + ( $tokenizer_self->{_in_attribute_list} ) + ? "attribute list" + : "quote/pattern"; + warning( +"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" + ); + } + + if ( $tokenizer_self->{_hit_bug} ) { + $severe_error = 1; + } + + my $logger_object = $tokenizer_self->{_logger_object}; + +# TODO: eventually may want to activate this to cause file to be output verbatim + if (0) { + + # Set the severe error for a fairly high warning count because + # some of the warnings do not harm formatting, such as duplicate + # sub names. + my $warning_count = $logger_object->{_warning_count}; + if ( $warning_count > 50 ) { + $severe_error = 1; + } + + # Brace errors are significant, so set the severe error flag at + # a low number. + my $saw_brace_error = $logger_object->{_saw_brace_error}; + if ( $saw_brace_error > 2 ) { + $severe_error = 1; + } + } + + unless ( $tokenizer_self->{_saw_perl_dash_w} ) { + if ( $] < 5.006 ) { + write_logfile_entry("Suggest including '-w parameter'\n"); + } + else { + write_logfile_entry("Suggest including 'use warnings;'\n"); + } + } + + if ( $tokenizer_self->{_saw_perl_dash_P} ) { + write_logfile_entry("Use of -P parameter for defines is discouraged\n"); + } + + unless ( $tokenizer_self->{_saw_use_strict} ) { + write_logfile_entry("Suggest including 'use strict;'\n"); + } + + # it is suggested that labels have at least one upper case character + # for legibility and to avoid code breakage as new keywords are introduced + if ( $tokenizer_self->{_rlower_case_labels_at} ) { + my @lower_case_labels_at = + @{ $tokenizer_self->{_rlower_case_labels_at} }; + write_logfile_entry( + "Suggest using upper case characters in label(s)\n"); + local $" = ')('; + write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); + } + return $severe_error; +} + +sub report_v_string { + + # warn if this version can't handle v-strings + my $tok = shift; + unless ( $tokenizer_self->{_saw_v_string} ) { + $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; + } + if ( $] < 5.006 ) { + warning( +"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" + ); + } + return; +} + +sub get_input_line_number { + return $tokenizer_self->{_last_line_number}; +} + +# returns the next tokenized line +sub get_line { + + my $self = shift; + + # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, + # $square_bracket_depth, $paren_depth + + my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); + $tokenizer_self->{_line_text} = $input_line; + + return unless ($input_line); + + my $input_line_number = ++$tokenizer_self->{_last_line_number}; + + # Find and remove what characters terminate this line, including any + # control r + my $input_line_separator = ""; + if ( chomp($input_line) ) { $input_line_separator = $/ } + + # TODO: what other characters should be included here? + if ( $input_line =~ s/((\r|\035|\032)+)$// ) { + $input_line_separator = $2 . $input_line_separator; + } + + # for backwards compatibility we keep the line text terminated with + # a newline character + $input_line .= "\n"; + $tokenizer_self->{_line_text} = $input_line; # update + + # create a data structure describing this line which will be + # returned to the caller. + + # _line_type codes are: + # SYSTEM - system-specific code before hash-bang line + # CODE - line of perl code (including comments) + # POD_START - line starting pod, such as '=head' + # POD - pod documentation text + # POD_END - last line of pod section, '=cut' + # HERE - text of here-document + # HERE_END - last line of here-doc (target word) + # FORMAT - format section + # FORMAT_END - last line of format section, '.' + # DATA_START - __DATA__ line + # DATA - unidentified text following __DATA__ + # END_START - __END__ line + # END - unidentified text following __END__ + # ERROR - we are in big trouble, probably not a perl script + + # Other variables: + # _curly_brace_depth - depth of curly braces at start of line + # _square_bracket_depth - depth of square brackets at start of line + # _paren_depth - depth of parens at start of line + # _starting_in_quote - this line continues a multi-line quote + # (so don't trim leading blanks!) + # _ending_in_quote - this line ends in a multi-line quote + # (so don't trim trailing blanks!) + my $line_of_tokens = { + _line_type => 'EOF', + _line_text => $input_line, + _line_number => $input_line_number, + _rtoken_type => undef, + _rtokens => undef, + _rlevels => undef, + _rslevels => undef, + _rblock_type => undef, + _rcontainer_type => undef, + _rcontainer_environment => undef, + _rtype_sequence => undef, + _rnesting_tokens => undef, + _rci_levels => undef, + _rnesting_blocks => undef, + _guessed_indentation_level => 0, + _starting_in_quote => 0, # to be set by subroutine + _ending_in_quote => 0, + _curly_brace_depth => $brace_depth, + _square_bracket_depth => $square_bracket_depth, + _paren_depth => $paren_depth, + _quote_character => '', + }; + + # must print line unchanged if we are in a here document + if ( $tokenizer_self->{_in_here_doc} ) { + + $line_of_tokens->{_line_type} = 'HERE'; + my $here_doc_target = $tokenizer_self->{_here_doc_target}; + my $here_quote_character = $tokenizer_self->{_here_quote_character}; + my $candidate_target = $input_line; + chomp $candidate_target; + + # Handle <<~ targets, which are indicated here by a leading space on + # the here quote character + if ( $here_quote_character =~ /^\s/ ) { + $candidate_target =~ s/^\s*//; + } + if ( $candidate_target eq $here_doc_target ) { + $tokenizer_self->{_nearly_matched_here_target_at} = undef; + $line_of_tokens->{_line_type} = 'HERE_END'; + write_logfile_entry("Exiting HERE document $here_doc_target\n"); + + my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; + if ( @{$rhere_target_list} ) { # there can be multiple here targets + ( $here_doc_target, $here_quote_character ) = + @{ shift @{$rhere_target_list} }; + $tokenizer_self->{_here_doc_target} = $here_doc_target; + $tokenizer_self->{_here_quote_character} = + $here_quote_character; + write_logfile_entry( + "Entering HERE document $here_doc_target\n"); + $tokenizer_self->{_nearly_matched_here_target_at} = undef; + $tokenizer_self->{_started_looking_for_here_target_at} = + $input_line_number; + } + else { + $tokenizer_self->{_in_here_doc} = 0; + $tokenizer_self->{_here_doc_target} = ""; + $tokenizer_self->{_here_quote_character} = ""; + } + } + + # check for error of extra whitespace + # note for PERL6: leading whitespace is allowed + else { + $candidate_target =~ s/\s*$//; + $candidate_target =~ s/^\s*//; + if ( $candidate_target eq $here_doc_target ) { + $tokenizer_self->{_nearly_matched_here_target_at} = + $input_line_number; + } + } + return $line_of_tokens; + } + + # must print line unchanged if we are in a format section + elsif ( $tokenizer_self->{_in_format} ) { + + if ( $input_line =~ /^\.[\s#]*$/ ) { + write_logfile_entry("Exiting format section\n"); + $tokenizer_self->{_in_format} = 0; + $line_of_tokens->{_line_type} = 'FORMAT_END'; + } + else { + $line_of_tokens->{_line_type} = 'FORMAT'; + } + return $line_of_tokens; + } + + # must print line unchanged if we are in pod documentation + elsif ( $tokenizer_self->{_in_pod} ) { + + $line_of_tokens->{_line_type} = 'POD'; + if ( $input_line =~ /^=cut/ ) { + $line_of_tokens->{_line_type} = 'POD_END'; + write_logfile_entry("Exiting POD section\n"); + $tokenizer_self->{_in_pod} = 0; + } + if ( $input_line =~ /^\#\!.*perl\b/ ) { + warning( + "Hash-bang in pod can cause older versions of perl to fail! \n" + ); + } + + return $line_of_tokens; + } + + # must print line unchanged if we have seen a severe error (i.e., we + # are seeing illegal tokens and cannot continue. Syntax errors do + # not pass this route). Calling routine can decide what to do, but + # the default can be to just pass all lines as if they were after __END__ + elsif ( $tokenizer_self->{_in_error} ) { + $line_of_tokens->{_line_type} = 'ERROR'; + return $line_of_tokens; + } + + # print line unchanged if we are __DATA__ section + elsif ( $tokenizer_self->{_in_data} ) { + + # ...but look for POD + # Note that the _in_data and _in_end flags remain set + # so that we return to that state after seeing the + # end of a pod section + if ( $input_line =~ /^=(?!cut)/ ) { + $line_of_tokens->{_line_type} = 'POD_START'; + write_logfile_entry("Entering POD section\n"); + $tokenizer_self->{_in_pod} = 1; + return $line_of_tokens; + } + else { + $line_of_tokens->{_line_type} = 'DATA'; + return $line_of_tokens; + } + } + + # print line unchanged if we are in __END__ section + elsif ( $tokenizer_self->{_in_end} ) { + + # ...but look for POD + # Note that the _in_data and _in_end flags remain set + # so that we return to that state after seeing the + # end of a pod section + if ( $input_line =~ /^=(?!cut)/ ) { + $line_of_tokens->{_line_type} = 'POD_START'; + write_logfile_entry("Entering POD section\n"); + $tokenizer_self->{_in_pod} = 1; + return $line_of_tokens; + } + else { + $line_of_tokens->{_line_type} = 'END'; + return $line_of_tokens; + } + } + + # check for a hash-bang line if we haven't seen one + if ( !$tokenizer_self->{_saw_hash_bang} ) { + if ( $input_line =~ /^\#\!.*perl\b/ ) { + $tokenizer_self->{_saw_hash_bang} = $input_line_number; + + # check for -w and -P flags + if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { + $tokenizer_self->{_saw_perl_dash_P} = 1; + } + + if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { + $tokenizer_self->{_saw_perl_dash_w} = 1; + } + + if ( + ( $input_line_number > 1 ) + + # leave any hash bang in a BEGIN block alone + # i.e. see 'debugger-duck_type.t' + && !( + $last_nonblank_block_type + && $last_nonblank_block_type eq 'BEGIN' + ) + && ( !$tokenizer_self->{_look_for_hash_bang} ) + ) + { + + # this is helpful for VMS systems; we may have accidentally + # tokenized some DCL commands + if ( $tokenizer_self->{_started_tokenizing} ) { + warning( +"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" + ); + } + else { + complain("Useless hash-bang after line 1\n"); + } + } + + # Report the leading hash-bang as a system line + # This will prevent -dac from deleting it + else { + $line_of_tokens->{_line_type} = 'SYSTEM'; + return $line_of_tokens; + } + } + } + + # wait for a hash-bang before parsing if the user invoked us with -x + if ( $tokenizer_self->{_look_for_hash_bang} + && !$tokenizer_self->{_saw_hash_bang} ) + { + $line_of_tokens->{_line_type} = 'SYSTEM'; + return $line_of_tokens; + } + + # a first line of the form ': #' will be marked as SYSTEM + # since lines of this form may be used by tcsh + if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { + $line_of_tokens->{_line_type} = 'SYSTEM'; + return $line_of_tokens; + } + + # now we know that it is ok to tokenize the line... + # the line tokenizer will modify any of these private variables: + # _rhere_target_list + # _in_data + # _in_end + # _in_format + # _in_error + # _in_pod + # _in_quote + my $ending_in_quote_last = $tokenizer_self->{_in_quote}; + tokenize_this_line($line_of_tokens); + + # Now finish defining the return structure and return it + $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; + + # handle severe error (binary data in script) + if ( $tokenizer_self->{_in_error} ) { + $tokenizer_self->{_in_quote} = 0; # to avoid any more messages + warning("Giving up after error\n"); + $line_of_tokens->{_line_type} = 'ERROR'; + reset_indentation_level(0); # avoid error messages + return $line_of_tokens; + } + + # handle start of pod documentation + if ( $tokenizer_self->{_in_pod} ) { + + # This gets tricky..above a __DATA__ or __END__ section, perl + # accepts '=cut' as the start of pod section. But afterwards, + # only pod utilities see it and they may ignore an =cut without + # leading =head. In any case, this isn't good. + if ( $input_line =~ /^=cut\b/ ) { + if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { + complain("=cut while not in pod ignored\n"); + $tokenizer_self->{_in_pod} = 0; + $line_of_tokens->{_line_type} = 'POD_END'; + } + else { + $line_of_tokens->{_line_type} = 'POD_START'; + complain( +"=cut starts a pod section .. this can fool pod utilities.\n" + ); + write_logfile_entry("Entering POD section\n"); + } + } + + else { + $line_of_tokens->{_line_type} = 'POD_START'; + write_logfile_entry("Entering POD section\n"); + } + + return $line_of_tokens; + } + + # update indentation levels for log messages + if ( $input_line !~ /^\s*$/ ) { + my $rlevels = $line_of_tokens->{_rlevels}; + $line_of_tokens->{_guessed_indentation_level} = + guess_old_indentation_level($input_line); + } + + # see if this line contains here doc targets + my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; + if ( @{$rhere_target_list} ) { + + my ( $here_doc_target, $here_quote_character ) = + @{ shift @{$rhere_target_list} }; + $tokenizer_self->{_in_here_doc} = 1; + $tokenizer_self->{_here_doc_target} = $here_doc_target; + $tokenizer_self->{_here_quote_character} = $here_quote_character; + write_logfile_entry("Entering HERE document $here_doc_target\n"); + $tokenizer_self->{_started_looking_for_here_target_at} = + $input_line_number; + } + + # NOTE: __END__ and __DATA__ statements are written unformatted + # because they can theoretically contain additional characters + # which are not tokenized (and cannot be read with either!). + if ( $tokenizer_self->{_in_data} ) { + $line_of_tokens->{_line_type} = 'DATA_START'; + write_logfile_entry("Starting __DATA__ section\n"); + $tokenizer_self->{_saw_data} = 1; + + # keep parsing after __DATA__ if use SelfLoader was seen + if ( $tokenizer_self->{_saw_selfloader} ) { + $tokenizer_self->{_in_data} = 0; + write_logfile_entry( + "SelfLoader seen, continuing; -nlsl deactivates\n"); + } + + return $line_of_tokens; + } + + elsif ( $tokenizer_self->{_in_end} ) { + $line_of_tokens->{_line_type} = 'END_START'; + write_logfile_entry("Starting __END__ section\n"); + $tokenizer_self->{_saw_end} = 1; + + # keep parsing after __END__ if use AutoLoader was seen + if ( $tokenizer_self->{_saw_autoloader} ) { + $tokenizer_self->{_in_end} = 0; + write_logfile_entry( + "AutoLoader seen, continuing; -nlal deactivates\n"); + } + return $line_of_tokens; + } + + # now, finally, we know that this line is type 'CODE' + $line_of_tokens->{_line_type} = 'CODE'; + + # remember if we have seen any real code + if ( !$tokenizer_self->{_started_tokenizing} + && $input_line !~ /^\s*$/ + && $input_line !~ /^\s*#/ ) + { + $tokenizer_self->{_started_tokenizing} = 1; + } + + if ( $tokenizer_self->{_debugger_object} ) { + $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); + } + + # Note: if keyword 'format' occurs in this line code, it is still CODE + # (keyword 'format' need not start a line) + if ( $tokenizer_self->{_in_format} ) { + write_logfile_entry("Entering format section\n"); + } + + if ( $tokenizer_self->{_in_quote} + and ( $tokenizer_self->{_line_start_quote} < 0 ) ) + { + + #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { + if ( + ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) + { + $tokenizer_self->{_line_start_quote} = $input_line_number; + write_logfile_entry( + "Start multi-line quote or pattern ending in $quote_target\n"); + } + } + elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) + && !$tokenizer_self->{_in_quote} ) + { + $tokenizer_self->{_line_start_quote} = -1; + write_logfile_entry("End of multi-line quote or pattern\n"); + } + + # we are returning a line of CODE + return $line_of_tokens; +} + +sub find_starting_indentation_level { + + # We need to find the indentation level of the first line of the + # script being formatted. Often it will be zero for an entire file, + # but if we are formatting a local block of code (within an editor for + # example) it may not be zero. The user may specify this with the + # -sil=n parameter but normally doesn't so we have to guess. + # + # USES GLOBAL VARIABLES: $tokenizer_self + my $starting_level = 0; + + # use value if given as parameter + if ( $tokenizer_self->{_know_starting_level} ) { + $starting_level = $tokenizer_self->{_starting_level}; + } + + # if we know there is a hash_bang line, the level must be zero + elsif ( $tokenizer_self->{_look_for_hash_bang} ) { + $tokenizer_self->{_know_starting_level} = 1; + } + + # otherwise figure it out from the input file + else { + my $line; + my $i = 0; + + # keep looking at lines until we find a hash bang or piece of code + my $msg = ""; + while ( $line = + $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) + { + + # if first line is #! then assume starting level is zero + if ( $i == 1 && $line =~ /^\#\!/ ) { + $starting_level = 0; + last; + } + next if ( $line =~ /^\s*#/ ); # skip past comments + next if ( $line =~ /^\s*$/ ); # skip past blank lines + $starting_level = guess_old_indentation_level($line); + last; + } + $msg = "Line $i implies starting-indentation-level = $starting_level\n"; + write_logfile_entry("$msg"); + } + $tokenizer_self->{_starting_level} = $starting_level; + reset_indentation_level($starting_level); + return; +} + +sub guess_old_indentation_level { + my ($line) = @_; + + # Guess the indentation level of an input line. + # + # For the first line of code this result will define the starting + # indentation level. It will mainly be non-zero when perltidy is applied + # within an editor to a local block of code. + # + # This is an impossible task in general because we can't know what tabs + # meant for the old script and how many spaces were used for one + # indentation level in the given input script. For example it may have + # been previously formatted with -i=7 -et=3. But we can at least try to + # make sure that perltidy guesses correctly if it is applied repeatedly to + # a block of code within an editor, so that the block stays at the same + # level when perltidy is applied repeatedly. + # + # USES GLOBAL VARIABLES: $tokenizer_self + my $level = 0; + + # find leading tabs, spaces, and any statement label + my $spaces = 0; + if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { + + # If there are leading tabs, we use the tab scheme for this run, if + # any, so that the code will remain stable when editing. + if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} } + + if ($2) { $spaces += length($2) } + + # correct for outdented labels + if ( $3 && $tokenizer_self->{'_outdent_labels'} ) { + $spaces += $tokenizer_self->{_continuation_indentation}; + } + } + + # compute indentation using the value of -i for this run. + # If -i=0 is used for this run (which is possible) it doesn't matter + # what we do here but we'll guess that the old run used 4 spaces per level. + my $indent_columns = $tokenizer_self->{_indent_columns}; + $indent_columns = 4 if ( !$indent_columns ); + $level = int( $spaces / $indent_columns ); + return ($level); +} + +# This is a currently unused debug routine +sub dump_functions { + + my $fh = *STDOUT; + foreach my $pkg ( keys %is_user_function ) { + print $fh "\nnon-constant subs in package $pkg\n"; + + foreach my $sub ( keys %{ $is_user_function{$pkg} } ) { + my $msg = ""; + if ( $is_block_list_function{$pkg}{$sub} ) { + $msg = 'block_list'; + } + + if ( $is_block_function{$pkg}{$sub} ) { + $msg = 'block'; + } + print $fh "$sub $msg\n"; + } + } + + foreach my $pkg ( keys %is_constant ) { + print $fh "\nconstants and constant subs in package $pkg\n"; + + foreach my $sub ( keys %{ $is_constant{$pkg} } ) { + print $fh "$sub\n"; + } + } + return; +} + +sub ones_count { + + # count number of 1's in a string of 1's and 0's + # example: ones_count("010101010101") gives 6 + my $str = shift; + return $str =~ tr/1/0/; +} + +sub prepare_for_a_new_file { + + # previous tokens needed to determine what to expect next + $last_nonblank_token = ';'; # the only possible starting state which + $last_nonblank_type = ';'; # will make a leading brace a code block + $last_nonblank_block_type = ''; + + # scalars for remembering statement types across multiple lines + $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' + $in_attribute_list = 0; + + # scalars for remembering where we are in the file + $current_package = "main"; + $context = UNKNOWN_CONTEXT; + + # hashes used to remember function information + %is_constant = (); # user-defined constants + %is_user_function = (); # user-defined functions + %user_function_prototype = (); # their prototypes + %is_block_function = (); + %is_block_list_function = (); + %saw_function_definition = (); + + # variables used to track depths of various containers + # and report nesting errors + $paren_depth = 0; + $brace_depth = 0; + $square_bracket_depth = 0; + @current_depth = (0) x scalar @closing_brace_names; + $total_depth = 0; + @total_depth = (); + @nesting_sequence_number = ( 0 .. @closing_brace_names - 1 ); + @current_sequence_number = (); + $paren_type[$paren_depth] = ''; + $paren_semicolon_count[$paren_depth] = 0; + $paren_structural_type[$brace_depth] = ''; + $brace_type[$brace_depth] = ';'; # identify opening brace as code block + $brace_structural_type[$brace_depth] = ''; + $brace_context[$brace_depth] = UNKNOWN_CONTEXT; + $brace_package[$paren_depth] = $current_package; + $square_bracket_type[$square_bracket_depth] = ''; + $square_bracket_structural_type[$square_bracket_depth] = ''; + + initialize_tokenizer_state(); + return; +} + +{ # begin tokenize_this_line + + use constant BRACE => 0; + use constant SQUARE_BRACKET => 1; + use constant PAREN => 2; + use constant QUESTION_COLON => 3; + + # TV1: scalars for processing one LINE. + # Re-initialized on each entry to sub tokenize_this_line. + my ( + $block_type, $container_type, $expecting, + $i, $i_tok, $input_line, + $input_line_number, $last_nonblank_i, $max_token_index, + $next_tok, $next_type, $peeked_ahead, + $prototype, $rhere_target_list, $rtoken_map, + $rtoken_type, $rtokens, $tok, + $type, $type_sequence, $indent_flag, + ); + + # TV2: refs to ARRAYS for processing one LINE + # Re-initialized on each call. + my $routput_token_list = []; # stack of output token indexes + my $routput_token_type = []; # token types + my $routput_block_type = []; # types of code block + my $routput_container_type = []; # paren types, such as if, elsif, .. + my $routput_type_sequence = []; # nesting sequential number + my $routput_indent_flag = []; # + + # TV3: SCALARS for quote variables. These are initialized with a + # subroutine call and continually updated as lines are processed. + my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); + + # TV4: SCALARS for multi-line identifiers and + # statements. These are initialized with a subroutine call + # and continually updated as lines are processed. + my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); + + # TV5: SCALARS for tracking indentation level. + # Initialized once and continually updated as lines are + # processed. + my ( + $nesting_token_string, $nesting_type_string, + $nesting_block_string, $nesting_block_flag, + $nesting_list_string, $nesting_list_flag, + $ci_string_in_tokenizer, $continuation_string_in_tokenizer, + $in_statement_continuation, $level_in_tokenizer, + $slevel_in_tokenizer, $rslevel_stack, + ); + + # TV6: SCALARS for remembering several previous + # tokens. Initialized once and continually updated as + # lines are processed. + my ( + $last_nonblank_container_type, $last_nonblank_type_sequence, + $last_last_nonblank_token, $last_last_nonblank_type, + $last_last_nonblank_block_type, $last_last_nonblank_container_type, + $last_last_nonblank_type_sequence, $last_nonblank_prototype, + ); + + # ---------------------------------------------------------------- + # beginning of tokenizer variable access and manipulation routines + # ---------------------------------------------------------------- + + sub initialize_tokenizer_state { + + # TV1: initialized on each call + # TV2: initialized on each call + # TV3: + $in_quote = 0; + $quote_type = 'Q'; + $quote_character = ""; + $quote_pos = 0; + $quote_depth = 0; + $quoted_string_1 = ""; + $quoted_string_2 = ""; + $allowed_quote_modifiers = ""; + + # TV4: + $id_scan_state = ''; + $identifier = ''; + $want_paren = ""; + $indented_if_level = 0; + + # TV5: + $nesting_token_string = ""; + $nesting_type_string = ""; + $nesting_block_string = '1'; # initially in a block + $nesting_block_flag = 1; + $nesting_list_string = '0'; # initially not in a list + $nesting_list_flag = 0; # initially not in a list + $ci_string_in_tokenizer = ""; + $continuation_string_in_tokenizer = "0"; + $in_statement_continuation = 0; + $level_in_tokenizer = 0; + $slevel_in_tokenizer = 0; + $rslevel_stack = []; + + # TV6: + $last_nonblank_container_type = ''; + $last_nonblank_type_sequence = ''; + $last_last_nonblank_token = ';'; + $last_last_nonblank_type = ';'; + $last_last_nonblank_block_type = ''; + $last_last_nonblank_container_type = ''; + $last_last_nonblank_type_sequence = ''; + $last_nonblank_prototype = ""; + return; + } + + sub save_tokenizer_state { + + my $rTV1 = [ + $block_type, $container_type, $expecting, + $i, $i_tok, $input_line, + $input_line_number, $last_nonblank_i, $max_token_index, + $next_tok, $next_type, $peeked_ahead, + $prototype, $rhere_target_list, $rtoken_map, + $rtoken_type, $rtokens, $tok, + $type, $type_sequence, $indent_flag, + ]; + + my $rTV2 = [ + $routput_token_list, $routput_token_type, + $routput_block_type, $routput_container_type, + $routput_type_sequence, $routput_indent_flag, + ]; + + my $rTV3 = [ + $in_quote, $quote_type, + $quote_character, $quote_pos, + $quote_depth, $quoted_string_1, + $quoted_string_2, $allowed_quote_modifiers, + ]; + + my $rTV4 = + [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; + + my $rTV5 = [ + $nesting_token_string, $nesting_type_string, + $nesting_block_string, $nesting_block_flag, + $nesting_list_string, $nesting_list_flag, + $ci_string_in_tokenizer, $continuation_string_in_tokenizer, + $in_statement_continuation, $level_in_tokenizer, + $slevel_in_tokenizer, $rslevel_stack, + ]; + + my $rTV6 = [ + $last_nonblank_container_type, + $last_nonblank_type_sequence, + $last_last_nonblank_token, + $last_last_nonblank_type, + $last_last_nonblank_block_type, + $last_last_nonblank_container_type, + $last_last_nonblank_type_sequence, + $last_nonblank_prototype, + ]; + return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; + } + + sub restore_tokenizer_state { + my ($rstate) = @_; + my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; + ( + $block_type, $container_type, $expecting, + $i, $i_tok, $input_line, + $input_line_number, $last_nonblank_i, $max_token_index, + $next_tok, $next_type, $peeked_ahead, + $prototype, $rhere_target_list, $rtoken_map, + $rtoken_type, $rtokens, $tok, + $type, $type_sequence, $indent_flag, + ) = @{$rTV1}; + + ( + $routput_token_list, $routput_token_type, + $routput_block_type, $routput_container_type, + $routput_type_sequence, $routput_type_sequence, + ) = @{$rTV2}; + + ( + $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, + ) = @{$rTV3}; + + ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = + @{$rTV4}; + + ( + $nesting_token_string, $nesting_type_string, + $nesting_block_string, $nesting_block_flag, + $nesting_list_string, $nesting_list_flag, + $ci_string_in_tokenizer, $continuation_string_in_tokenizer, + $in_statement_continuation, $level_in_tokenizer, + $slevel_in_tokenizer, $rslevel_stack, + ) = @{$rTV5}; + + ( + $last_nonblank_container_type, + $last_nonblank_type_sequence, + $last_last_nonblank_token, + $last_last_nonblank_type, + $last_last_nonblank_block_type, + $last_last_nonblank_container_type, + $last_last_nonblank_type_sequence, + $last_nonblank_prototype, + ) = @{$rTV6}; + return; + } + + sub get_indentation_level { + + # patch to avoid reporting error if indented if is not terminated + if ($indented_if_level) { return $level_in_tokenizer - 1 } + return $level_in_tokenizer; + } + + sub reset_indentation_level { + $level_in_tokenizer = $slevel_in_tokenizer = shift; + push @{$rslevel_stack}, $slevel_in_tokenizer; + return; + } + + sub peeked_ahead { + my $flag = shift; + $peeked_ahead = defined($flag) ? $flag : $peeked_ahead; + return $peeked_ahead; + } + + # ------------------------------------------------------------ + # end of tokenizer variable access and manipulation routines + # ------------------------------------------------------------ + + # ------------------------------------------------------------ + # beginning of various scanner interface routines + # ------------------------------------------------------------ + sub scan_replacement_text { + + # check for here-docs in replacement text invoked by + # a substitution operator with executable modifier 'e'. + # + # given: + # $replacement_text + # return: + # $rht = reference to any here-doc targets + my ($replacement_text) = @_; + + # quick check + return unless ( $replacement_text =~ /<{_logger_object}; + + # localize all package variables + local ( + $tokenizer_self, $last_nonblank_token, + $last_nonblank_type, $last_nonblank_block_type, + $statement_type, $in_attribute_list, + $current_package, $context, + %is_constant, %is_user_function, + %user_function_prototype, %is_block_function, + %is_block_list_function, %saw_function_definition, + $brace_depth, $paren_depth, + $square_bracket_depth, @current_depth, + @total_depth, $total_depth, + @nesting_sequence_number, @current_sequence_number, + @paren_type, @paren_semicolon_count, + @paren_structural_type, @brace_type, + @brace_structural_type, @brace_context, + @brace_package, @square_bracket_type, + @square_bracket_structural_type, @depth_array, + @starting_line_of_current_depth, @nested_ternary_flag, + @nested_statement_type, + ); + + # save all lexical variables + my $rstate = save_tokenizer_state(); + _decrement_count(); # avoid error check for multiple tokenizers + + # make a new tokenizer + my $rOpts = {}; + my $rpending_logfile_message; + my $source_object = + Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, + $rpending_logfile_message ); + my $tokenizer = Perl::Tidy::Tokenizer->new( + source_object => $source_object, + logger_object => $logger_object, + starting_line_number => $input_line_number, + ); + + # scan the replacement text + 1 while ( $tokenizer->get_line() ); + + # remove any here doc targets + my $rht = undef; + if ( $tokenizer_self->{_in_here_doc} ) { + $rht = []; + push @{$rht}, + [ + $tokenizer_self->{_here_doc_target}, + $tokenizer_self->{_here_quote_character} + ]; + if ( $tokenizer_self->{_rhere_target_list} ) { + push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; + $tokenizer_self->{_rhere_target_list} = undef; + } + $tokenizer_self->{_in_here_doc} = undef; + } + + # now its safe to report errors + my $severe_error = $tokenizer->report_tokenization_errors(); + + # TODO: Could propagate a severe error up + + # restore all tokenizer lexical variables + restore_tokenizer_state($rstate); + + # return the here doc targets + return $rht; + } + + sub scan_bare_identifier { + ( $i, $tok, $type, $prototype ) = + scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, + $rtoken_map, $max_token_index ); + return; + } + + sub scan_identifier { + ( $i, $tok, $type, $id_scan_state, $identifier ) = + scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, + $max_token_index, $expecting, $paren_type[$paren_depth] ); + return; + } + + sub scan_id { + ( $i, $tok, $type, $id_scan_state ) = + scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, + $id_scan_state, $max_token_index ); + return; + } + + sub scan_number { + my $number; + ( $i, $type, $number ) = + scan_number_do( $input_line, $i, $rtoken_map, $type, + $max_token_index ); + return $number; + } + + # a sub to warn if token found where term expected + sub error_if_expecting_TERM { + if ( $expecting == TERM ) { + if ( $really_want_term{$last_nonblank_type} ) { + report_unexpected( $tok, "term", $i_tok, $last_nonblank_i, + $rtoken_map, $rtoken_type, $input_line ); + return 1; + } + } + return; + } + + # a sub to warn if token found where operator expected + sub error_if_expecting_OPERATOR { + my $thing = shift; + if ( $expecting == OPERATOR ) { + if ( !defined($thing) ) { $thing = $tok } + report_unexpected( $thing, "operator", $i_tok, $last_nonblank_i, + $rtoken_map, $rtoken_type, $input_line ); + if ( $i_tok == 0 ) { + interrupt_logfile(); + warning("Missing ';' above?\n"); + resume_logfile(); + } + return 1; + } + return; + } + + # ------------------------------------------------------------ + # end scanner interfaces + # ------------------------------------------------------------ + + my %is_for_foreach; + @_ = qw(for foreach); + @is_for_foreach{@_} = (1) x scalar(@_); + + my %is_my_our; + @_ = qw(my our); + @is_my_our{@_} = (1) x scalar(@_); + + # These keywords may introduce blocks after parenthesized expressions, + # in the form: + # keyword ( .... ) { BLOCK } + # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' + my %is_blocktype_with_paren; + @_ = + qw(if elsif unless while until for foreach switch case given when catch); + @is_blocktype_with_paren{@_} = (1) x scalar(@_); + + # ------------------------------------------------------------ + # begin hash of code for handling most token types + # ------------------------------------------------------------ + my $tokenization_code = { + + # no special code for these types yet, but syntax checks + # could be added + +## '!' => undef, +## '!=' => undef, +## '!~' => undef, +## '%=' => undef, +## '&&=' => undef, +## '&=' => undef, +## '+=' => undef, +## '-=' => undef, +## '..' => undef, +## '..' => undef, +## '...' => undef, +## '.=' => undef, +## '<<=' => undef, +## '<=' => undef, +## '<=>' => undef, +## '<>' => undef, +## '=' => undef, +## '==' => undef, +## '=~' => undef, +## '>=' => undef, +## '>>' => undef, +## '>>=' => undef, +## '\\' => undef, +## '^=' => undef, +## '|=' => undef, +## '||=' => undef, +## '//=' => undef, +## '~' => undef, +## '~~' => undef, +## '!~~' => undef, + + '>' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + '|' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + '$' => sub { + + # start looking for a scalar + error_if_expecting_OPERATOR("Scalar") + if ( $expecting == OPERATOR ); + scan_identifier(); + + if ( $identifier eq '$^W' ) { + $tokenizer_self->{_saw_perl_dash_w} = 1; + } + + # Check for identifier in indirect object slot + # (vorboard.pl, sort.t). Something like: + # /^(print|printf|sort|exec|system)$/ + if ( + $is_indirect_object_taker{$last_nonblank_token} + + || ( ( $last_nonblank_token eq '(' ) + && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) + || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object + ) + { + $type = 'Z'; + } + }, + '(' => sub { + + ++$paren_depth; + $paren_semicolon_count[$paren_depth] = 0; + if ($want_paren) { + $container_type = $want_paren; + $want_paren = ""; + } + elsif ( $statement_type =~ /^sub\b/ ) { + $container_type = $statement_type; + } + else { + $container_type = $last_nonblank_token; + + # We can check for a syntax error here of unexpected '(', + # but this is going to get messy... + if ( + $expecting == OPERATOR + + # be sure this is not a method call of the form + # &method(...), $method->(..), &{method}(...), + # $ref[2](list) is ok & short for $ref[2]->(list) + # NOTE: at present, braces in something like &{ xxx } + # are not marked as a block, we might have a method call + && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ + + ) + { + + # ref: camel 3 p 703. + if ( $last_last_nonblank_token eq 'do' ) { + complain( +"do SUBROUTINE is deprecated; consider & or -> notation\n" + ); + } + else { + + # if this is an empty list, (), then it is not an + # error; for example, we might have a constant pi and + # invoke it with pi() or just pi; + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, + $max_token_index ); + if ( $next_nonblank_token ne ')' ) { + my $hint; + error_if_expecting_OPERATOR('('); + + if ( $last_nonblank_type eq 'C' ) { + $hint = + "$last_nonblank_token has a void prototype\n"; + } + elsif ( $last_nonblank_type eq 'i' ) { + if ( $i_tok > 0 + && $last_nonblank_token =~ /^\$/ ) + { + $hint = +"Do you mean '$last_nonblank_token->(' ?\n"; + } + } + if ($hint) { + interrupt_logfile(); + warning($hint); + resume_logfile(); + } + } ## end if ( $next_nonblank_token... + } ## end else [ if ( $last_last_nonblank_token... + } ## end if ( $expecting == OPERATOR... + } + $paren_type[$paren_depth] = $container_type; + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); + + # propagate types down through nested parens + # for example: the second paren in 'if ((' would be structural + # since the first is. + + if ( $last_nonblank_token eq '(' ) { + $type = $last_nonblank_type; + } + + # We exclude parens as structural after a ',' because it + # causes subtle problems with continuation indentation for + # something like this, where the first 'or' will not get + # indented. + # + # assert( + # __LINE__, + # ( not defined $check ) + # or ref $check + # or $check eq "new" + # or $check eq "old", + # ); + # + # Likewise, we exclude parens where a statement can start + # because of problems with continuation indentation, like + # these: + # + # ($firstline =~ /^#\!.*perl/) + # and (print $File::Find::name, "\n") + # and (return 1); + # + # (ref($usage_fref) =~ /CODE/) + # ? &$usage_fref + # : (&blast_usage, &blast_params, &blast_general_params); + + else { + $type = '{'; + } + + if ( $last_nonblank_type eq ')' ) { + warning( + "Syntax error? found token '$last_nonblank_type' then '('\n" + ); + } + $paren_structural_type[$paren_depth] = $type; + + }, + ')' => sub { + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); + + if ( $paren_structural_type[$paren_depth] eq '{' ) { + $type = '}'; + } + + $container_type = $paren_type[$paren_depth]; + + # restore statement type as 'sub' at closing paren of a signature + # so that a subsequent ':' is identified as an attribute + if ( $container_type =~ /^sub\b/ ) { + $statement_type = $container_type; + } + + # /^(for|foreach)$/ + if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { + my $num_sc = $paren_semicolon_count[$paren_depth]; + if ( $num_sc > 0 && $num_sc != 2 ) { + warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); + } + } + + if ( $paren_depth > 0 ) { $paren_depth-- } + }, + ',' => sub { + if ( $last_nonblank_type eq ',' ) { + complain("Repeated ','s \n"); + } + + # patch for operator_expected: note if we are in the list (use.t) + if ( $statement_type eq 'use' ) { $statement_type = '_use' } +## FIXME: need to move this elsewhere, perhaps check after a '(' +## elsif ($last_nonblank_token eq '(') { +## warning("Leading ','s illegal in some versions of perl\n"); +## } + }, + ';' => sub { + $context = UNKNOWN_CONTEXT; + $statement_type = ''; + $want_paren = ""; + + # /^(for|foreach)$/ + if ( $is_for_foreach{ $paren_type[$paren_depth] } ) + { # mark ; in for loop + + # Be careful: we do not want a semicolon such as the + # following to be included: + # + # for (sort {strcoll($a,$b);} keys %investments) { + + if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] + && $square_bracket_depth == + $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) + { + + $type = 'f'; + $paren_semicolon_count[$paren_depth]++; + } + } + + }, + '"' => sub { + error_if_expecting_OPERATOR("String") + if ( $expecting == OPERATOR ); + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = ""; + }, + "'" => sub { + error_if_expecting_OPERATOR("String") + if ( $expecting == OPERATOR ); + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = ""; + }, + '`' => sub { + error_if_expecting_OPERATOR("String") + if ( $expecting == OPERATOR ); + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = ""; + }, + '/' => sub { + my $is_pattern; + + # a pattern cannot follow certain keywords which take optional + # arguments, like 'shift' and 'pop'. See also '?'. + if ( $last_nonblank_type eq 'k' + && $is_keyword_taking_optional_args{$last_nonblank_token} ) + { + $is_pattern = 0; + } + elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess.. + my $msg; + ( $is_pattern, $msg ) = + guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($msg) { + write_diagnostics("DIVIDE:$msg\n"); + write_logfile_entry($msg); + } + } + else { $is_pattern = ( $expecting == TERM ) } + + if ($is_pattern) { + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = '[msixpodualngc]'; + } + else { # not a pattern; check for a /= token + + if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= + $i++; + $tok = '/='; + $type = $tok; + } + + #DEBUG - collecting info on what tokens follow a divide + # for development of guessing algorithm + #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { + # #write_diagnostics( "DIVIDE? $input_line\n" ); + #} + } + }, + '{' => sub { + + # if we just saw a ')', we will label this block with + # its type. We need to do this to allow sub + # code_block_type to determine if this brace starts a + # code block or anonymous hash. (The type of a paren + # pair is the preceding token, such as 'if', 'else', + # etc). + $container_type = ""; + + # ATTRS: for a '{' following an attribute list, reset + # things to look like we just saw the sub name + if ( $statement_type =~ /^sub/ ) { + $last_nonblank_token = $statement_type; + $last_nonblank_type = 'i'; + $statement_type = ""; + } + + # patch for SWITCH/CASE: hide these keywords from an immediately + # following opening brace + elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) + && $statement_type eq $last_nonblank_token ) + { + $last_nonblank_token = ";"; + } + + elsif ( $last_nonblank_token eq ')' ) { + $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; + + # defensive move in case of a nesting error (pbug.t) + # in which this ')' had no previous '(' + # this nesting error will have been caught + if ( !defined($last_nonblank_token) ) { + $last_nonblank_token = 'if'; + } + + # check for syntax error here; + unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { + if ( $tokenizer_self->{'_extended_syntax'} ) { + + # we append a trailing () to mark this as an unknown + # block type. This allows perltidy to format some + # common extensions of perl syntax. + # This is used by sub code_block_type + $last_nonblank_token .= '()'; + } + else { + my $list = + join( ' ', sort keys %is_blocktype_with_paren ); + warning( +"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" + ); + } + } + } + + # patch for paren-less for/foreach glitch, part 2. + # see note below under 'qw' + elsif ($last_nonblank_token eq 'qw' + && $is_for_foreach{$want_paren} ) + { + $last_nonblank_token = $want_paren; + if ( $last_last_nonblank_token eq $want_paren ) { + warning( +"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" + ); + + } + $want_paren = ""; + } + + # now identify which of the three possible types of + # curly braces we have: hash index container, anonymous + # hash reference, or code block. + + # non-structural (hash index) curly brace pair + # get marked 'L' and 'R' + if ( is_non_structural_brace() ) { + $type = 'L'; + + # patch for SWITCH/CASE: + # allow paren-less identifier after 'when' + # if the brace is preceded by a space + if ( $statement_type eq 'when' + && $last_nonblank_type eq 'i' + && $last_last_nonblank_type eq 'k' + && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) + { + $type = '{'; + $block_type = $statement_type; + } + } + + # code and anonymous hash have the same type, '{', but are + # distinguished by 'block_type', + # which will be blank for an anonymous hash + else { + + $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, + $max_token_index ); + + # patch to promote bareword type to function taking block + if ( $block_type + && $last_nonblank_type eq 'w' + && $last_nonblank_i >= 0 ) + { + if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { + $routput_token_type->[$last_nonblank_i] = 'G'; + } + } + + # patch for SWITCH/CASE: if we find a stray opening block brace + # where we might accept a 'case' or 'when' block, then take it + if ( $statement_type eq 'case' + || $statement_type eq 'when' ) + { + if ( !$block_type || $block_type eq '}' ) { + $block_type = $statement_type; + } + } + } + + $brace_type[ ++$brace_depth ] = $block_type; + $brace_package[$brace_depth] = $current_package; + $brace_structural_type[$brace_depth] = $type; + $brace_context[$brace_depth] = $context; + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); + }, + '}' => sub { + $block_type = $brace_type[$brace_depth]; + if ($block_type) { $statement_type = '' } + if ( defined( $brace_package[$brace_depth] ) ) { + $current_package = $brace_package[$brace_depth]; + } + + # can happen on brace error (caught elsewhere) + else { + } + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); + + if ( $brace_structural_type[$brace_depth] eq 'L' ) { + $type = 'R'; + } + + # propagate type information for 'do' and 'eval' blocks, and also + # for smartmatch operator. This is necessary to enable us to know + # if an operator or term is expected next. + if ( $is_block_operator{$block_type} ) { + $tok = $block_type; + } + + $context = $brace_context[$brace_depth]; + if ( $brace_depth > 0 ) { $brace_depth--; } + }, + '&' => sub { # maybe sub call? start looking + + # We have to check for sub call unless we are sure we + # are expecting an operator. This example from s2p + # got mistaken as a q operator in an early version: + # print BODY &q(<<'EOT'); + if ( $expecting != OPERATOR ) { + + # But only look for a sub call if we are expecting a term or + # if there is no existing space after the &. + # For example we probably don't want & as sub call here: + # Fcntl::S_IRUSR & $mode; + if ( $expecting == TERM || $next_type ne 'b' ) { + scan_identifier(); + } + } + else { + } + }, + '<' => sub { # angle operator or less than? + + if ( $expecting != OPERATOR ) { + ( $i, $type ) = + find_angle_operator_termination( $input_line, $i, $rtoken_map, + $expecting, $max_token_index ); + + if ( $type eq '<' && $expecting == TERM ) { + error_if_expecting_TERM(); + interrupt_logfile(); + warning("Unterminated <> operator?\n"); + resume_logfile(); + } + } + else { + } + }, + '?' => sub { # ?: conditional or starting pattern? + + my $is_pattern; + + # Patch for rt #126965 + # a pattern cannot follow certain keywords which take optional + # arguments, like 'shift' and 'pop'. See also '/'. + if ( $last_nonblank_type eq 'k' + && $is_keyword_taking_optional_args{$last_nonblank_token} ) + { + $is_pattern = 0; + } + elsif ( $expecting == UNKNOWN ) { + + my $msg; + ( $is_pattern, $msg ) = + guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($msg) { write_logfile_entry($msg) } + } + else { $is_pattern = ( $expecting == TERM ) } + + if ($is_pattern) { + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = '[msixpodualngc]'; + } + else { + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( QUESTION_COLON, + $rtoken_map->[$i_tok] ); + } + }, + '*' => sub { # typeglob, or multiply? + + if ( $expecting == TERM ) { + scan_identifier(); + } + else { + + if ( $rtokens->[ $i + 1 ] eq '=' ) { + $tok = '*='; + $type = $tok; + $i++; + } + elsif ( $rtokens->[ $i + 1 ] eq '*' ) { + $tok = '**'; + $type = $tok; + $i++; + if ( $rtokens->[ $i + 1 ] eq '=' ) { + $tok = '**='; + $type = $tok; + $i++; + } + } + } + }, + '.' => sub { # what kind of . ? + + if ( $expecting != OPERATOR ) { + scan_number(); + if ( $type eq '.' ) { + error_if_expecting_TERM() + if ( $expecting == TERM ); + } + } + else { + } + }, + ':' => sub { + + # if this is the first nonblank character, call it a label + # since perl seems to just swallow it + if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { + $type = 'J'; + } + + # ATTRS: check for a ':' which introduces an attribute list + # (this might eventually get its own token type) + elsif ( $statement_type =~ /^sub\b/ ) { + $type = 'A'; + $in_attribute_list = 1; + } + + # check for scalar attribute, such as + # my $foo : shared = 1; + elsif ($is_my_our{$statement_type} + && $current_depth[QUESTION_COLON] == 0 ) + { + $type = 'A'; + $in_attribute_list = 1; + } + + # otherwise, it should be part of a ?/: operator + else { + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( QUESTION_COLON, + $rtoken_map->[$i_tok] ); + if ( $last_nonblank_token eq '?' ) { + warning("Syntax error near ? :\n"); + } + } + }, + '+' => sub { # what kind of plus? + + if ( $expecting == TERM ) { + my $number = scan_number(); + + # unary plus is safest assumption if not a number + if ( !defined($number) ) { $type = 'p'; } + } + elsif ( $expecting == OPERATOR ) { + } + else { + if ( $next_type eq 'w' ) { $type = 'p' } + } + }, + '@' => sub { + + error_if_expecting_OPERATOR("Array") + if ( $expecting == OPERATOR ); + scan_identifier(); + }, + '%' => sub { # hash or modulo? + + # first guess is hash if no following blank + if ( $expecting == UNKNOWN ) { + if ( $next_type ne 'b' ) { $expecting = TERM } + } + if ( $expecting == TERM ) { + scan_identifier(); + } + }, + '[' => sub { + $square_bracket_type[ ++$square_bracket_depth ] = + $last_nonblank_token; + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] ); + + # It may seem odd, but structural square brackets have + # type '{' and '}'. This simplifies the indentation logic. + if ( !is_non_structural_brace() ) { + $type = '{'; + } + $square_bracket_structural_type[$square_bracket_depth] = $type; + }, + ']' => sub { + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] ); + + if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) + { + $type = '}'; + } + + # propagate type information for smartmatch operator. This is + # necessary to enable us to know if an operator or term is expected + # next. + if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) { + $tok = $square_bracket_type[$square_bracket_depth]; + } + + if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } + }, + '-' => sub { # what kind of minus? + + if ( ( $expecting != OPERATOR ) + && $is_file_test_operator{$next_tok} ) + { + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i + 1, $rtokens, + $max_token_index ); + + # check for a quoted word like "-w=>xx"; + # it is sufficient to just check for a following '=' + if ( $next_nonblank_token eq '=' ) { + $type = 'm'; + } + else { + $i++; + $tok .= $next_tok; + $type = 'F'; + } + } + elsif ( $expecting == TERM ) { + my $number = scan_number(); + + # maybe part of bareword token? unary is safest + if ( !defined($number) ) { $type = 'm'; } + + } + elsif ( $expecting == OPERATOR ) { + } + else { + + if ( $next_type eq 'w' ) { + $type = 'm'; + } + } + }, + + '^' => sub { + + # check for special variables like ${^WARNING_BITS} + if ( $expecting == TERM ) { + + # FIXME: this should work but will not catch errors + # because we also have to be sure that previous token is + # a type character ($,@,%). + if ( $last_nonblank_token eq '{' + && ( $next_tok =~ /^[A-Za-z_]/ ) ) + { + + if ( $next_tok eq 'W' ) { + $tokenizer_self->{_saw_perl_dash_w} = 1; + } + $tok = $tok . $next_tok; + $i = $i + 1; + $type = 'w'; + } + + else { + unless ( error_if_expecting_TERM() ) { + + # Something like this is valid but strange: + # undef ^I; + complain("The '^' seems unusual here\n"); + } + } + } + }, + + '::' => sub { # probably a sub call + scan_bare_identifier(); + }, + '<<' => sub { # maybe a here-doc? + return + unless ( $i < $max_token_index ) + ; # here-doc not possible if end of line + + if ( $expecting != OPERATOR ) { + my ( $found_target, $here_doc_target, $here_quote_character, + $saw_error ); + ( + $found_target, $here_doc_target, $here_quote_character, $i, + $saw_error + ) + = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($found_target) { + push @{$rhere_target_list}, + [ $here_doc_target, $here_quote_character ]; + $type = 'h'; + if ( length($here_doc_target) > 80 ) { + my $truncated = substr( $here_doc_target, 0, 80 ); + complain("Long here-target: '$truncated' ...\n"); + } + elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { + complain( + "Unconventional here-target: '$here_doc_target'\n"); + } + } + elsif ( $expecting == TERM ) { + unless ($saw_error) { + + # shouldn't happen.. + warning("Program bug; didn't find here doc target\n"); + report_definite_bug(); + } + } + } + else { + } + }, + '<<~' => sub { # a here-doc, new type added in v26 + return + unless ( $i < $max_token_index ) + ; # here-doc not possible if end of line + if ( $expecting != OPERATOR ) { + my ( $found_target, $here_doc_target, $here_quote_character, + $saw_error ); + ( + $found_target, $here_doc_target, $here_quote_character, $i, + $saw_error + ) + = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($found_target) { + + if ( length($here_doc_target) > 80 ) { + my $truncated = substr( $here_doc_target, 0, 80 ); + complain("Long here-target: '$truncated' ...\n"); + } + elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { + complain( + "Unconventional here-target: '$here_doc_target'\n"); + } + + # Note that we put a leading space on the here quote + # character indicate that it may be preceded by spaces + $here_quote_character = " " . $here_quote_character; + push @{$rhere_target_list}, + [ $here_doc_target, $here_quote_character ]; + $type = 'h'; + } + elsif ( $expecting == TERM ) { + unless ($saw_error) { + + # shouldn't happen.. + warning("Program bug; didn't find here doc target\n"); + report_definite_bug(); + } + } + } + else { + } + }, + '->' => sub { + + # if -> points to a bare word, we must scan for an identifier, + # otherwise something like ->y would look like the y operator + scan_identifier(); + }, + + # type = 'pp' for pre-increment, '++' for post-increment + '++' => sub { + if ( $expecting == TERM ) { $type = 'pp' } + elsif ( $expecting == UNKNOWN ) { + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + if ( $next_nonblank_token eq '$' ) { $type = 'pp' } + } + }, + + '=>' => sub { + if ( $last_nonblank_type eq $tok ) { + complain("Repeated '=>'s \n"); + } + + # patch for operator_expected: note if we are in the list (use.t) + # TODO: make version numbers a new token type + if ( $statement_type eq 'use' ) { $statement_type = '_use' } + }, + + # type = 'mm' for pre-decrement, '--' for post-decrement + '--' => sub { + + if ( $expecting == TERM ) { $type = 'mm' } + elsif ( $expecting == UNKNOWN ) { + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + if ( $next_nonblank_token eq '$' ) { $type = 'mm' } + } + }, + + '&&' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + + '||' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + + '//' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + }; + + # ------------------------------------------------------------ + # end hash of code for handling individual token types + # ------------------------------------------------------------ + + my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); + + # These block types terminate statements and do not need a trailing + # semicolon + # patched for SWITCH/CASE/ + my %is_zero_continuation_block_type; + @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; + if elsif else unless while until for foreach switch case given when); + @is_zero_continuation_block_type{@_} = (1) x scalar(@_); + + my %is_not_zero_continuation_block_type; + @_ = qw(sort grep map do eval); + @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); + + my %is_logical_container; + @_ = qw(if elsif unless while and or err not && ! || for foreach); + @is_logical_container{@_} = (1) x scalar(@_); + + my %is_binary_type; + @_ = qw(|| &&); + @is_binary_type{@_} = (1) x scalar(@_); + + my %is_binary_keyword; + @_ = qw(and or err eq ne cmp); + @is_binary_keyword{@_} = (1) x scalar(@_); + + # 'L' is token for opening { at hash key + my %is_opening_type; + @_ = qw< L { ( [ >; + @is_opening_type{@_} = (1) x scalar(@_); + + # 'R' is token for closing } at hash key + my %is_closing_type; + @_ = qw< R } ) ] >; + @is_closing_type{@_} = (1) x scalar(@_); + + my %is_redo_last_next_goto; + @_ = qw(redo last next goto); + @is_redo_last_next_goto{@_} = (1) x scalar(@_); + + my %is_use_require; + @_ = qw(use require); + @is_use_require{@_} = (1) x scalar(@_); + + my %is_sub_package; + @_ = qw(sub package); + @is_sub_package{@_} = (1) x scalar(@_); + + # This hash holds the hash key in $tokenizer_self for these keywords: + my %is_format_END_DATA = ( + 'format' => '_in_format', + '__END__' => '_in_end', + '__DATA__' => '_in_data', + ); + + # original ref: camel 3 p 147, + # but perl may accept undocumented flags + # perl 5.10 adds 'p' (preserve) + # Perl version 5.22 added 'n' + # From http://perldoc.perl.org/perlop.html we have + # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc + # s/PATTERN/REPLACEMENT/msixpodualngcer + # y/SEARCHLIST/REPLACEMENTLIST/cdsr + # tr/SEARCHLIST/REPLACEMENTLIST/cdsr + # qr/STRING/msixpodualn + my %quote_modifiers = ( + 's' => '[msixpodualngcer]', + 'y' => '[cdsr]', + 'tr' => '[cdsr]', + 'm' => '[msixpodualngc]', + 'qr' => '[msixpodualn]', + 'q' => "", + 'qq' => "", + 'qw' => "", + 'qx' => "", + ); + + # table showing how many quoted things to look for after quote operator.. + # s, y, tr have 2 (pattern and replacement) + # others have 1 (pattern only) + my %quote_items = ( + 's' => 2, + 'y' => 2, + 'tr' => 2, + 'm' => 1, + 'qr' => 1, + 'q' => 1, + 'qq' => 1, + 'qw' => 1, + 'qx' => 1, + ); + + sub tokenize_this_line { + + # This routine breaks a line of perl code into tokens which are of use in + # indentation and reformatting. One of my goals has been to define tokens + # such that a newline may be inserted between any pair of tokens without + # changing or invalidating the program. This version comes close to this, + # although there are necessarily a few exceptions which must be caught by + # the formatter. Many of these involve the treatment of bare words. + # + # The tokens and their types are returned in arrays. See previous + # routine for their names. + # + # See also the array "valid_token_types" in the BEGIN section for an + # up-to-date list. + # + # To simplify things, token types are either a single character, or they + # are identical to the tokens themselves. + # + # As a debugging aid, the -D flag creates a file containing a side-by-side + # comparison of the input string and its tokenization for each line of a file. + # This is an invaluable debugging aid. + # + # In addition to tokens, and some associated quantities, the tokenizer + # also returns flags indication any special line types. These include + # quotes, here_docs, formats. + # + # ----------------------------------------------------------------------- + # + # How to add NEW_TOKENS: + # + # New token types will undoubtedly be needed in the future both to keep up + # with changes in perl and to help adapt the tokenizer to other applications. + # + # Here are some notes on the minimal steps. I wrote these notes while + # adding the 'v' token type for v-strings, which are things like version + # numbers 5.6.0, and ip addresses, and will use that as an example. ( You + # can use your editor to search for the string "NEW_TOKENS" to find the + # appropriate sections to change): + # + # *. Try to talk somebody else into doing it! If not, .. + # + # *. Make a backup of your current version in case things don't work out! + # + # *. Think of a new, unused character for the token type, and add to + # the array @valid_token_types in the BEGIN section of this package. + # For example, I used 'v' for v-strings. + # + # *. Implement coding to recognize the $type of the token in this routine. + # This is the hardest part, and is best done by imitating or modifying + # some of the existing coding. For example, to recognize v-strings, I + # patched 'sub scan_bare_identifier' to recognize v-strings beginning with + # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. + # + # *. Update sub operator_expected. This update is critically important but + # the coding is trivial. Look at the comments in that routine for help. + # For v-strings, which should behave like numbers, I just added 'v' to the + # regex used to handle numbers and strings (types 'n' and 'Q'). + # + # *. Implement a 'bond strength' rule in sub set_bond_strengths in + # Perl::Tidy::Formatter for breaking lines around this token type. You can + # skip this step and take the default at first, then adjust later to get + # desired results. For adding type 'v', I looked at sub bond_strength and + # saw that number type 'n' was using default strengths, so I didn't do + # anything. I may tune it up someday if I don't like the way line + # breaks with v-strings look. + # + # *. Implement a 'whitespace' rule in sub set_whitespace_flags in + # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine + # and saw that type 'n' used spaces on both sides, so I just added 'v' + # to the array @spaces_both_sides. + # + # *. Update HtmlWriter package so that users can colorize the token as + # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in + # that package. For v-strings, I initially chose to use a default color + # equal to the default for numbers, but it might be nice to change that + # eventually. + # + # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. + # + # *. Run lots and lots of debug tests. Start with special files designed + # to test the new token type. Run with the -D flag to create a .DEBUG + # file which shows the tokenization. When these work ok, test as many old + # scripts as possible. Start with all of the '.t' files in the 'test' + # directory of the distribution file. Compare .tdy output with previous + # version and updated version to see the differences. Then include as + # many more files as possible. My own technique has been to collect a huge + # number of perl scripts (thousands!) into one directory and run perltidy + # *, then run diff between the output of the previous version and the + # current version. + # + # *. For another example, search for the smartmatch operator '~~' + # with your editor to see where updates were made for it. + # + # ----------------------------------------------------------------------- + + my $line_of_tokens = shift; + my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; + + # patch while coding change is underway + # make callers private data to allow access + # $tokenizer_self = $caller_tokenizer_self; + + # extract line number for use in error messages + $input_line_number = $line_of_tokens->{_line_number}; + + # reinitialize for multi-line quote + $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; + + # check for pod documentation + if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { + + # must not be in multi-line quote + # and must not be in an equation + if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) ) + { + $tokenizer_self->{_in_pod} = 1; + return; + } + } + + $input_line = $untrimmed_input_line; + + chomp $input_line; + + # trim start of this line unless we are continuing a quoted line + # do not trim end because we might end in a quote (test: deken4.pl) + # Perl::Tidy::Formatter will delete needless trailing blanks + unless ( $in_quote && ( $quote_type eq 'Q' ) ) { + $input_line =~ s/^\s*//; # trim left end + } + + # Set a flag to indicate if we might be at an __END__ or __DATA__ line + # This will be used below to avoid quoting a bare word followed by + # a fat comma. + my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/; + + # update the copy of the line for use in error messages + # This must be exactly what we give the pre_tokenizer + $tokenizer_self->{_line_text} = $input_line; + + # re-initialize for the main loop + $routput_token_list = []; # stack of output token indexes + $routput_token_type = []; # token types + $routput_block_type = []; # types of code block + $routput_container_type = []; # paren types, such as if, elsif, .. + $routput_type_sequence = []; # nesting sequential number + + $rhere_target_list = []; + + $tok = $last_nonblank_token; + $type = $last_nonblank_type; + $prototype = $last_nonblank_prototype; + $last_nonblank_i = -1; + $block_type = $last_nonblank_block_type; + $container_type = $last_nonblank_container_type; + $type_sequence = $last_nonblank_type_sequence; + $indent_flag = 0; + $peeked_ahead = 0; + + # tokenization is done in two stages.. + # stage 1 is a very simple pre-tokenization + my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens + + # a little optimization for a full-line comment + if ( !$in_quote && ( $input_line =~ /^#/ ) ) { + $max_tokens_wanted = 1 # no use tokenizing a comment + } + + # start by breaking the line into pre-tokens + ( $rtokens, $rtoken_map, $rtoken_type ) = + pre_tokenize( $input_line, $max_tokens_wanted ); + + $max_token_index = scalar( @{$rtokens} ) - 1; + push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic + push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced + push( @{$rtoken_type}, 'b', 'b', 'b' ); + + # initialize for main loop + foreach my $ii ( 0 .. $max_token_index + 3 ) { + $routput_token_type->[$ii] = ""; + $routput_block_type->[$ii] = ""; + $routput_container_type->[$ii] = ""; + $routput_type_sequence->[$ii] = ""; + $routput_indent_flag->[$ii] = 0; + } + $i = -1; + $i_tok = -1; + + # ------------------------------------------------------------ + # begin main tokenization loop + # ------------------------------------------------------------ + + # we are looking at each pre-token of one line and combining them + # into tokens + while ( ++$i <= $max_token_index ) { + + if ($in_quote) { # continue looking for end of a quote + $type = $quote_type; + + unless ( @{$routput_token_list} ) + { # initialize if continuation line + push( @{$routput_token_list}, $i ); + $routput_token_type->[$i] = $type; + + } + $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); + + # scan for the end of the quote or pattern + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2 + ) + = do_quote( + $i, $in_quote, $quote_character, + $quote_pos, $quote_depth, $quoted_string_1, + $quoted_string_2, $rtokens, $rtoken_map, + $max_token_index + ); + + # all done if we didn't find it + last if ($in_quote); + + # save pattern and replacement text for rescanning + my $qs1 = $quoted_string_1; + my $qs2 = $quoted_string_2; + + # re-initialize for next search + $quote_character = ''; + $quote_pos = 0; + $quote_type = 'Q'; + $quoted_string_1 = ""; + $quoted_string_2 = ""; + last if ( ++$i > $max_token_index ); + + # look for any modifiers + if ($allowed_quote_modifiers) { + + # check for exact quote modifiers + if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) { + my $str = $rtokens->[$i]; + my $saw_modifier_e; + while ( $str =~ /\G$allowed_quote_modifiers/gc ) { + my $pos = pos($str); + my $char = substr( $str, $pos - 1, 1 ); + $saw_modifier_e ||= ( $char eq 'e' ); + } + + # For an 'e' quote modifier we must scan the replacement + # text for here-doc targets. + if ($saw_modifier_e) { + + my $rht = scan_replacement_text($qs1); + + # Change type from 'Q' to 'h' for quotes with + # here-doc targets so that the formatter (see sub + # print_line_of_tokens) will not make any line + # breaks after this point. + if ($rht) { + push @{$rhere_target_list}, @{$rht}; + $type = 'h'; + if ( $i_tok < 0 ) { + my $ilast = $routput_token_list->[-1]; + $routput_token_type->[$ilast] = $type; + } + } + } + + if ( defined( pos($str) ) ) { + + # matched + if ( pos($str) == length($str) ) { + last if ( ++$i > $max_token_index ); + } + + # Looks like a joined quote modifier + # and keyword, maybe something like + # s/xxx/yyy/gefor @k=... + # Example is "galgen.pl". Would have to split + # the word and insert a new token in the + # pre-token list. This is so rare that I haven't + # done it. Will just issue a warning citation. + + # This error might also be triggered if my quote + # modifier characters are incomplete + else { + warning(<[$i]\n"; + # my $num = length($str) - pos($str); + # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num); + # print "continuing with new token $rtokens->[$i]\n"; + + # skipping past this token does least damage + last if ( ++$i > $max_token_index ); + } + } + else { + + # example file: rokicki4.pl + # This error might also be triggered if my quote + # modifier characters are incomplete + write_logfile_entry( +"Note: found word $str at quote modifier location\n" + ); + } + } + + # re-initialize + $allowed_quote_modifiers = ""; + } + } + + unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) { + + # try to catch some common errors + if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { + + if ( $last_nonblank_token eq 'eq' ) { + complain("Should 'eq' be '==' here ?\n"); + } + elsif ( $last_nonblank_token eq 'ne' ) { + complain("Should 'ne' be '!=' here ?\n"); + } + } + + $last_last_nonblank_token = $last_nonblank_token; + $last_last_nonblank_type = $last_nonblank_type; + $last_last_nonblank_block_type = $last_nonblank_block_type; + $last_last_nonblank_container_type = + $last_nonblank_container_type; + $last_last_nonblank_type_sequence = + $last_nonblank_type_sequence; + $last_nonblank_token = $tok; + $last_nonblank_type = $type; + $last_nonblank_prototype = $prototype; + $last_nonblank_block_type = $block_type; + $last_nonblank_container_type = $container_type; + $last_nonblank_type_sequence = $type_sequence; + $last_nonblank_i = $i_tok; + } + + # store previous token type + if ( $i_tok >= 0 ) { + $routput_token_type->[$i_tok] = $type; + $routput_block_type->[$i_tok] = $block_type; + $routput_container_type->[$i_tok] = $container_type; + $routput_type_sequence->[$i_tok] = $type_sequence; + $routput_indent_flag->[$i_tok] = $indent_flag; + } + my $pre_tok = $rtokens->[$i]; # get the next pre-token + my $pre_type = $rtoken_type->[$i]; # and type + $tok = $pre_tok; + $type = $pre_type; # to be modified as necessary + $block_type = ""; # blank for all tokens except code block braces + $container_type = ""; # blank for all tokens except some parens + $type_sequence = ""; # blank for all tokens except ?/: + $indent_flag = 0; + $prototype = ""; # blank for all tokens except user defined subs + $i_tok = $i; + + # this pre-token will start an output token + push( @{$routput_token_list}, $i_tok ); + + # continue gathering identifier if necessary + # but do not start on blanks and comments + if ( $id_scan_state && $pre_type !~ /[b#]/ ) { + + if ( $id_scan_state =~ /^(sub|package)/ ) { + scan_id(); + } + else { + scan_identifier(); + } + + last if ($id_scan_state); + next if ( ( $i > 0 ) || $type ); + + # didn't find any token; start over + $type = $pre_type; + $tok = $pre_tok; + } + + # handle whitespace tokens.. + next if ( $type eq 'b' ); + my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : ' '; + my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b'; + + # Build larger tokens where possible, since we are not in a quote. + # + # First try to assemble digraphs. The following tokens are + # excluded and handled specially: + # '/=' is excluded because the / might start a pattern. + # 'x=' is excluded since it might be $x=, with $ on previous line + # '**' and *= might be typeglobs of punctuation variables + # I have allowed tokens starting with <, such as <=, + # because I don't think these could be valid angle operators. + # test file: storrs4.pl + my $test_tok = $tok . $rtokens->[ $i + 1 ]; + my $combine_ok = $is_digraph{$test_tok}; + + # check for special cases which cannot be combined + if ($combine_ok) { + + # '//' must be defined_or operator if an operator is expected. + # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) + # could be migrated here for clarity + + # Patch for RT#102371, misparsing a // in the following snippet: + # state $b //= ccc(); + # The solution is to always accept the digraph (or trigraph) after + # token type 'Z' (possible file handle). The reason is that + # sub operator_expected gives TERM expected here, which is + # wrong in this case. + if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) { + my $next_type = $rtokens->[ $i + 1 ]; + my $expecting = + operator_expected( $prev_type, $tok, $next_type ); + + # Patched for RT#101547, was 'unless ($expecting==OPERATOR)' + $combine_ok = 0 if ( $expecting == TERM ); + } + + # Patch for RT #114359: Missparsing of "print $x ** 0.5; + # Accept the digraphs '**' only after type 'Z' + # Otherwise postpone the decision. + if ( $test_tok eq '**' ) { + if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 } + } + } + + if ( + $combine_ok + + && ( $test_tok ne '/=' ) # might be pattern + && ( $test_tok ne 'x=' ) # might be $x + && ( $test_tok ne '*=' ) # typeglob? + + # Moved above as part of fix for + # RT #114359: Missparsing of "print $x ** 0.5; + # && ( $test_tok ne '**' ) # typeglob? + ) + { + $tok = $test_tok; + $i++; + + # Now try to assemble trigraphs. Note that all possible + # perl trigraphs can be constructed by appending a character + # to a digraph. + $test_tok = $tok . $rtokens->[ $i + 1 ]; + + if ( $is_trigraph{$test_tok} ) { + $tok = $test_tok; + $i++; + } + + # The only current tetragraph is the double diamond operator + # and its first three characters are not a trigraph, so + # we do can do a special test for it + elsif ( $test_tok eq '<<>' ) { + $test_tok .= $rtokens->[ $i + 2 ]; + if ( $is_tetragraph{$test_tok} ) { + $tok = $test_tok; + $i += 2; + } + } + } + + $type = $tok; + $next_tok = $rtokens->[ $i + 1 ]; + $next_type = $rtoken_type->[ $i + 1 ]; + + TOKENIZER_DEBUG_FLAG_TOKENIZE && do { + local $" = ')('; + my @debug_list = ( + $last_nonblank_token, $tok, + $next_tok, $brace_depth, + $brace_type[$brace_depth], $paren_depth, + $paren_type[$paren_depth] + ); + print STDOUT "TOKENIZE:(@debug_list)\n"; + }; + + # turn off attribute list on first non-blank, non-bareword + if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } + + ############################################################### + # We have the next token, $tok. + # Now we have to examine this token and decide what it is + # and define its $type + # + # section 1: bare words + ############################################################### + + if ( $pre_type eq 'w' ) { + $expecting = operator_expected( $prev_type, $tok, $next_type ); + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # ATTRS: handle sub and variable attributes + if ($in_attribute_list) { + + # treat bare word followed by open paren like qw( + if ( $next_nonblank_token eq '(' ) { + $in_quote = $quote_items{'q'}; + $allowed_quote_modifiers = $quote_modifiers{'q'}; + $type = 'q'; + $quote_type = 'q'; + next; + } + + # handle bareword not followed by open paren + else { + $type = 'w'; + next; + } + } + + # quote a word followed by => operator + # unless the word __END__ or __DATA__ and the only word on + # the line. + if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) { + + if ( $rtokens->[ $i_next + 1 ] eq '>' ) { + if ( $is_constant{$current_package}{$tok} ) { + $type = 'C'; + } + elsif ( $is_user_function{$current_package}{$tok} ) { + $type = 'U'; + $prototype = + $user_function_prototype{$current_package}{$tok}; + } + elsif ( $tok =~ /^v\d+$/ ) { + $type = 'v'; + report_v_string($tok); + } + else { $type = 'w' } + + next; + } + } + + # quote a bare word within braces..like xxx->{s}; note that we + # must be sure this is not a structural brace, to avoid + # mistaking {s} in the following for a quoted bare word: + # for(@[){s}bla}BLA} + # Also treat q in something like var{-q} as a bare word, not qoute operator + if ( + $next_nonblank_token eq '}' + && ( + $last_nonblank_type eq 'L' + || ( $last_nonblank_type eq 'm' + && $last_last_nonblank_type eq 'L' ) + ) + ) + { + $type = 'w'; + next; + } + + # a bare word immediately followed by :: is not a keyword; + # use $tok_kw when testing for keywords to avoid a mistake + my $tok_kw = $tok; + if ( $rtokens->[ $i + 1 ] eq ':' + && $rtokens->[ $i + 2 ] eq ':' ) + { + $tok_kw .= '::'; + } + + # handle operator x (now we know it isn't $x=) + if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { + if ( $tok eq 'x' ) { + + if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= + $tok = 'x='; + $type = $tok; + $i++; + } + else { + $type = 'x'; + } + } + + # FIXME: Patch: mark something like x4 as an integer for now + # It gets fixed downstream. This is easier than + # splitting the pretoken. + else { + $type = 'n'; + } + } + elsif ( $tok_kw eq 'CORE::' ) { + $type = $tok = $tok_kw; + $i += 2; + } + elsif ( ( $tok eq 'strict' ) + and ( $last_nonblank_token eq 'use' ) ) + { + $tokenizer_self->{_saw_use_strict} = 1; + scan_bare_identifier(); + } + + elsif ( ( $tok eq 'warnings' ) + and ( $last_nonblank_token eq 'use' ) ) + { + $tokenizer_self->{_saw_perl_dash_w} = 1; + + # scan as identifier, so that we pick up something like: + # use warnings::register + scan_bare_identifier(); + } + + elsif ( + $tok eq 'AutoLoader' + && $tokenizer_self->{_look_for_autoloader} + && ( + $last_nonblank_token eq 'use' + + # these regexes are from AutoSplit.pm, which we want + # to mimic + || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ + || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ + ) + ) + { + write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); + $tokenizer_self->{_saw_autoloader} = 1; + $tokenizer_self->{_look_for_autoloader} = 0; + scan_bare_identifier(); + } + + elsif ( + $tok eq 'SelfLoader' + && $tokenizer_self->{_look_for_selfloader} + && ( $last_nonblank_token eq 'use' + || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ + || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) + ) + { + write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); + $tokenizer_self->{_saw_selfloader} = 1; + $tokenizer_self->{_look_for_selfloader} = 0; + scan_bare_identifier(); + } + + elsif ( ( $tok eq 'constant' ) + and ( $last_nonblank_token eq 'use' ) ) + { + scan_bare_identifier(); + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, + $max_token_index ); + + if ($next_nonblank_token) { + + if ( $is_keyword{$next_nonblank_token} ) { + + # Assume qw is used as a quote and okay, as in: + # use constant qw{ DEBUG 0 }; + # Not worth trying to parse for just a warning + + # NOTE: This warning is deactivated because recent + # versions of perl do not complain here, but + # the coding is retained for reference. + if ( 0 && $next_nonblank_token ne 'qw' ) { + warning( +"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" + ); + } + } + + # FIXME: could check for error in which next token is + # not a word (number, punctuation, ..) + else { + $is_constant{$current_package}{$next_nonblank_token} + = 1; + } + } + } + + # various quote operators + elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { +##NICOL PATCH + if ( $expecting == OPERATOR ) { + + # Be careful not to call an error for a qw quote + # where a parenthesized list is allowed. For example, + # it could also be a for/foreach construct such as + # + # foreach my $key qw\Uno Due Tres Quadro\ { + # print "Set $key\n"; + # } + # + + # Or it could be a function call. + # NOTE: Braces in something like &{ xxx } are not + # marked as a block, we might have a method call. + # &method(...), $method->(..), &{method}(...), + # $ref[2](list) is ok & short for $ref[2]->(list) + # + # See notes in 'sub code_block_type' and + # 'sub is_non_structural_brace' + + unless ( + $tok eq 'qw' + && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ + || $is_for_foreach{$want_paren} ) + ) + { + error_if_expecting_OPERATOR(); + } + } + $in_quote = $quote_items{$tok}; + $allowed_quote_modifiers = $quote_modifiers{$tok}; + + # All quote types are 'Q' except possibly qw quotes. + # qw quotes are special in that they may generally be trimmed + # of leading and trailing whitespace. So they are given a + # separate type, 'q', unless requested otherwise. + $type = + ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) + ? 'q' + : 'Q'; + $quote_type = $type; + } + + # check for a statement label + elsif ( + ( $next_nonblank_token eq ':' ) + && ( $rtokens->[ $i_next + 1 ] ne ':' ) + && ( $i_next <= $max_token_index ) # colon on same line + && label_ok() + ) + { + if ( $tok !~ /[A-Z]/ ) { + push @{ $tokenizer_self->{_rlower_case_labels_at} }, + $input_line_number; + } + $type = 'J'; + $tok .= ':'; + $i = $i_next; + next; + } + + # 'sub' || 'package' + elsif ( $is_sub_package{$tok_kw} ) { + error_if_expecting_OPERATOR() + if ( $expecting == OPERATOR ); + scan_id(); + } + + # Note on token types for format, __DATA__, __END__: + # It simplifies things to give these type ';', so that when we + # start rescanning we will be expecting a token of type TERM. + # We will switch to type 'k' before outputting the tokens. + elsif ( $is_format_END_DATA{$tok_kw} ) { + $type = ';'; # make tokenizer look for TERM next + $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; + last; + } + + elsif ( $is_keyword{$tok_kw} ) { + $type = 'k'; + + # Since for and foreach may not be followed immediately + # by an opening paren, we have to remember which keyword + # is associated with the next '(' + if ( $is_for_foreach{$tok} ) { + if ( new_statement_ok() ) { + $want_paren = $tok; + } + } + + # recognize 'use' statements, which are special + elsif ( $is_use_require{$tok} ) { + $statement_type = $tok; + error_if_expecting_OPERATOR() + if ( $expecting == OPERATOR ); + } + + # remember my and our to check for trailing ": shared" + elsif ( $is_my_our{$tok} ) { + $statement_type = $tok; + } + + # Check for misplaced 'elsif' and 'else', but allow isolated + # else or elsif blocks to be formatted. This is indicated + # by a last noblank token of ';' + elsif ( $tok eq 'elsif' ) { + if ( $last_nonblank_token ne ';' + && $last_nonblank_block_type !~ + /^(if|elsif|unless)$/ ) + { + warning( +"expecting '$tok' to follow one of 'if|elsif|unless'\n" + ); + } + } + elsif ( $tok eq 'else' ) { + + # patched for SWITCH/CASE + if ( + $last_nonblank_token ne ';' + && $last_nonblank_block_type !~ + /^(if|elsif|unless|case|when)$/ + + # patch to avoid an unwanted error message for + # the case of a parenless 'case' (RT 105484): + # switch ( 1 ) { case x { 2 } else { } } + && $statement_type !~ + /^(if|elsif|unless|case|when)$/ + ) + { + warning( +"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" + ); + } + } + elsif ( $tok eq 'continue' ) { + if ( $last_nonblank_token ne ';' + && $last_nonblank_block_type !~ + /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) + { + + # note: ';' '{' and '}' in list above + # because continues can follow bare blocks; + # ':' is labeled block + # + ############################################ + # NOTE: This check has been deactivated because + # continue has an alternative usage for given/when + # blocks in perl 5.10 + ## warning("'$tok' should follow a block\n"); + ############################################ + } + } + + # patch for SWITCH/CASE if 'case' and 'when are + # treated as keywords. + elsif ( $tok eq 'when' || $tok eq 'case' ) { + $statement_type = $tok; # next '{' is block + } + + # + # indent trailing if/unless/while/until + # outdenting will be handled by later indentation loop +## DEACTIVATED: unfortunately this can cause some unwanted indentation like: +##$opt_o = 1 +## if !( +## $opt_b +## || $opt_c +## || $opt_d +## || $opt_f +## || $opt_i +## || $opt_l +## || $opt_o +## || $opt_x +## ); +## if ( $tok =~ /^(if|unless|while|until)$/ +## && $next_nonblank_token ne '(' ) +## { +## $indent_flag = 1; +## } + } + + # check for inline label following + # /^(redo|last|next|goto)$/ + elsif (( $last_nonblank_type eq 'k' ) + && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) + { + $type = 'j'; + next; + } + + # something else -- + else { + + scan_bare_identifier(); + if ( $type eq 'w' ) { + + if ( $expecting == OPERATOR ) { + + # don't complain about possible indirect object + # notation. + # For example: + # package main; + # sub new($) { ... } + # $b = new A::; # calls A::new + # $c = new A; # same thing but suspicious + # This will call A::new but we have a 'new' in + # main:: which looks like a constant. + # + if ( $last_nonblank_type eq 'C' ) { + if ( $tok !~ /::$/ ) { + complain(<[ $i + 1 ]; + if ( $next_tok eq '(' ) { + $type = 'U'; + } + + # underscore after file test operator is file handle + if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { + $type = 'Z'; + } + + # patch for SWITCH/CASE if 'case' and 'when are + # not treated as keywords: + if ( + ( + $tok eq 'case' + && $brace_type[$brace_depth] eq 'switch' + ) + || ( $tok eq 'when' + && $brace_type[$brace_depth] eq 'given' ) + ) + { + $statement_type = $tok; # next '{' is block + $type = 'k'; # for keyword syntax coloring + } + + # patch for SWITCH/CASE if switch and given not keywords + # Switch is not a perl 5 keyword, but we will gamble + # and mark switch followed by paren as a keyword. This + # is only necessary to get html syntax coloring nice, + # and does not commit this as being a switch/case. + if ( $next_nonblank_token eq '(' + && ( $tok eq 'switch' || $tok eq 'given' ) ) + { + $type = 'k'; # for keyword syntax coloring + } + } + } + } + + ############################################################### + # section 2: strings of digits + ############################################################### + elsif ( $pre_type eq 'd' ) { + $expecting = operator_expected( $prev_type, $tok, $next_type ); + error_if_expecting_OPERATOR("Number") + if ( $expecting == OPERATOR ); + my $number = scan_number(); + if ( !defined($number) ) { + + # shouldn't happen - we should always get a number + warning("non-number beginning with digit--program bug\n"); + report_definite_bug(); + } + } + + ############################################################### + # section 3: all other tokens + ############################################################### + + else { + last if ( $tok eq '#' ); + my $code = $tokenization_code->{$tok}; + if ($code) { + $expecting = + operator_expected( $prev_type, $tok, $next_type ); + $code->(); + redo if $in_quote; + } + } + } + + # ----------------------------- + # end of main tokenization loop + # ----------------------------- + + if ( $i_tok >= 0 ) { + $routput_token_type->[$i_tok] = $type; + $routput_block_type->[$i_tok] = $block_type; + $routput_container_type->[$i_tok] = $container_type; + $routput_type_sequence->[$i_tok] = $type_sequence; + $routput_indent_flag->[$i_tok] = $indent_flag; + } + + unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { + $last_last_nonblank_token = $last_nonblank_token; + $last_last_nonblank_type = $last_nonblank_type; + $last_last_nonblank_block_type = $last_nonblank_block_type; + $last_last_nonblank_container_type = $last_nonblank_container_type; + $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; + $last_nonblank_token = $tok; + $last_nonblank_type = $type; + $last_nonblank_block_type = $block_type; + $last_nonblank_container_type = $container_type; + $last_nonblank_type_sequence = $type_sequence; + $last_nonblank_prototype = $prototype; + } + + # reset indentation level if necessary at a sub or package + # in an attempt to recover from a nesting error + if ( $level_in_tokenizer < 0 ) { + if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { + reset_indentation_level(0); + brace_warning("resetting level to 0 at $1 $2\n"); + } + } + + # all done tokenizing this line ... + # now prepare the final list of tokens and types + + my @token_type = (); # stack of output token types + my @block_type = (); # stack of output code block types + my @container_type = (); # stack of output code container types + my @type_sequence = (); # stack of output type sequence numbers + my @tokens = (); # output tokens + my @levels = (); # structural brace levels of output tokens + my @slevels = (); # secondary nesting levels of output tokens + my @nesting_tokens = (); # string of tokens leading to this depth + my @nesting_types = (); # string of token types leading to this depth + my @nesting_blocks = (); # string of block types leading to this depth + my @nesting_lists = (); # string of list types leading to this depth + my @ci_string = (); # string needed to compute continuation indentation + my @container_environment = (); # BLOCK or LIST + my $container_environment = ''; + my $im = -1; # previous $i value + my $num; + my $ci_string_sum = ones_count($ci_string_in_tokenizer); + +# Computing Token Indentation +# +# The final section of the tokenizer forms tokens and also computes +# parameters needed to find indentation. It is much easier to do it +# in the tokenizer than elsewhere. Here is a brief description of how +# indentation is computed. Perl::Tidy computes indentation as the sum +# of 2 terms: +# +# (1) structural indentation, such as if/else/elsif blocks +# (2) continuation indentation, such as long parameter call lists. +# +# These are occasionally called primary and secondary indentation. +# +# Structural indentation is introduced by tokens of type '{', although +# the actual tokens might be '{', '(', or '['. Structural indentation +# is of two types: BLOCK and non-BLOCK. Default structural indentation +# is 4 characters if the standard indentation scheme is used. +# +# Continuation indentation is introduced whenever a line at BLOCK level +# is broken before its termination. Default continuation indentation +# is 2 characters in the standard indentation scheme. +# +# Both types of indentation may be nested arbitrarily deep and +# interlaced. The distinction between the two is somewhat arbitrary. +# +# For each token, we will define two variables which would apply if +# the current statement were broken just before that token, so that +# that token started a new line: +# +# $level = the structural indentation level, +# $ci_level = the continuation indentation level +# +# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), +# assuming defaults. However, in some special cases it is customary +# to modify $ci_level from this strict value. +# +# The total structural indentation is easy to compute by adding and +# subtracting 1 from a saved value as types '{' and '}' are seen. The +# running value of this variable is $level_in_tokenizer. +# +# The total continuation is much more difficult to compute, and requires +# several variables. These variables are: +# +# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for +# each indentation level, if there are intervening open secondary +# structures just prior to that level. +# $continuation_string_in_tokenizer = a string of 1's and 0's indicating +# if the last token at that level is "continued", meaning that it +# is not the first token of an expression. +# $nesting_block_string = a string of 1's and 0's indicating, for each +# indentation level, if the level is of type BLOCK or not. +# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string +# $nesting_list_string = a string of 1's and 0's indicating, for each +# indentation level, if it is appropriate for list formatting. +# If so, continuation indentation is used to indent long list items. +# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string +# @{$rslevel_stack} = a stack of total nesting depths at each +# structural indentation level, where "total nesting depth" means +# the nesting depth that would occur if every nesting token -- '{', '[', +# and '(' -- , regardless of context, is used to compute a nesting +# depth. + + #my $nesting_block_flag = ($nesting_block_string =~ /1$/); + #my $nesting_list_flag = ($nesting_list_string =~ /1$/); + + my ( $ci_string_i, $level_i, $nesting_block_string_i, + $nesting_list_string_i, $nesting_token_string_i, + $nesting_type_string_i, ); + + foreach my $i ( @{$routput_token_list} ) + { # scan the list of pre-tokens indexes + + # self-checking for valid token types + my $type = $routput_token_type->[$i]; + my $forced_indentation_flag = $routput_indent_flag->[$i]; + + # See if we should undo the $forced_indentation_flag. + # Forced indentation after 'if', 'unless', 'while' and 'until' + # expressions without trailing parens is optional and doesn't + # always look good. It is usually okay for a trailing logical + # expression, but if the expression is a function call, code block, + # or some kind of list it puts in an unwanted extra indentation + # level which is hard to remove. + # + # Example where extra indentation looks ok: + # return 1 + # if $det_a < 0 and $det_b > 0 + # or $det_a > 0 and $det_b < 0; + # + # Example where extra indentation is not needed because + # the eval brace also provides indentation: + # print "not " if defined eval { + # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; + # }; + # + # The following rule works fairly well: + # Undo the flag if the end of this line, or start of the next + # line, is an opening container token or a comma. + # This almost always works, but if not after another pass it will + # be stable. + if ( $forced_indentation_flag && $type eq 'k' ) { + my $ixlast = -1; + my $ilast = $routput_token_list->[$ixlast]; + my $toklast = $routput_token_type->[$ilast]; + if ( $toklast eq '#' ) { + $ixlast--; + $ilast = $routput_token_list->[$ixlast]; + $toklast = $routput_token_type->[$ilast]; + } + if ( $toklast eq 'b' ) { + $ixlast--; + $ilast = $routput_token_list->[$ixlast]; + $toklast = $routput_token_type->[$ilast]; + } + if ( $toklast =~ /^[\{,]$/ ) { + $forced_indentation_flag = 0; + } + else { + ( $toklast, my $i_next ) = + find_next_nonblank_token( $max_token_index, $rtokens, + $max_token_index ); + if ( $toklast =~ /^[\{,]$/ ) { + $forced_indentation_flag = 0; + } + } + } + + # if we are already in an indented if, see if we should outdent + if ($indented_if_level) { + + # don't try to nest trailing if's - shouldn't happen + if ( $type eq 'k' ) { + $forced_indentation_flag = 0; + } + + # check for the normal case - outdenting at next ';' + elsif ( $type eq ';' ) { + if ( $level_in_tokenizer == $indented_if_level ) { + $forced_indentation_flag = -1; + $indented_if_level = 0; + } + } + + # handle case of missing semicolon + elsif ( $type eq '}' ) { + if ( $level_in_tokenizer == $indented_if_level ) { + $indented_if_level = 0; + + # TBD: This could be a subroutine call + $level_in_tokenizer--; + if ( @{$rslevel_stack} > 1 ) { + pop( @{$rslevel_stack} ); + } + if ( length($nesting_block_string) > 1 ) + { # true for valid script + chop $nesting_block_string; + chop $nesting_list_string; + } + + } + } + } + + my $tok = $rtokens->[$i]; # the token, but ONLY if same as pretoken + $level_i = $level_in_tokenizer; + + # This can happen by running perltidy on non-scripts + # although it could also be bug introduced by programming change. + # Perl silently accepts a 032 (^Z) and takes it as the end + if ( !$is_valid_token_type{$type} ) { + my $val = ord($type); + warning( + "unexpected character decimal $val ($type) in script\n"); + $tokenizer_self->{_in_error} = 1; + } + + # ---------------------------------------------------------------- + # TOKEN TYPE PATCHES + # output __END__, __DATA__, and format as type 'k' instead of ';' + # to make html colors correct, etc. + my $fix_type = $type; + if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } + + # output anonymous 'sub' as keyword + if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } + + # ----------------------------------------------------------------- + + $nesting_token_string_i = $nesting_token_string; + $nesting_type_string_i = $nesting_type_string; + $nesting_block_string_i = $nesting_block_string; + $nesting_list_string_i = $nesting_list_string; + + # set primary indentation levels based on structural braces + # Note: these are set so that the leading braces have a HIGHER + # level than their CONTENTS, which is convenient for indentation + # Also, define continuation indentation for each token. + if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) + { + + # use environment before updating + $container_environment = + $nesting_block_flag ? 'BLOCK' + : $nesting_list_flag ? 'LIST' + : ""; + + # if the difference between total nesting levels is not 1, + # there are intervening non-structural nesting types between + # this '{' and the previous unclosed '{' + my $intervening_secondary_structure = 0; + if ( @{$rslevel_stack} ) { + $intervening_secondary_structure = + $slevel_in_tokenizer - $rslevel_stack->[-1]; + } + + # Continuation Indentation + # + # Having tried setting continuation indentation both in the formatter and + # in the tokenizer, I can say that setting it in the tokenizer is much, + # much easier. The formatter already has too much to do, and can't + # make decisions on line breaks without knowing what 'ci' will be at + # arbitrary locations. + # + # But a problem with setting the continuation indentation (ci) here + # in the tokenizer is that we do not know where line breaks will actually + # be. As a result, we don't know if we should propagate continuation + # indentation to higher levels of structure. + # + # For nesting of only structural indentation, we never need to do this. + # For example, in a long if statement, like this + # + # if ( !$output_block_type[$i] + # && ($in_statement_continuation) ) + # { <--outdented + # do_something(); + # } + # + # the second line has ci but we do normally give the lines within the BLOCK + # any ci. This would be true if we had blocks nested arbitrarily deeply. + # + # But consider something like this, where we have created a break after + # an opening paren on line 1, and the paren is not (currently) a + # structural indentation token: + # + # my $file = $menubar->Menubutton( + # qw/-text File -underline 0 -menuitems/ => [ + # [ + # Cascade => '~View', + # -menuitems => [ + # ... + # + # The second line has ci, so it would seem reasonable to propagate it + # down, giving the third line 1 ci + 1 indentation. This suggests the + # following rule, which is currently used to propagating ci down: if there + # are any non-structural opening parens (or brackets, or braces), before + # an opening structural brace, then ci is propagated down, and otherwise + # not. The variable $intervening_secondary_structure contains this + # information for the current token, and the string + # "$ci_string_in_tokenizer" is a stack of previous values of this + # variable. + + # save the current states + push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); + $level_in_tokenizer++; + + if ($forced_indentation_flag) { + + # break BEFORE '?' when there is forced indentation + if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } + if ( $type eq 'k' ) { + $indented_if_level = $level_in_tokenizer; + } + + # do not change container environment here if we are not + # at a real list. Adding this check prevents "blinkers" + # often near 'unless" clauses, such as in the following + # code: +## next +## unless -e ( +## $archive = +## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" ) +## ); + + $nesting_block_string .= "$nesting_block_flag"; + } + else { + + if ( $routput_block_type->[$i] ) { + $nesting_block_flag = 1; + $nesting_block_string .= '1'; + } + else { + $nesting_block_flag = 0; + $nesting_block_string .= '0'; + } + } + + # we will use continuation indentation within containers + # which are not blocks and not logical expressions + my $bit = 0; + if ( !$routput_block_type->[$i] ) { + + # propagate flag down at nested open parens + if ( $routput_container_type->[$i] eq '(' ) { + $bit = 1 if $nesting_list_flag; + } + + # use list continuation if not a logical grouping + # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ + else { + $bit = 1 + unless + $is_logical_container{ $routput_container_type->[$i] + }; + } + } + $nesting_list_string .= $bit; + $nesting_list_flag = $bit; + + $ci_string_in_tokenizer .= + ( $intervening_secondary_structure != 0 ) ? '1' : '0'; + $ci_string_sum = ones_count($ci_string_in_tokenizer); + $continuation_string_in_tokenizer .= + ( $in_statement_continuation > 0 ) ? '1' : '0'; + + # Sometimes we want to give an opening brace continuation indentation, + # and sometimes not. For code blocks, we don't do it, so that the leading + # '{' gets outdented, like this: + # + # if ( !$output_block_type[$i] + # && ($in_statement_continuation) ) + # { <--outdented + # + # For other types, we will give them continuation indentation. For example, + # here is how a list looks with the opening paren indented: + # + # @LoL = + # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], + # [ "homer", "marge", "bart" ], ); + # + # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) + + my $total_ci = $ci_string_sum; + if ( + !$routput_block_type->[$i] # patch: skip for BLOCK + && ($in_statement_continuation) + && !( $forced_indentation_flag && $type eq ':' ) + ) + { + $total_ci += $in_statement_continuation + unless ( $ci_string_in_tokenizer =~ /1$/ ); + } + + $ci_string_i = $total_ci; + $in_statement_continuation = 0; + } + + elsif ($type eq '}' + || $type eq 'R' + || $forced_indentation_flag < 0 ) + { + + # only a nesting error in the script would prevent popping here + if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } + + $level_i = --$level_in_tokenizer; + + # restore previous level values + if ( length($nesting_block_string) > 1 ) + { # true for valid script + chop $nesting_block_string; + $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); + chop $nesting_list_string; + $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); + + chop $ci_string_in_tokenizer; + $ci_string_sum = ones_count($ci_string_in_tokenizer); + + $in_statement_continuation = + chop $continuation_string_in_tokenizer; + + # zero continuation flag at terminal BLOCK '}' which + # ends a statement. + if ( $routput_block_type->[$i] ) { + + # ...These include non-anonymous subs + # note: could be sub ::abc { or sub 'abc + if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { + + # note: older versions of perl require the /gc modifier + # here or else the \G does not work. + if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) + { + $in_statement_continuation = 0; + } + } + +# ...and include all block types except user subs with +# block prototypes and these: (sort|grep|map|do|eval) +# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ + elsif ( + $is_zero_continuation_block_type{ + $routput_block_type->[$i] + } ) + { + $in_statement_continuation = 0; + } + + # ..but these are not terminal types: + # /^(sort|grep|map|do|eval)$/ ) + elsif ( + $is_not_zero_continuation_block_type{ + $routput_block_type->[$i] + } ) + { + } + + # ..and a block introduced by a label + # /^\w+\s*:$/gc ) { + elsif ( $routput_block_type->[$i] =~ /:$/ ) { + $in_statement_continuation = 0; + } + + # user function with block prototype + else { + $in_statement_continuation = 0; + } + } + + # If we are in a list, then + # we must set continuation indentation at the closing + # paren of something like this (paren after $check): + # assert( + # __LINE__, + # ( not defined $check ) + # or ref $check + # or $check eq "new" + # or $check eq "old", + # ); + elsif ( $tok eq ')' ) { + $in_statement_continuation = 1 + if $routput_container_type->[$i] =~ /^[;,\{\}]$/; + } + + elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } + } + + # use environment after updating + $container_environment = + $nesting_block_flag ? 'BLOCK' + : $nesting_list_flag ? 'LIST' + : ""; + $ci_string_i = $ci_string_sum + $in_statement_continuation; + $nesting_block_string_i = $nesting_block_string; + $nesting_list_string_i = $nesting_list_string; + } + + # not a structural indentation type.. + else { + + $container_environment = + $nesting_block_flag ? 'BLOCK' + : $nesting_list_flag ? 'LIST' + : ""; + + # zero the continuation indentation at certain tokens so + # that they will be at the same level as its container. For + # commas, this simplifies the -lp indentation logic, which + # counts commas. For ?: it makes them stand out. + if ($nesting_list_flag) { + if ( $type =~ /^[,\?\:]$/ ) { + $in_statement_continuation = 0; + } + } + + # be sure binary operators get continuation indentation + if ( + $container_environment + && ( $type eq 'k' && $is_binary_keyword{$tok} + || $is_binary_type{$type} ) + ) + { + $in_statement_continuation = 1; + } + + # continuation indentation is sum of any open ci from previous + # levels plus the current level + $ci_string_i = $ci_string_sum + $in_statement_continuation; + + # update continuation flag ... + # if this isn't a blank or comment.. + if ( $type ne 'b' && $type ne '#' ) { + + # and we are in a BLOCK + if ($nesting_block_flag) { + + # the next token after a ';' and label starts a new stmt + if ( $type eq ';' || $type eq 'J' ) { + $in_statement_continuation = 0; + } + + # otherwise, we are continuing the current statement + else { + $in_statement_continuation = 1; + } + } + + # if we are not in a BLOCK.. + else { + + # do not use continuation indentation if not list + # environment (could be within if/elsif clause) + if ( !$nesting_list_flag ) { + $in_statement_continuation = 0; + } + + # otherwise, the token after a ',' starts a new term + + # Patch FOR RT#99961; no continuation after a ';' + # This is needed because perltidy currently marks + # a block preceded by a type character like % or @ + # as a non block, to simplify formatting. But these + # are actually blocks and can have semicolons. + # See code_block_type() and is_non_structural_brace(). + elsif ( $type eq ',' || $type eq ';' ) { + $in_statement_continuation = 0; + } + + # otherwise, we are continuing the current term + else { + $in_statement_continuation = 1; + } + } + } + } + + if ( $level_in_tokenizer < 0 ) { + unless ( $tokenizer_self->{_saw_negative_indentation} ) { + $tokenizer_self->{_saw_negative_indentation} = 1; + warning("Starting negative indentation\n"); + } + } + + # set secondary nesting levels based on all containment token types + # Note: these are set so that the nesting depth is the depth + # of the PREVIOUS TOKEN, which is convenient for setting + # the strength of token bonds + my $slevel_i = $slevel_in_tokenizer; + + # /^[L\{\(\[]$/ + if ( $is_opening_type{$type} ) { + $slevel_in_tokenizer++; + $nesting_token_string .= $tok; + $nesting_type_string .= $type; + } + + # /^[R\}\)\]]$/ + elsif ( $is_closing_type{$type} ) { + $slevel_in_tokenizer--; + my $char = chop $nesting_token_string; + + if ( $char ne $matching_start_token{$tok} ) { + $nesting_token_string .= $char . $tok; + $nesting_type_string .= $type; + } + else { + chop $nesting_type_string; + } + } + + push( @block_type, $routput_block_type->[$i] ); + push( @ci_string, $ci_string_i ); + push( @container_environment, $container_environment ); + push( @container_type, $routput_container_type->[$i] ); + push( @levels, $level_i ); + push( @nesting_tokens, $nesting_token_string_i ); + push( @nesting_types, $nesting_type_string_i ); + push( @slevels, $slevel_i ); + push( @token_type, $fix_type ); + push( @type_sequence, $routput_type_sequence->[$i] ); + push( @nesting_blocks, $nesting_block_string ); + push( @nesting_lists, $nesting_list_string ); + + # now form the previous token + if ( $im >= 0 ) { + $num = + $rtoken_map->[$i] - $rtoken_map->[$im]; # how many characters + + if ( $num > 0 ) { + push( @tokens, + substr( $input_line, $rtoken_map->[$im], $num ) ); + } + } + $im = $i; + } + + $num = length($input_line) - $rtoken_map->[$im]; # make the last token + if ( $num > 0 ) { + push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) ); + } + + $tokenizer_self->{_in_attribute_list} = $in_attribute_list; + $tokenizer_self->{_in_quote} = $in_quote; + $tokenizer_self->{_quote_target} = + $in_quote ? matching_end_token($quote_character) : ""; + $tokenizer_self->{_rhere_target_list} = $rhere_target_list; + + $line_of_tokens->{_rtoken_type} = \@token_type; + $line_of_tokens->{_rtokens} = \@tokens; + $line_of_tokens->{_rblock_type} = \@block_type; + $line_of_tokens->{_rcontainer_type} = \@container_type; + $line_of_tokens->{_rcontainer_environment} = \@container_environment; + $line_of_tokens->{_rtype_sequence} = \@type_sequence; + $line_of_tokens->{_rlevels} = \@levels; + $line_of_tokens->{_rslevels} = \@slevels; + $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; + $line_of_tokens->{_rci_levels} = \@ci_string; + $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; + + return; + } +} # end tokenize_this_line + +#########i############################################################# +# Tokenizer routines which assist in identifying token types +####################################################################### + +sub operator_expected { + + # Many perl symbols have two or more meanings. For example, '<<' + # can be a shift operator or a here-doc operator. The + # interpretation of these symbols depends on the current state of + # the tokenizer, which may either be expecting a term or an + # operator. For this example, a << would be a shift if an operator + # is expected, and a here-doc if a term is expected. This routine + # is called to make this decision for any current token. It returns + # one of three possible values: + # + # OPERATOR - operator expected (or at least, not a term) + # UNKNOWN - can't tell + # TERM - a term is expected (or at least, not an operator) + # + # The decision is based on what has been seen so far. This + # information is stored in the "$last_nonblank_type" and + # "$last_nonblank_token" variables. For example, if the + # $last_nonblank_type is '=~', then we are expecting a TERM, whereas + # if $last_nonblank_type is 'n' (numeric), we are expecting an + # OPERATOR. + # + # If a UNKNOWN is returned, the calling routine must guess. A major + # goal of this tokenizer is to minimize the possibility of returning + # UNKNOWN, because a wrong guess can spoil the formatting of a + # script. + # + # adding NEW_TOKENS: it is critically important that this routine be + # updated to allow it to determine if an operator or term is to be + # expected after the new token. Doing this simply involves adding + # the new token character to one of the regexes in this routine or + # to one of the hash lists + # that it uses, which are initialized in the BEGIN section. + # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, + # $statement_type + + my ( $prev_type, $tok, $next_type ) = @_; + + my $op_expected = UNKNOWN; + +##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; + +# Note: function prototype is available for token type 'U' for future +# program development. It contains the leading and trailing parens, +# and no blanks. It might be used to eliminate token type 'C', for +# example (prototype = '()'). Thus: +# if ($last_nonblank_type eq 'U') { +# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; +# } + + # A possible filehandle (or object) requires some care... + if ( $last_nonblank_type eq 'Z' ) { + + # angle.t + if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { + $op_expected = UNKNOWN; + } + + # For possible file handle like "$a", Perl uses weird parsing rules. + # For example: + # print $a/2,"/hi"; - division + # print $a / 2,"/hi"; - division + # print $a/ 2,"/hi"; - division + # print $a /2,"/hi"; - pattern (and error)! + elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { + $op_expected = TERM; + } + + # Note when an operation is being done where a + # filehandle might be expected, since a change in whitespace + # could change the interpretation of the statement. + else { + if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { + complain("operator in print statement not recommended\n"); + $op_expected = OPERATOR; + } + } + } + + # Check for smartmatch operator before preceding brace or square bracket. + # For example, at the ? after the ] in the following expressions we are + # expecting an operator: + # + # qr/3/ ~~ ['1234'] ? 1 : 0; + # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; + elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) { + $op_expected = OPERATOR; + } + + # handle something after 'do' and 'eval' + elsif ( $is_block_operator{$last_nonblank_token} ) { + + # something like $a = eval "expression"; + # ^ + if ( $last_nonblank_type eq 'k' ) { + $op_expected = TERM; # expression or list mode following keyword + } + + # something like $a = do { BLOCK } / 2; + # or this ? after a smartmatch anonynmous hash or array reference: + # qr/3/ ~~ ['1234'] ? 1 : 0; + # ^ + else { + $op_expected = OPERATOR; # block mode following } + } + } + + # handle bare word.. + elsif ( $last_nonblank_type eq 'w' ) { + + # unfortunately, we can't tell what type of token to expect next + # after most bare words + $op_expected = UNKNOWN; + } + + # operator, but not term possible after these types + # Note: moved ')' from type to token because parens in list context + # get marked as '{' '}' now. This is a minor glitch in the following: + # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); + # + elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) + || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) + { + $op_expected = OPERATOR; + + # in a 'use' statement, numbers and v-strings are not true + # numbers, so to avoid incorrect error messages, we will + # mark them as unknown for now (use.t) + # TODO: it would be much nicer to create a new token V for VERSION + # number in a use statement. Then this could be a check on type V + # and related patches which change $statement_type for '=>' + # and ',' could be removed. Further, it would clean things up to + # scan the 'use' statement with a separate subroutine. + if ( ( $statement_type eq 'use' ) + && ( $last_nonblank_type =~ /^[nv]$/ ) ) + { + $op_expected = UNKNOWN; + } + + # expecting VERSION or {} after package NAMESPACE + elsif ($statement_type =~ /^package\b/ + && $last_nonblank_token =~ /^package\b/ ) + { + $op_expected = TERM; + } + } + + # no operator after many keywords, such as "die", "warn", etc + elsif ( $expecting_term_token{$last_nonblank_token} ) { + + # // may follow perl functions which may be unary operators + # see test file dor.t (defined or); + if ( $tok eq '/' + && $next_type eq '/' + && $last_nonblank_type eq 'k' + && $is_keyword_taking_optional_args{$last_nonblank_token} ) + { + $op_expected = OPERATOR; + } + else { + $op_expected = TERM; + } + } + + # no operator after things like + - ** (i.e., other operators) + elsif ( $expecting_term_types{$last_nonblank_type} ) { + $op_expected = TERM; + } + + # a few operators, like "time", have an empty prototype () and so + # take no parameters but produce a value to operate on + elsif ( $expecting_operator_token{$last_nonblank_token} ) { + $op_expected = OPERATOR; + } + + # post-increment and decrement produce values to be operated on + elsif ( $expecting_operator_types{$last_nonblank_type} ) { + $op_expected = OPERATOR; + } + + # no value to operate on after sub block + elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } + + # a right brace here indicates the end of a simple block. + # all non-structural right braces have type 'R' + # all braces associated with block operator keywords have been given those + # keywords as "last_nonblank_token" and caught above. + # (This statement is order dependent, and must come after checking + # $last_nonblank_token). + elsif ( $last_nonblank_type eq '}' ) { + + # patch for dor.t (defined or). + if ( $tok eq '/' + && $next_type eq '/' + && $last_nonblank_token eq ']' ) + { + $op_expected = OPERATOR; + } + + # Patch for RT #116344: misparse a ternary operator after an anonymous + # hash, like this: + # return ref {} ? 1 : 0; + # The right brace should really be marked type 'R' in this case, and + # it is safest to return an UNKNOWN here. Expecting a TERM will + # cause the '?' to always be interpreted as a pattern delimiter + # rather than introducing a ternary operator. + elsif ( $tok eq '?' ) { + $op_expected = UNKNOWN; + } + else { + $op_expected = TERM; + } + } + + # something else..what did I forget? + else { + + # collecting diagnostics on unknown operator types..see what was missed + $op_expected = UNKNOWN; + write_diagnostics( +"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" + ); + } + + TOKENIZER_DEBUG_FLAG_EXPECT && do { + print STDOUT +"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; + }; + return $op_expected; +} + +sub new_statement_ok { + + # return true if the current token can start a new statement + # USES GLOBAL VARIABLES: $last_nonblank_type + + return label_ok() # a label would be ok here + + || $last_nonblank_type eq 'J'; # or we follow a label + +} + +sub label_ok { + + # Decide if a bare word followed by a colon here is a label + # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, + # $brace_depth, @brace_type + + # if it follows an opening or closing code block curly brace.. + if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) + && $last_nonblank_type eq $last_nonblank_token ) + { + + # it is a label if and only if the curly encloses a code block + return $brace_type[$brace_depth]; + } + + # otherwise, it is a label if and only if it follows a ';' (real or fake) + # or another label + else { + return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); + } +} + +sub code_block_type { + + # Decide if this is a block of code, and its type. + # Must be called only when $type = $token = '{' + # The problem is to distinguish between the start of a block of code + # and the start of an anonymous hash reference + # Returns "" if not code block, otherwise returns 'last_nonblank_token' + # to indicate the type of code block. (For example, 'last_nonblank_token' + # might be 'if' for an if block, 'else' for an else block, etc). + # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, + # $last_nonblank_block_type, $brace_depth, @brace_type + + # handle case of multiple '{'s + +# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; + + my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; + if ( $last_nonblank_token eq '{' + && $last_nonblank_type eq $last_nonblank_token ) + { + + # opening brace where a statement may appear is probably + # a code block but might be and anonymous hash reference + if ( $brace_type[$brace_depth] ) { + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # cannot start a code block within an anonymous hash + else { + return ""; + } + } + + elsif ( $last_nonblank_token eq ';' ) { + + # an opening brace where a statement may appear is probably + # a code block but might be and anonymous hash reference + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # handle case of '}{' + elsif ($last_nonblank_token eq '}' + && $last_nonblank_type eq $last_nonblank_token ) + { + + # a } { situation ... + # could be hash reference after code block..(blktype1.t) + if ($last_nonblank_block_type) { + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # must be a block if it follows a closing hash reference + else { + return $last_nonblank_token; + } + } + + ################################################################ + # NOTE: braces after type characters start code blocks, but for + # simplicity these are not identified as such. See also + # sub is_non_structural_brace. + ################################################################ + +## elsif ( $last_nonblank_type eq 't' ) { +## return $last_nonblank_token; +## } + + # brace after label: + elsif ( $last_nonblank_type eq 'J' ) { + return $last_nonblank_token; + } + +# otherwise, look at previous token. This must be a code block if +# it follows any of these: +# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ + elsif ( $is_code_block_token{$last_nonblank_token} ) { + + # Bug Patch: Note that the opening brace after the 'if' in the following + # snippet is an anonymous hash ref and not a code block! + # print 'hi' if { x => 1, }->{x}; + # We can identify this situation because the last nonblank type + # will be a keyword (instead of a closing peren) + if ( $last_nonblank_token =~ /^(if|unless)$/ + && $last_nonblank_type eq 'k' ) + { + return ""; + } + else { + return $last_nonblank_token; + } + } + + # or a sub or package BLOCK + elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) + && $last_nonblank_token =~ /^(sub|package)\b/ ) + { + return $last_nonblank_token; + } + + elsif ( $statement_type =~ /^(sub|package)\b/ ) { + return $statement_type; + } + + # user-defined subs with block parameters (like grep/map/eval) + elsif ( $last_nonblank_type eq 'G' ) { + return $last_nonblank_token; + } + + # check bareword + elsif ( $last_nonblank_type eq 'w' ) { + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # Patch for bug # RT #94338 reported by Daniel Trizen + # for-loop in a parenthesized block-map triggering an error message: + # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); + # Check for a code block within a parenthesized function call + elsif ( $last_nonblank_token eq '(' ) { + my $paren_type = $paren_type[$paren_depth]; + if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) { + + # We will mark this as a code block but use type 't' instead + # of the name of the contining function. This will allow for + # correct parsing but will usually produce better formatting. + # Braces with block type 't' are not broken open automatically + # in the formatter as are other code block types, and this usually + # works best. + return 't'; # (Not $paren_type) + } + else { + return ""; + } + } + + # handle unknown syntax ') {' + # we previously appended a '()' to mark this case + elsif ( $last_nonblank_token =~ /\(\)$/ ) { + return $last_nonblank_token; + } + + # anything else must be anonymous hash reference + else { + return ""; + } +} + +sub decide_if_code_block { + + # USES GLOBAL VARIABLES: $last_nonblank_token + my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; + + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # we are at a '{' where a statement may appear. + # We must decide if this brace starts an anonymous hash or a code + # block. + # return "" if anonymous hash, and $last_nonblank_token otherwise + + # initialize to be code BLOCK + my $code_block_type = $last_nonblank_token; + + # Check for the common case of an empty anonymous hash reference: + # Maybe something like sub { { } } + if ( $next_nonblank_token eq '}' ) { + $code_block_type = ""; + } + + else { + + # To guess if this '{' is an anonymous hash reference, look ahead + # and test as follows: + # + # it is a hash reference if next come: + # - a string or digit followed by a comma or => + # - bareword followed by => + # otherwise it is a code block + # + # Examples of anonymous hash ref: + # {'aa',}; + # {1,2} + # + # Examples of code blocks: + # {1; print "hello\n", 1;} + # {$a,1}; + + # We are only going to look ahead one more (nonblank/comment) line. + # Strange formatting could cause a bad guess, but that's unlikely. + my @pre_types; + my @pre_tokens; + + # Ignore the rest of this line if it is a side comment + if ( $next_nonblank_token ne '#' ) { + @pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ]; + @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ]; + } + my ( $rpre_tokens, $rpre_types ) = + peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but + # generous, and prevents + # wasting lots of + # time in mangled files + if ( defined($rpre_types) && @{$rpre_types} ) { + push @pre_types, @{$rpre_types}; + push @pre_tokens, @{$rpre_tokens}; + } + + # put a sentinel token to simplify stopping the search + push @pre_types, '}'; + push @pre_types, '}'; + + my $jbeg = 0; + $jbeg = 1 if $pre_types[0] eq 'b'; + + # first look for one of these + # - bareword + # - bareword with leading - + # - digit + # - quoted string + my $j = $jbeg; + if ( $pre_types[$j] =~ /^[\'\"]/ ) { + + # find the closing quote; don't worry about escapes + my $quote_mark = $pre_types[$j]; + foreach my $k ( $j + 1 .. @pre_types - 2 ) { + if ( $pre_types[$k] eq $quote_mark ) { + $j = $k + 1; + my $next = $pre_types[$j]; + last; + } + } + } + elsif ( $pre_types[$j] eq 'd' ) { + $j++; + } + elsif ( $pre_types[$j] eq 'w' ) { + $j++; + } + elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { + $j++; + } + if ( $j > $jbeg ) { + + $j++ if $pre_types[$j] eq 'b'; + + # Patched for RT #95708 + if ( + + # it is a comma which is not a pattern delimeter except for qw + ( + $pre_types[$j] eq ',' + && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/ + ) + + # or a => + || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) + ) + { + $code_block_type = ""; + } + } + } + + return $code_block_type; +} + +sub report_unexpected { + + # report unexpected token type and show where it is + # USES GLOBAL VARIABLES: $tokenizer_self + my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, + $rpretoken_type, $input_line ) + = @_; + + if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { + my $msg = "found $found where $expecting expected"; + my $pos = $rpretoken_map->[$i_tok]; + interrupt_logfile(); + my $input_line_number = $tokenizer_self->{_last_line_number}; + my ( $offset, $numbered_line, $underline ) = + make_numbered_line( $input_line_number, $input_line, $pos ); + $underline = write_on_underline( $underline, $pos - $offset, '^' ); + + my $trailer = ""; + if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { + my $pos_prev = $rpretoken_map->[$last_nonblank_i]; + my $num; + if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) { + $num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev; + } + else { + $num = $pos - $pos_prev; + } + if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } + + $underline = + write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); + $trailer = " (previous token underlined)"; + } + warning( $numbered_line . "\n" ); + warning( $underline . "\n" ); + warning( $msg . $trailer . "\n" ); + resume_logfile(); + } + return; +} + +sub is_non_structural_brace { + + # Decide if a brace or bracket is structural or non-structural + # by looking at the previous token and type + # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token + + # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. + # Tentatively deactivated because it caused the wrong operator expectation + # for this code: + # $user = @vars[1] / 100; + # Must update sub operator_expected before re-implementing. + # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { + # return 0; + # } + + ################################################################ + # NOTE: braces after type characters start code blocks, but for + # simplicity these are not identified as such. See also + # sub code_block_type + ################################################################ + + ##if ($last_nonblank_type eq 't') {return 0} + + # otherwise, it is non-structural if it is decorated + # by type information. + # For example, the '{' here is non-structural: ${xxx} + return ( + $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ + + # or if we follow a hash or array closing curly brace or bracket + # For example, the second '{' in this is non-structural: $a{'x'}{'y'} + # because the first '}' would have been given type 'R' + || $last_nonblank_type =~ /^([R\]])$/ + ); +} + +#########i############################################################# +# Tokenizer routines for tracking container nesting depths +####################################################################### + +# The following routines keep track of nesting depths of the nesting +# types, ( [ { and ?. This is necessary for determining the indentation +# level, and also for debugging programs. Not only do they keep track of +# nesting depths of the individual brace types, but they check that each +# of the other brace types is balanced within matching pairs. For +# example, if the program sees this sequence: +# +# { ( ( ) } +# +# then it can determine that there is an extra left paren somewhere +# between the { and the }. And so on with every other possible +# combination of outer and inner brace types. For another +# example: +# +# ( [ ..... ] ] ) +# +# which has an extra ] within the parens. +# +# The brace types have indexes 0 .. 3 which are indexes into +# the matrices. +# +# The pair ? : are treated as just another nesting type, with ? acting +# as the opening brace and : acting as the closing brace. +# +# The matrix +# +# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; +# +# saves the nesting depth of brace type $b (where $b is either of the other +# nesting types) when brace type $a enters a new depth. When this depth +# decreases, a check is made that the current depth of brace types $b is +# unchanged, or otherwise there must have been an error. This can +# be very useful for localizing errors, particularly when perl runs to +# the end of a large file (such as this one) and announces that there +# is a problem somewhere. +# +# A numerical sequence number is maintained for every nesting type, +# so that each matching pair can be uniquely identified in a simple +# way. + +sub increase_nesting_depth { + my ( $aa, $pos ) = @_; + + # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, + # @current_sequence_number, @depth_array, @starting_line_of_current_depth, + # $statement_type + $current_depth[$aa]++; + $total_depth++; + $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; + my $input_line_number = $tokenizer_self->{_last_line_number}; + my $input_line = $tokenizer_self->{_line_text}; + + # Sequence numbers increment by number of items. This keeps + # a unique set of numbers but still allows the relative location + # of any type to be determined. + $nesting_sequence_number[$aa] += scalar(@closing_brace_names); + my $seqno = $nesting_sequence_number[$aa]; + $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; + + $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = + [ $input_line_number, $input_line, $pos ]; + + for my $bb ( 0 .. @closing_brace_names - 1 ) { + next if ( $bb == $aa ); + $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; + } + + # set a flag for indenting a nested ternary statement + my $indent = 0; + if ( $aa == QUESTION_COLON ) { + $nested_ternary_flag[ $current_depth[$aa] ] = 0; + if ( $current_depth[$aa] > 1 ) { + if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { + my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; + if ( $pdepth == $total_depth - 1 ) { + $indent = 1; + $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; + } + } + } + } + $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type; + $statement_type = ""; + return ( $seqno, $indent ); +} + +sub decrease_nesting_depth { + + my ( $aa, $pos ) = @_; + + # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, + # @current_sequence_number, @depth_array, @starting_line_of_current_depth + # $statement_type + my $seqno = 0; + my $input_line_number = $tokenizer_self->{_last_line_number}; + my $input_line = $tokenizer_self->{_line_text}; + + my $outdent = 0; + $total_depth--; + if ( $current_depth[$aa] > 0 ) { + + # set a flag for un-indenting after seeing a nested ternary statement + $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; + if ( $aa == QUESTION_COLON ) { + $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; + } + $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ]; + + # check that any brace types $bb contained within are balanced + for my $bb ( 0 .. @closing_brace_names - 1 ) { + next if ( $bb == $aa ); + + unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == + $current_depth[$bb] ) + { + my $diff = + $current_depth[$bb] - + $depth_array[$aa][$bb][ $current_depth[$aa] ]; + + # don't whine too many times + my $saw_brace_error = get_saw_brace_error(); + if ( + $saw_brace_error <= MAX_NAG_MESSAGES + + # if too many closing types have occurred, we probably + # already caught this error + && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) + ) + { + interrupt_logfile(); + my $rsl = + $starting_line_of_current_depth[$aa] + [ $current_depth[$aa] ]; + my $sl = $rsl->[0]; + my $rel = [ $input_line_number, $input_line, $pos ]; + my $el = $rel->[0]; + my ($ess); + + if ( $diff == 1 || $diff == -1 ) { + $ess = ''; + } + else { + $ess = 's'; + } + my $bname = + ( $diff > 0 ) + ? $opening_brace_names[$bb] + : $closing_brace_names[$bb]; + write_error_indicator_pair( @{$rsl}, '^' ); + my $msg = <<"EOM"; +Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el +EOM + + if ( $diff > 0 ) { + my $rml = + $starting_line_of_current_depth[$bb] + [ $current_depth[$bb] ]; + my $ml = $rml->[0]; + $msg .= +" The most recent un-matched $bname is on line $ml\n"; + write_error_indicator_pair( @{$rml}, '^' ); + } + write_error_indicator_pair( @{$rel}, '^' ); + warning($msg); + resume_logfile(); + } + increment_brace_error(); + } + } + $current_depth[$aa]--; + } + else { + + my $saw_brace_error = get_saw_brace_error(); + if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { + my $msg = <<"EOM"; +There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number +EOM + indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); + } + increment_brace_error(); + } + return ( $seqno, $outdent ); +} + +sub check_final_nesting_depths { + + # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth + + for my $aa ( 0 .. @closing_brace_names - 1 ) { + + if ( $current_depth[$aa] ) { + my $rsl = + $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; + my $sl = $rsl->[0]; + my $msg = <<"EOM"; +Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] +The most recent un-matched $opening_brace_names[$aa] is on line $sl +EOM + indicate_error( $msg, @{$rsl}, '^' ); + increment_brace_error(); + } + } + return; +} + +#########i############################################################# +# Tokenizer routines for looking ahead in input stream +####################################################################### + +sub peek_ahead_for_n_nonblank_pre_tokens { + + # returns next n pretokens if they exist + # returns undef's if hits eof without seeing any pretokens + # USES GLOBAL VARIABLES: $tokenizer_self + my $max_pretokens = shift; + my $line; + my $i = 0; + my ( $rpre_tokens, $rmap, $rpre_types ); + + while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) + { + $line =~ s/^\s*//; # trim leading blanks + next if ( length($line) <= 0 ); # skip blank + next if ( $line =~ /^#/ ); # skip comment + ( $rpre_tokens, $rmap, $rpre_types ) = + pre_tokenize( $line, $max_pretokens ); + last; + } + return ( $rpre_tokens, $rpre_types ); +} + +# look ahead for next non-blank, non-comment line of code +sub peek_ahead_for_nonblank_token { + + # USES GLOBAL VARIABLES: $tokenizer_self + my ( $rtokens, $max_token_index ) = @_; + my $line; + my $i = 0; + + while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) + { + $line =~ s/^\s*//; # trim leading blanks + next if ( length($line) <= 0 ); # skip blank + next if ( $line =~ /^#/ ); # skip comment + my ( $rtok, $rmap, $rtype ) = + pre_tokenize( $line, 2 ); # only need 2 pre-tokens + my $j = $max_token_index + 1; + + foreach my $tok ( @{$rtok} ) { + last if ( $tok =~ "\n" ); + $rtokens->[ ++$j ] = $tok; + } + last; + } + return $rtokens; +} + +#########i############################################################# +# Tokenizer guessing routines for ambiguous situations +####################################################################### + +sub guess_if_pattern_or_conditional { + + # this routine is called when we have encountered a ? following an + # unknown bareword, and we must decide if it starts a pattern or not + # input parameters: + # $i - token index of the ? starting possible pattern + # output parameters: + # $is_pattern = 0 if probably not pattern, =1 if probably a pattern + # msg = a warning or diagnostic message + # USES GLOBAL VARIABLES: $last_nonblank_token + + # FIXME: this needs to be rewritten + + my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; + my $is_pattern = 0; + my $msg = "guessing that ? after $last_nonblank_token starts a "; + + if ( $i >= $max_token_index ) { + $msg .= "conditional (no end to pattern found on the line)\n"; + } + else { + my $ibeg = $i; + $i = $ibeg + 1; + my $next_token = $rtokens->[$i]; # first token after ? + + # look for a possible ending ? on this line.. + my $in_quote = 1; + my $quote_depth = 0; + my $quote_character = ''; + my $quote_pos = 0; + my $quoted_string; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + + if ($in_quote) { + + # we didn't find an ending ? on this line, + # so we bias towards conditional + $is_pattern = 0; + $msg .= "conditional (no ending ? on this line)\n"; + + # we found an ending ?, so we bias towards a pattern + } + else { + + # Watch out for an ending ? in quotes, like this + # my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; + my $s_quote = 0; + my $d_quote = 0; + my $colons = 0; + foreach my $ii ( $ibeg + 1 .. $i - 1 ) { + my $tok = $rtokens->[$ii]; + if ( $tok eq ":" ) { $colons++ } + if ( $tok eq "'" ) { $s_quote++ } + if ( $tok eq '"' ) { $d_quote++ } + } + if ( $s_quote % 2 || $d_quote % 2 || $colons ) { + $is_pattern = 0; + $msg .= "found ending ? but unbalanced quote chars\n"; + } + elsif ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { + $is_pattern = 1; + $msg .= "pattern (found ending ? and pattern expected)\n"; + } + else { + $msg .= "pattern (uncertain, but found ending ?)\n"; + } + } + } + return ( $is_pattern, $msg ); +} + +sub guess_if_pattern_or_division { + + # this routine is called when we have encountered a / following an + # unknown bareword, and we must decide if it starts a pattern or is a + # division + # input parameters: + # $i - token index of the / starting possible pattern + # output parameters: + # $is_pattern = 0 if probably division, =1 if probably a pattern + # msg = a warning or diagnostic message + # USES GLOBAL VARIABLES: $last_nonblank_token + my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; + my $is_pattern = 0; + my $msg = "guessing that / after $last_nonblank_token starts a "; + + if ( $i >= $max_token_index ) { + $msg .= "division (no end to pattern found on the line)\n"; + } + else { + my $ibeg = $i; + my $divide_expected = + numerator_expected( $i, $rtokens, $max_token_index ); + $i = $ibeg + 1; + my $next_token = $rtokens->[$i]; # first token after slash + + # look for a possible ending / on this line.. + my $in_quote = 1; + my $quote_depth = 0; + my $quote_character = ''; + my $quote_pos = 0; + my $quoted_string; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + + if ($in_quote) { + + # we didn't find an ending / on this line, + # so we bias towards division + if ( $divide_expected >= 0 ) { + $is_pattern = 0; + $msg .= "division (no ending / on this line)\n"; + } + else { + $msg = "multi-line pattern (division not possible)\n"; + $is_pattern = 1; + } + + } + + # we found an ending /, so we bias towards a pattern + else { + + if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { + + if ( $divide_expected >= 0 ) { + + if ( $i - $ibeg > 60 ) { + $msg .= "division (matching / too distant)\n"; + $is_pattern = 0; + } + else { + $msg .= "pattern (but division possible too)\n"; + $is_pattern = 1; + } + } + else { + $is_pattern = 1; + $msg .= "pattern (division not possible)\n"; + } + } + else { + + if ( $divide_expected >= 0 ) { + $is_pattern = 0; + $msg .= "division (pattern not possible)\n"; + } + else { + $is_pattern = 1; + $msg .= + "pattern (uncertain, but division would not work here)\n"; + } + } + } + } + return ( $is_pattern, $msg ); +} + +# try to resolve here-doc vs. shift by looking ahead for +# non-code or the end token (currently only looks for end token) +# returns 1 if it is probably a here doc, 0 if not +sub guess_if_here_doc { + + # This is how many lines we will search for a target as part of the + # guessing strategy. It is a constant because there is probably + # little reason to change it. + # USES GLOBAL VARIABLES: $tokenizer_self, $current_package + # %is_constant, + my $HERE_DOC_WINDOW = 40; + + my $next_token = shift; + my $here_doc_expected = 0; + my $line; + my $k = 0; + my $msg = "checking <<"; + + while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) + { + chomp $line; + + if ( $line =~ /^$next_token$/ ) { + $msg .= " -- found target $next_token ahead $k lines\n"; + $here_doc_expected = 1; # got it + last; + } + last if ( $k >= $HERE_DOC_WINDOW ); + } + + unless ($here_doc_expected) { + + if ( !defined($line) ) { + $here_doc_expected = -1; # hit eof without seeing target + $msg .= " -- must be shift; target $next_token not in file\n"; + + } + else { # still unsure..taking a wild guess + + if ( !$is_constant{$current_package}{$next_token} ) { + $here_doc_expected = 1; + $msg .= + " -- guessing it's a here-doc ($next_token not a constant)\n"; + } + else { + $msg .= + " -- guessing it's a shift ($next_token is a constant)\n"; + } + } + } + write_logfile_entry($msg); + return $here_doc_expected; +} + +#########i############################################################# +# Tokenizer Routines for scanning identifiers and related items +####################################################################### + +sub scan_bare_identifier_do { + + # this routine is called to scan a token starting with an alphanumeric + # variable or package separator, :: or '. + # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, + # $last_nonblank_type,@paren_type, $paren_depth + + my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, + $max_token_index ) + = @_; + my $i_begin = $i; + my $package = undef; + + my $i_beg = $i; + + # we have to back up one pretoken at a :: since each : is one pretoken + if ( $tok eq '::' ) { $i_beg-- } + if ( $tok eq '->' ) { $i_beg-- } + my $pos_beg = $rtoken_map->[$i_beg]; + pos($input_line) = $pos_beg; + + # Examples: + # A::B::C + # A:: + # ::A + # A'B + if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { + + my $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $tok = substr( $input_line, $pos_beg, $numc ); + + # type 'w' includes anything without leading type info + # ($,%,@,*) including something like abc::def::ghi + $type = 'w'; + + my $sub_name = ""; + if ( defined($2) ) { $sub_name = $2; } + if ( defined($1) ) { + $package = $1; + + # patch: don't allow isolated package name which just ends + # in the old style package separator (single quote). Example: + # use CGI':all'; + if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { + $pos--; + } + + $package =~ s/\'/::/g; + if ( $package =~ /^\:/ ) { $package = 'main' . $package } + $package =~ s/::$//; + } + else { + $package = $current_package; + + if ( $is_keyword{$tok} ) { + $type = 'k'; + } + } + + # if it is a bareword.. + if ( $type eq 'w' ) { + + # check for v-string with leading 'v' type character + # (This seems to have precedence over filehandle, type 'Y') + if ( $tok =~ /^v\d[_\d]*$/ ) { + + # we only have the first part - something like 'v101' - + # look for more + if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { + $pos = pos($input_line); + $numc = $pos - $pos_beg; + $tok = substr( $input_line, $pos_beg, $numc ); + } + $type = 'v'; + + # warn if this version can't handle v-strings + report_v_string($tok); + } + + elsif ( $is_constant{$package}{$sub_name} ) { + $type = 'C'; + } + + # bareword after sort has implied empty prototype; for example: + # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); + # This has priority over whatever the user has specified. + elsif ($last_nonblank_token eq 'sort' + && $last_nonblank_type eq 'k' ) + { + $type = 'Z'; + } + + # Note: strangely, perl does not seem to really let you create + # functions which act like eval and do, in the sense that eval + # and do may have operators following the final }, but any operators + # that you create with prototype (&) apparently do not allow + # trailing operators, only terms. This seems strange. + # If this ever changes, here is the update + # to make perltidy behave accordingly: + + # elsif ( $is_block_function{$package}{$tok} ) { + # $tok='eval'; # patch to do braces like eval - doesn't work + # $type = 'k'; + #} + # FIXME: This could become a separate type to allow for different + # future behavior: + elsif ( $is_block_function{$package}{$sub_name} ) { + $type = 'G'; + } + + elsif ( $is_block_list_function{$package}{$sub_name} ) { + $type = 'G'; + } + elsif ( $is_user_function{$package}{$sub_name} ) { + $type = 'U'; + $prototype = $user_function_prototype{$package}{$sub_name}; + } + + # check for indirect object + elsif ( + + # added 2001-03-27: must not be followed immediately by '(' + # see fhandle.t + ( $input_line !~ m/\G\(/gc ) + + # and + && ( + + # preceded by keyword like 'print', 'printf' and friends + $is_indirect_object_taker{$last_nonblank_token} + + # or preceded by something like 'print(' or 'printf(' + || ( + ( $last_nonblank_token eq '(' ) + && $is_indirect_object_taker{ $paren_type[$paren_depth] + } + + ) + ) + ) + { + + # may not be indirect object unless followed by a space + if ( $input_line =~ m/\G\s+/gc ) { + $type = 'Y'; + + # Abandon Hope ... + # Perl's indirect object notation is a very bad + # thing and can cause subtle bugs, especially for + # beginning programmers. And I haven't even been + # able to figure out a sane warning scheme which + # doesn't get in the way of good scripts. + + # Complain if a filehandle has any lower case + # letters. This is suggested good practice. + # Use 'sub_name' because something like + # main::MYHANDLE is ok for filehandle + if ( $sub_name =~ /[a-z]/ ) { + + # could be bug caused by older perltidy if + # followed by '(' + if ( $input_line =~ m/\G\s*\(/gc ) { + complain( +"Caution: unknown word '$tok' in indirect object slot\n" + ); + } + } + } + + # bareword not followed by a space -- may not be filehandle + # (may be function call defined in a 'use' statement) + else { + $type = 'Z'; + } + } + } + + # Now we must convert back from character position + # to pre_token index. + # I don't think an error flag can occur here ..but who knows + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + if ($error) { + warning("scan_bare_identifier: Possibly invalid tokenization\n"); + } + } + + # no match but line not blank - could be syntax error + # perl will take '::' alone without complaint + else { + $type = 'w'; + + # change this warning to log message if it becomes annoying + warning("didn't find identifier after leading ::\n"); + } + return ( $i, $tok, $type, $prototype ); +} + +sub scan_id_do { + +# This is the new scanner and will eventually replace scan_identifier. +# Only type 'sub' and 'package' are implemented. +# Token types $ * % @ & -> are not yet implemented. +# +# Scan identifier following a type token. +# The type of call depends on $id_scan_state: $id_scan_state = '' +# for starting call, in which case $tok must be the token defining +# the type. +# +# If the type token is the last nonblank token on the line, a value +# of $id_scan_state = $tok is returned, indicating that further +# calls must be made to get the identifier. If the type token is +# not the last nonblank token on the line, the identifier is +# scanned and handled and a value of '' is returned. +# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, +# $statement_type, $tokenizer_self + + my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, + $max_token_index ) + = @_; + my $type = ''; + my ( $i_beg, $pos_beg ); + + #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; + #my ($a,$b,$c) = caller; + #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; + + # on re-entry, start scanning at first token on the line + if ($id_scan_state) { + $i_beg = $i; + $type = ''; + } + + # on initial entry, start scanning just after type token + else { + $i_beg = $i + 1; + $id_scan_state = $tok; + $type = 't'; + } + + # find $i_beg = index of next nonblank token, + # and handle empty lines + my $blank_line = 0; + my $next_nonblank_token = $rtokens->[$i_beg]; + if ( $i_beg > $max_token_index ) { + $blank_line = 1; + } + else { + + # only a '#' immediately after a '$' is not a comment + if ( $next_nonblank_token eq '#' ) { + unless ( $tok eq '$' ) { + $blank_line = 1; + } + } + + if ( $next_nonblank_token =~ /^\s/ ) { + ( $next_nonblank_token, $i_beg ) = + find_next_nonblank_token_on_this_line( $i_beg, $rtokens, + $max_token_index ); + if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { + $blank_line = 1; + } + } + } + + # handle non-blank line; identifier, if any, must follow + unless ($blank_line) { + + if ( $id_scan_state eq 'sub' ) { + ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( + $input_line, $i, $i_beg, + $tok, $type, $rtokens, + $rtoken_map, $id_scan_state, $max_token_index + ); + } + + elsif ( $id_scan_state eq 'package' ) { + ( $i, $tok, $type ) = + do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, + $rtoken_map, $max_token_index ); + $id_scan_state = ''; + } + + else { + warning("invalid token in scan_id: $tok\n"); + $id_scan_state = ''; + } + } + + if ( $id_scan_state && ( !defined($type) || !$type ) ) { + + # shouldn't happen: + warning( +"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" + ); + report_definite_bug(); + } + + TOKENIZER_DEBUG_FLAG_NSCAN && do { + print STDOUT + "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; + }; + return ( $i, $tok, $type, $id_scan_state ); +} + +sub check_prototype { + my ( $proto, $package, $subname ) = @_; + return unless ( defined($package) && defined($subname) ); + if ( defined($proto) ) { + $proto =~ s/^\s*\(\s*//; + $proto =~ s/\s*\)$//; + if ($proto) { + $is_user_function{$package}{$subname} = 1; + $user_function_prototype{$package}{$subname} = "($proto)"; + + # prototypes containing '&' must be treated specially.. + if ( $proto =~ /\&/ ) { + + # right curly braces of prototypes ending in + # '&' may be followed by an operator + if ( $proto =~ /\&$/ ) { + $is_block_function{$package}{$subname} = 1; + } + + # right curly braces of prototypes NOT ending in + # '&' may NOT be followed by an operator + elsif ( $proto !~ /\&$/ ) { + $is_block_list_function{$package}{$subname} = 1; + } + } + } + else { + $is_constant{$package}{$subname} = 1; + } + } + else { + $is_user_function{$package}{$subname} = 1; + } + return; +} + +sub do_scan_package { + + # do_scan_package parses a package name + # it is called with $i_beg equal to the index of the first nonblank + # token following a 'package' token. + # USES GLOBAL VARIABLES: $current_package, + + # package NAMESPACE + # package NAMESPACE VERSION + # package NAMESPACE BLOCK + # package NAMESPACE VERSION BLOCK + # + # If VERSION is provided, package sets the $VERSION variable in the given + # namespace to a version object with the VERSION provided. VERSION must be + # a "strict" style version number as defined by the version module: a + # positive decimal number (integer or decimal-fraction) without + # exponentiation or else a dotted-decimal v-string with a leading 'v' + # character and at least three components. + # reference http://perldoc.perl.org/functions/package.html + + my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, + $max_token_index ) + = @_; + my $package = undef; + my $pos_beg = $rtoken_map->[$i_beg]; + pos($input_line) = $pos_beg; + + # handle non-blank line; package name, if any, must follow + if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { + $package = $1; + $package = ( defined($1) && $1 ) ? $1 : 'main'; + $package =~ s/\'/::/g; + if ( $package =~ /^\:/ ) { $package = 'main' . $package } + $package =~ s/::$//; + my $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); + $type = 'i'; + + # Now we must convert back from character position + # to pre_token index. + # I don't think an error flag can occur here ..but ? + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + if ($error) { warning("Possibly invalid package\n") } + $current_package = $package; + + # we should now have package NAMESPACE + # now expecting VERSION, BLOCK, or ; to follow ... + # package NAMESPACE VERSION + # package NAMESPACE BLOCK + # package NAMESPACE VERSION BLOCK + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # check that something recognizable follows, but do not parse. + # A VERSION number will be parsed later as a number or v-string in the + # normal way. What is important is to set the statement type if + # everything looks okay so that the operator_expected() routine + # knows that the number is in a package statement. + # Examples of valid primitive tokens that might follow are: + # 1235 . ; { } v3 v + if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) { + $statement_type = $tok; + } + else { + warning( + "Unexpected '$next_nonblank_token' after package name '$tok'\n" + ); + } + } + + # no match but line not blank -- + # could be a label with name package, like package: , for example. + else { + $type = 'k'; + } + + return ( $i, $tok, $type ); +} + +sub scan_identifier_do { + + # This routine assembles tokens into identifiers. It maintains a + # scan state, id_scan_state. It updates id_scan_state based upon + # current id_scan_state and token, and returns an updated + # id_scan_state and the next index after the identifier. + # USES GLOBAL VARIABLES: $context, $last_nonblank_token, + # $last_nonblank_type + + my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, + $expecting, $container_type ) + = @_; + my $i_begin = $i; + my $type = ''; + my $tok_begin = $rtokens->[$i_begin]; + if ( $tok_begin eq ':' ) { $tok_begin = '::' } + my $id_scan_state_begin = $id_scan_state; + my $identifier_begin = $identifier; + my $tok = $tok_begin; + my $message = ""; + + my $in_prototype_or_signature = $container_type =~ /^sub/; + + # these flags will be used to help figure out the type: + my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); + my $saw_type; + + # allow old package separator (') except in 'use' statement + my $allow_tick = ( $last_nonblank_token ne 'use' ); + + # get started by defining a type and a state if necessary + unless ($id_scan_state) { + $context = UNKNOWN_CONTEXT; + + # fixup for digraph + if ( $tok eq '>' ) { + $tok = '->'; + $tok_begin = $tok; + } + $identifier = $tok; + + if ( $tok eq '$' || $tok eq '*' ) { + $id_scan_state = '$'; + $context = SCALAR_CONTEXT; + } + elsif ( $tok eq '%' || $tok eq '@' ) { + $id_scan_state = '$'; + $context = LIST_CONTEXT; + } + elsif ( $tok eq '&' ) { + $id_scan_state = '&'; + } + elsif ( $tok eq 'sub' or $tok eq 'package' ) { + $saw_alpha = 0; # 'sub' is considered type info here + $id_scan_state = '$'; + $identifier .= ' '; # need a space to separate sub from sub name + } + elsif ( $tok eq '::' ) { + $id_scan_state = 'A'; + } + elsif ( $tok =~ /^[A-Za-z_]/ ) { + $id_scan_state = ':'; + } + elsif ( $tok eq '->' ) { + $id_scan_state = '$'; + } + else { + + # shouldn't happen + my ( $a, $b, $c ) = caller; + warning("Program Bug: scan_identifier given bad token = $tok \n"); + warning(" called from sub $a line: $c\n"); + report_definite_bug(); + } + $saw_type = !$saw_alpha; + } + else { + $i--; + $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); + } + + # now loop to gather the identifier + my $i_save = $i; + + while ( $i < $max_token_index ) { + $i_save = $i unless ( $tok =~ /^\s*$/ ); + $tok = $rtokens->[ ++$i ]; + + if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) { + $tok = '::'; + $i++; + } + + if ( $id_scan_state eq '$' ) { # starting variable name + + if ( $tok eq '$' ) { + + $identifier .= $tok; + + # we've got a punctuation variable if end of line (punct.t) + if ( $i == $max_token_index ) { + $type = 'i'; + $id_scan_state = ''; + last; + } + } + + # POSTDEFREF ->@ ->% ->& ->* + elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { + $identifier .= $tok; + } + elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. + $saw_alpha = 1; + $id_scan_state = ':'; # now need :: + $identifier .= $tok; + } + elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. + $saw_alpha = 1; + $id_scan_state = ':'; # now need :: + $identifier .= $tok; + + # Perl will accept leading digits in identifiers, + # although they may not always produce useful results. + # Something like $main::0 is ok. But this also works: + # + # sub howdy::123::bubba{ print "bubba $54321!\n" } + # howdy::123::bubba(); + # + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric + $saw_alpha = 1; + $id_scan_state = ':'; # now need :: + $identifier .= $tok; + } + elsif ( $tok eq '::' ) { + $id_scan_state = 'A'; + $identifier .= $tok; + } + + # $# and POSTDEFREF ->$# + elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array + $identifier .= $tok; # keep same state, a $ could follow + } + elsif ( $tok eq '{' ) { + + # check for something like ${#} or ${©} + if ( + ( + $identifier eq '$' + || $identifier eq '@' + || $identifier eq '$#' + ) + && $i + 2 <= $max_token_index + && $rtokens->[ $i + 2 ] eq '}' + && $rtokens->[ $i + 1 ] !~ /[\s\w]/ + ) + { + my $next2 = $rtokens->[ $i + 2 ]; + my $next1 = $rtokens->[ $i + 1 ]; + $identifier .= $tok . $next1 . $next2; + $i += 2; + $id_scan_state = ''; + last; + } + + # skip something like ${xxx} or ->{ + $id_scan_state = ''; + + # if this is the first token of a line, any tokens for this + # identifier have already been accumulated + if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } + $i = $i_save; + last; + } + + # space ok after leading $ % * & @ + elsif ( $tok =~ /^\s*$/ ) { + + if ( $identifier =~ /^[\$\%\*\&\@]/ ) { + + if ( length($identifier) > 1 ) { + $id_scan_state = ''; + $i = $i_save; + $type = 'i'; # probably punctuation variable + last; + } + else { + + # spaces after $'s are common, and space after @ + # is harmless, so only complain about space + # after other type characters. Space after $ and + # @ will be removed in formatting. Report space + # after % and * because they might indicate a + # parsing error. In other words '% ' might be a + # modulo operator. Delete this warning if it + # gets annoying. + if ( $identifier !~ /^[\@\$]$/ ) { + $message = + "Space in identifier, following $identifier\n"; + } + } + } + + # else: + # space after '->' is ok + } + elsif ( $tok eq '^' ) { + + # check for some special variables like $^W + if ( $identifier =~ /^[\$\*\@\%]$/ ) { + $identifier .= $tok; + $id_scan_state = 'A'; + + # Perl accepts '$^]' or '@^]', but + # there must not be a space before the ']'. + my $next1 = $rtokens->[ $i + 1 ]; + if ( $next1 eq ']' ) { + $i++; + $identifier .= $next1; + $id_scan_state = ""; + last; + } + } + else { + $id_scan_state = ''; + } + } + else { # something else + + if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) { + $id_scan_state = ''; + $i = $i_save; + $type = 'i'; # probably punctuation variable + last; + } + + # check for various punctuation variables + if ( $identifier =~ /^[\$\*\@\%]$/ ) { + $identifier .= $tok; + } + + # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* + elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) { + $identifier .= $tok; + } + + elsif ( $identifier eq '$#' ) { + + if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } + + # perl seems to allow just these: $#: $#- $#+ + elsif ( $tok =~ /^[\:\-\+]$/ ) { + $type = 'i'; + $identifier .= $tok; + } + else { + $i = $i_save; + write_logfile_entry( 'Use of $# is deprecated' . "\n" ); + } + } + elsif ( $identifier eq '$$' ) { + + # perl does not allow references to punctuation + # variables without braces. For example, this + # won't work: + # $:=\4; + # $a = $$:; + # You would have to use + # $a = ${$:}; + + $i = $i_save; + if ( $tok eq '{' ) { $type = 't' } + else { $type = 'i' } + } + elsif ( $identifier eq '->' ) { + $i = $i_save; + } + else { + $i = $i_save; + if ( length($identifier) == 1 ) { $identifier = ''; } + } + $id_scan_state = ''; + last; + } + } + elsif ( $id_scan_state eq '&' ) { # starting sub call? + + if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + $identifier .= $tok; + } + elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + $identifier .= $tok; + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + $identifier .= $tok; + } + elsif ( $tok =~ /^\s*$/ ) { # allow space + } + elsif ( $tok eq '::' ) { # leading :: + $id_scan_state = 'A'; # accept alpha next + $identifier .= $tok; + } + elsif ( $tok eq '{' ) { + if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } + $i = $i_save; + $id_scan_state = ''; + last; + } + else { + + # punctuation variable? + # testfile: cunningham4.pl + # + # We have to be careful here. If we are in an unknown state, + # we will reject the punctuation variable. In the following + # example the '&' is a binary operator but we are in an unknown + # state because there is no sigil on 'Prima', so we don't + # know what it is. But it is a bad guess that + # '&~' is a function variable. + # $self->{text}->{colorMap}->[ + # Prima::PodView::COLOR_CODE_FOREGROUND + # & ~tb::COLOR_INDEX ] = + # $sec->{ColorCode} + if ( $identifier eq '&' && $expecting ) { + $identifier .= $tok; + } + else { + $identifier = ''; + $i = $i_save; + $type = '&'; + } + $id_scan_state = ''; + last; + } + } + elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) + + if ( $tok =~ /^[A-Za-z_]/ ) { # found it + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok eq "'" && $allow_tick ) { + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { + $id_scan_state = '('; + $identifier .= $tok; + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { + $id_scan_state = ')'; + $identifier .= $tok; + } + else { + $id_scan_state = ''; + $i = $i_save; + last; + } + } + elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha + + if ( $tok eq '::' ) { # got it + $identifier .= $tok; + $id_scan_state = 'A'; # now require alpha + } + elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok eq "'" && $allow_tick ) { # tick + + if ( $is_keyword{$identifier} ) { + $id_scan_state = ''; # that's all + $i = $i_save; + } + else { + $identifier .= $tok; + } + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { + $id_scan_state = '('; + $identifier .= $tok; + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { + $id_scan_state = ')'; + $identifier .= $tok; + } + else { + $id_scan_state = ''; # that's all + $i = $i_save; + last; + } + } + elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype + + if ( $tok eq '(' ) { # got it + $identifier .= $tok; + $id_scan_state = ')'; # now find the end of it + } + elsif ( $tok =~ /^\s*$/ ) { # blank - keep going + $identifier .= $tok; + } + else { + $id_scan_state = ''; # that's all - no prototype + $i = $i_save; + last; + } + } + elsif ( $id_scan_state eq ')' ) { # looking for ) to end + + if ( $tok eq ')' ) { # got it + $identifier .= $tok; + $id_scan_state = ''; # all done + last; + } + elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { + $identifier .= $tok; + } + else { # probable error in script, but keep going + warning("Unexpected '$tok' while seeking end of prototype\n"); + $identifier .= $tok; + } + } + else { # can get here due to error in initialization + $id_scan_state = ''; + $i = $i_save; + last; + } + } + + if ( $id_scan_state eq ')' ) { + warning("Hit end of line while seeking ) to end prototype\n"); + } + + # once we enter the actual identifier, it may not extend beyond + # the end of the current line + if ( $id_scan_state =~ /^[A\:\(\)]/ ) { + $id_scan_state = ''; + } + if ( $i < 0 ) { $i = 0 } + + unless ($type) { + + if ($saw_type) { + + if ($saw_alpha) { + if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { + $type = 'w'; + } + else { $type = 'i' } + } + elsif ( $identifier eq '->' ) { + $type = '->'; + } + elsif ( + ( length($identifier) > 1 ) + + # In something like '@$=' we have an identifier '@$' + # In something like '$${' we have type '$$' (and only + # part of an identifier) + && !( $identifier =~ /\$$/ && $tok eq '{' ) + && ( $identifier !~ /^(sub |package )$/ ) + ) + { + $type = 'i'; + } + else { $type = 't' } + } + elsif ($saw_alpha) { + + # type 'w' includes anything without leading type info + # ($,%,@,*) including something like abc::def::ghi + $type = 'w'; + } + else { + $type = ''; + } # this can happen on a restart + } + + if ($identifier) { + $tok = $identifier; + if ($message) { write_logfile_entry($message) } + } + else { + $tok = $tok_begin; + $i = $i_begin; + } + + TOKENIZER_DEBUG_FLAG_SCAN_ID && do { + my ( $a, $b, $c ) = caller; + print STDOUT +"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; + print STDOUT +"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; + }; + return ( $i, $tok, $type, $id_scan_state, $identifier ); +} + +{ + + # saved package and subnames in case prototype is on separate line + my ( $package_saved, $subname_saved ); + + sub do_scan_sub { + + # do_scan_sub parses a sub name and prototype + # it is called with $i_beg equal to the index of the first nonblank + # token following a 'sub' token. + + # TODO: add future error checks to be sure we have a valid + # sub name. For example, 'sub &doit' is wrong. Also, be sure + # a name is given if and only if a non-anonymous sub is + # appropriate. + # USES GLOBAL VARS: $current_package, $last_nonblank_token, + # $in_attribute_list, %saw_function_definition, + # $statement_type + + my ( + $input_line, $i, $i_beg, + $tok, $type, $rtokens, + $rtoken_map, $id_scan_state, $max_token_index + ) = @_; + $id_scan_state = ""; # normally we get everything in one call + my $subname = undef; + my $package = undef; + my $proto = undef; + my $attrs = undef; + my $match; + + my $pos_beg = $rtoken_map->[$i_beg]; + pos($input_line) = $pos_beg; + + # Look for the sub NAME + if ( + $input_line =~ m/\G\s* + ((?:\w*(?:'|::))*) # package - something that ends in :: or ' + (\w+) # NAME - required + /gcx + ) + { + $match = 1; + $subname = $2; + + $package = ( defined($1) && $1 ) ? $1 : $current_package; + $package =~ s/\'/::/g; + if ( $package =~ /^\:/ ) { $package = 'main' . $package } + $package =~ s/::$//; + my $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); + $type = 'i'; + } + + # Now look for PROTO ATTRS + # Look for prototype/attributes which are usually on the same + # line as the sub name but which might be on a separate line. + # For example, we might have an anonymous sub with attributes, + # or a prototype on a separate line from its sub name + + # NOTE: We only want to parse PROTOTYPES here. If we see anything that + # does not look like a prototype, we assume it is a SIGNATURE and we + # will stop and let the the standard tokenizer handle it. In + # particular, we stop if we see any nested parens, braces, or commas. + my $saw_opening_paren = $input_line =~ /\G\s*\(/; + if ( + $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO + (\s*:)? # ATTRS leading ':' + /gcx + && ( $1 || $2 ) + ) + { + $proto = $1; + $attrs = $2; + + # If we also found the sub name on this call then append PROTO. + # This is not necessary but for compatability with previous + # versions when the -csc flag is used: + if ( $match && $proto ) { + $tok .= $proto; + } + $match ||= 1; + + # Handle prototype on separate line from subname + if ($subname_saved) { + $package = $package_saved; + $subname = $subname_saved; + $tok = $last_nonblank_token; + } + $type = 'i'; + } + + if ($match) { + + # ATTRS: if there are attributes, back up and let the ':' be + # found later by the scanner. + my $pos = pos($input_line); + if ($attrs) { + $pos -= length($attrs); + } + + my $next_nonblank_token = $tok; + + # catch case of line with leading ATTR ':' after anonymous sub + if ( $pos == $pos_beg && $tok eq ':' ) { + $type = 'A'; + $in_attribute_list = 1; + } + + # Otherwise, if we found a match we must convert back from + # string position to the pre_token index for continued parsing. + else { + + # I don't think an error flag can occur here ..but ? + my $error; + ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, + $max_token_index ); + if ($error) { warning("Possibly invalid sub\n") } + + # check for multiple definitions of a sub + ( $next_nonblank_token, my $i_next ) = + find_next_nonblank_token_on_this_line( $i, $rtokens, + $max_token_index ); + } + + if ( $next_nonblank_token =~ /^(\s*|#)$/ ) + { # skip blank or side comment + my ( $rpre_tokens, $rpre_types ) = + peek_ahead_for_n_nonblank_pre_tokens(1); + if ( defined($rpre_tokens) && @{$rpre_tokens} ) { + $next_nonblank_token = $rpre_tokens->[0]; + } + else { + $next_nonblank_token = '}'; + } + } + $package_saved = ""; + $subname_saved = ""; + + # See what's next... + if ( $next_nonblank_token eq '{' ) { + if ($subname) { + + # Check for multiple definitions of a sub, but + # it is ok to have multiple sub BEGIN, etc, + # so we do not complain if name is all caps + if ( $saw_function_definition{$package}{$subname} + && $subname !~ /^[A-Z]+$/ ) + { + my $lno = $saw_function_definition{$package}{$subname}; + warning( +"already saw definition of 'sub $subname' in package '$package' at line $lno\n" + ); + } + $saw_function_definition{$package}{$subname} = + $tokenizer_self->{_last_line_number}; + } + } + elsif ( $next_nonblank_token eq ';' ) { + } + elsif ( $next_nonblank_token eq '}' ) { + } + + # ATTRS - if an attribute list follows, remember the name + # of the sub so the next opening brace can be labeled. + # Setting 'statement_type' causes any ':'s to introduce + # attributes. + elsif ( $next_nonblank_token eq ':' ) { + $statement_type = $tok; + } + + # if we stopped before an open paren ... + elsif ( $next_nonblank_token eq '(' ) { + + # If we DID NOT see this paren above then it must be on the + # next line so we will set a flag to come back here and see if + # it is a PROTOTYPE + + # Otherwise, we assume it is a SIGNATURE rather than a + # PROTOTYPE and let the normal tokenizer handle it as a list + if ( !$saw_opening_paren ) { + $id_scan_state = 'sub'; # we must come back to get proto + $package_saved = $package; + $subname_saved = $subname; + } + $statement_type = $tok; + } + elsif ($next_nonblank_token) { # EOF technically ok + warning( +"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" + ); + } + check_prototype( $proto, $package, $subname ); + } + + # no match but line not blank + else { + } + return ( $i, $tok, $type, $id_scan_state ); + } +} + +#########i############################################################### +# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS +######################################################################### + +sub find_next_nonblank_token { + my ( $i, $rtokens, $max_token_index ) = @_; + + if ( $i >= $max_token_index ) { + if ( !peeked_ahead() ) { + peeked_ahead(1); + $rtokens = + peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); + } + } + my $next_nonblank_token = $rtokens->[ ++$i ]; + + if ( $next_nonblank_token =~ /^\s*$/ ) { + $next_nonblank_token = $rtokens->[ ++$i ]; + } + return ( $next_nonblank_token, $i ); +} + +sub numerator_expected { + + # this is a filter for a possible numerator, in support of guessing + # for the / pattern delimiter token. + # returns - + # 1 - yes + # 0 - can't tell + # -1 - no + # Note: I am using the convention that variables ending in + # _expected have these 3 possible values. + my ( $i, $rtokens, $max_token_index ) = @_; + my $numerator_expected = 0; + + my $next_token = $rtokens->[ $i + 1 ]; + if ( $next_token eq '=' ) { $i++; } # handle /= + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { + $numerator_expected = 1; + } + else { + + if ( $next_nonblank_token =~ /^\s*$/ ) { + $numerator_expected = 0; + } + else { + $numerator_expected = -1; + } + } + return $numerator_expected; +} + +sub pattern_expected { + + # This is the start of a filter for a possible pattern. + # It looks at the token after a possible pattern and tries to + # determine if that token could end a pattern. + # returns - + # 1 - yes + # 0 - can't tell + # -1 - no + my ( $i, $rtokens, $max_token_index ) = @_; + my $is_pattern = 0; + + my $next_token = $rtokens->[ $i + 1 ]; + if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # list of tokens which may follow a pattern + # (can probably be expanded) + if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) + { + $is_pattern = 1; + } + else { + + if ( $next_nonblank_token =~ /^\s*$/ ) { + $is_pattern = 0; + } + else { + $is_pattern = -1; + } + } + return $is_pattern; +} + +sub find_next_nonblank_token_on_this_line { + my ( $i, $rtokens, $max_token_index ) = @_; + my $next_nonblank_token; + + if ( $i < $max_token_index ) { + $next_nonblank_token = $rtokens->[ ++$i ]; + + if ( $next_nonblank_token =~ /^\s*$/ ) { + + if ( $i < $max_token_index ) { + $next_nonblank_token = $rtokens->[ ++$i ]; + } + } + } + else { + $next_nonblank_token = ""; + } + return ( $next_nonblank_token, $i ); +} + +sub find_angle_operator_termination { + + # We are looking at a '<' and want to know if it is an angle operator. + # We are to return: + # $i = pretoken index of ending '>' if found, current $i otherwise + # $type = 'Q' if found, '>' otherwise + my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; + my $i = $i_beg; + my $type = '<'; + pos($input_line) = 1 + $rtoken_map->[$i]; + + my $filter; + + # we just have to find the next '>' if a term is expected + if ( $expecting == TERM ) { $filter = '[\>]' } + + # we have to guess if we don't know what is expected + elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } + + # shouldn't happen - we shouldn't be here if operator is expected + else { warning("Program Bug in find_angle_operator_termination\n") } + + # To illustrate what we might be looking at, in case we are + # guessing, here are some examples of valid angle operators + # (or file globs): + # + # + # <$fh> + # <*.c *.h> + # <_> + # ( glob.t) + # <${PREFIX}*img*.$IMAGE_TYPE> + # + # + # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> + # + # Here are some examples of lines which do not have angle operators: + # return unless $self->[2]++ < $#{$self->[1]}; + # < 2 || @$t > + # + # the following line from dlister.pl caused trouble: + # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; + # + # If the '<' starts an angle operator, it must end on this line and + # it must not have certain characters like ';' and '=' in it. I use + # this to limit the testing. This filter should be improved if + # possible. + + if ( $input_line =~ /($filter)/g ) { + + if ( $1 eq '>' ) { + + # We MAY have found an angle operator termination if we get + # here, but we need to do more to be sure we haven't been + # fooled. + my $pos = pos($input_line); + + my $pos_beg = $rtoken_map->[$i]; + my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); + + # Reject if the closing '>' follows a '-' as in: + # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } + if ( $expecting eq UNKNOWN ) { + my $check = substr( $input_line, $pos - 2, 1 ); + if ( $check eq '-' ) { + return ( $i, $type ); + } + } + + ######################################debug##### + #write_diagnostics( "ANGLE? :$str\n"); + #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; + ######################################debug##### + $type = 'Q'; + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + + # It may be possible that a quote ends midway in a pretoken. + # If this happens, it may be necessary to split the pretoken. + if ($error) { + warning( + "Possible tokinization error..please check this line\n"); + report_possible_bug(); + } + + # Now let's see where we stand.... + # OK if math op not possible + if ( $expecting == TERM ) { + } + + # OK if there are no more than 2 pre-tokens inside + # (not possible to write 2 token math between < and >) + # This catches most common cases + elsif ( $i <= $i_beg + 3 ) { + write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); + } + + # Not sure.. + else { + + # Let's try a Brace Test: any braces inside must balance + my $br = 0; + while ( $str =~ /\{/g ) { $br++ } + while ( $str =~ /\}/g ) { $br-- } + my $sb = 0; + while ( $str =~ /\[/g ) { $sb++ } + while ( $str =~ /\]/g ) { $sb-- } + my $pr = 0; + while ( $str =~ /\(/g ) { $pr++ } + while ( $str =~ /\)/g ) { $pr-- } + + # if braces do not balance - not angle operator + if ( $br || $sb || $pr ) { + $i = $i_beg; + $type = '<'; + write_diagnostics( + "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); + } + + # we should keep doing more checks here...to be continued + # Tentatively accepting this as a valid angle operator. + # There are lots more things that can be checked. + else { + write_diagnostics( + "ANGLE-Guessing yes: $str expecting=$expecting\n"); + write_logfile_entry("Guessing angle operator here: $str\n"); + } + } + } + + # didn't find ending > + else { + if ( $expecting == TERM ) { + warning("No ending > for angle operator\n"); + } + } + } + return ( $i, $type ); +} + +sub scan_number_do { + + # scan a number in any of the formats that Perl accepts + # Underbars (_) are allowed in decimal numbers. + # input parameters - + # $input_line - the string to scan + # $i - pre_token index to start scanning + # $rtoken_map - reference to the pre_token map giving starting + # character position in $input_line of token $i + # output parameters - + # $i - last pre_token index of the number just scanned + # number - the number (characters); or undef if not a number + + my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; + my $pos_beg = $rtoken_map->[$i]; + my $pos; + my $i_begin = $i; + my $number = undef; + my $type = $input_type; + + my $first_char = substr( $input_line, $pos_beg, 1 ); + + # Look for bad starting characters; Shouldn't happen.. + if ( $first_char !~ /[\d\.\+\-Ee]/ ) { + warning("Program bug - scan_number given character $first_char\n"); + report_definite_bug(); + return ( $i, $type, $number ); + } + + # handle v-string without leading 'v' character ('Two Dot' rule) + # (vstring.t) + # TODO: v-strings may contain underscores + pos($input_line) = $pos_beg; + if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { + $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $number = substr( $input_line, $pos_beg, $numc ); + $type = 'v'; + report_v_string($number); + } + + # handle octal, hex, binary + if ( !defined($number) ) { + pos($input_line) = $pos_beg; + if ( $input_line =~ + /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) + { + $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $number = substr( $input_line, $pos_beg, $numc ); + $type = 'n'; + } + } + + # handle decimal + if ( !defined($number) ) { + pos($input_line) = $pos_beg; + + if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { + $pos = pos($input_line); + + # watch out for things like 0..40 which would give 0. by this; + if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) + && ( substr( $input_line, $pos, 1 ) eq '.' ) ) + { + $pos--; + } + my $numc = $pos - $pos_beg; + $number = substr( $input_line, $pos_beg, $numc ); + $type = 'n'; + } + } + + # filter out non-numbers like e + - . e2 .e3 +e6 + # the rule: at least one digit, and any 'e' must be preceded by a digit + if ( + $number !~ /\d/ # no digits + || ( $number =~ /^(.*)[eE]/ + && $1 !~ /\d/ ) # or no digits before the 'e' + ) + { + $number = undef; + $type = $input_type; + return ( $i, $type, $number ); + } + + # Found a number; now we must convert back from character position + # to pre_token index. An error here implies user syntax error. + # An example would be an invalid octal number like '009'. + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + if ($error) { warning("Possibly invalid number\n") } + + return ( $i, $type, $number ); +} + +sub inverse_pretoken_map { + + # Starting with the current pre_token index $i, scan forward until + # finding the index of the next pre_token whose position is $pos. + my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; + my $error = 0; + + while ( ++$i <= $max_token_index ) { + + if ( $pos <= $rtoken_map->[$i] ) { + + # Let the calling routine handle errors in which we do not + # land on a pre-token boundary. It can happen by running + # perltidy on some non-perl scripts, for example. + if ( $pos < $rtoken_map->[$i] ) { $error = 1 } + $i--; + last; + } + } + return ( $i, $error ); +} + +sub find_here_doc { + + # find the target of a here document, if any + # input parameters: + # $i - token index of the second < of << + # ($i must be less than the last token index if this is called) + # output parameters: + # $found_target = 0 didn't find target; =1 found target + # HERE_TARGET - the target string (may be empty string) + # $i - unchanged if not here doc, + # or index of the last token of the here target + # $saw_error - flag noting unbalanced quote on here target + my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; + my $ibeg = $i; + my $found_target = 0; + my $here_doc_target = ''; + my $here_quote_character = ''; + my $saw_error = 0; + my ( $next_nonblank_token, $i_next_nonblank, $next_token ); + $next_token = $rtokens->[ $i + 1 ]; + + # perl allows a backslash before the target string (heredoc.t) + my $backslash = 0; + if ( $next_token eq '\\' ) { + $backslash = 1; + $next_token = $rtokens->[ $i + 2 ]; + } + + ( $next_nonblank_token, $i_next_nonblank ) = + find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); + + if ( $next_nonblank_token =~ /[\'\"\`]/ ) { + + my $in_quote = 1; + my $quote_depth = 0; + my $quote_pos = 0; + my $quoted_string; + + ( + $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, + $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); + + if ($in_quote) { # didn't find end of quote, so no target found + $i = $ibeg; + if ( $expecting == TERM ) { + warning( +"Did not find here-doc string terminator ($here_quote_character) before end of line \n" + ); + $saw_error = 1; + } + } + else { # found ending quote + $found_target = 1; + + my $tokj; + foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) { + $tokj = $rtokens->[$j]; + + # we have to remove any backslash before the quote character + # so that the here-doc-target exactly matches this string + next + if ( $tokj eq "\\" + && $j < $i - 1 + && $rtokens->[ $j + 1 ] eq $here_quote_character ); + $here_doc_target .= $tokj; + } + } + } + + elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { + $found_target = 1; + write_logfile_entry( + "found blank here-target after <<; suggest using \"\"\n"); + $i = $ibeg; + } + elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << + + my $here_doc_expected; + if ( $expecting == UNKNOWN ) { + $here_doc_expected = guess_if_here_doc($next_token); + } + else { + $here_doc_expected = 1; + } + + if ($here_doc_expected) { + $found_target = 1; + $here_doc_target = $next_token; + $i = $ibeg + 1; + } + + } + else { + + if ( $expecting == TERM ) { + $found_target = 1; + write_logfile_entry("Note: bare here-doc operator <<\n"); + } + else { + $i = $ibeg; + } + } + + # patch to neglect any prepended backslash + if ( $found_target && $backslash ) { $i++ } + + return ( $found_target, $here_doc_target, $here_quote_character, $i, + $saw_error ); +} + +sub do_quote { + + # follow (or continue following) quoted string(s) + # $in_quote return code: + # 0 - ok, found end + # 1 - still must find end of quote whose target is $quote_character + # 2 - still looking for end of first of two quotes + # + # Returns updated strings: + # $quoted_string_1 = quoted string seen while in_quote=1 + # $quoted_string_2 = quoted string seen while in_quote=2 + my ( + $i, $in_quote, $quote_character, + $quote_pos, $quote_depth, $quoted_string_1, + $quoted_string_2, $rtokens, $rtoken_map, + $max_token_index + ) = @_; + + my $in_quote_starting = $in_quote; + + my $quoted_string; + if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow + my $ibeg = $i; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + $quoted_string_2 .= $quoted_string; + if ( $in_quote == 1 ) { + if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } + $quote_character = ''; + } + else { + $quoted_string_2 .= "\n"; + } + } + + if ( $in_quote == 1 ) { # one (more) quote to follow + my $ibeg = $i; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + $quoted_string_1 .= $quoted_string; + if ( $in_quote == 1 ) { + $quoted_string_1 .= "\n"; + } + } + return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2 ); +} + +sub follow_quoted_string { + + # scan for a specific token, skipping escaped characters + # if the quote character is blank, use the first non-blank character + # input parameters: + # $rtokens = reference to the array of tokens + # $i = the token index of the first character to search + # $in_quote = number of quoted strings being followed + # $beginning_tok = the starting quote character + # $quote_pos = index to check next for alphanumeric delimiter + # output parameters: + # $i = the token index of the ending quote character + # $in_quote = decremented if found end, unchanged if not + # $beginning_tok = the starting quote character + # $quote_pos = index to check next for alphanumeric delimiter + # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. + # $quoted_string = the text of the quote (without quotation tokens) + my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, + $max_token_index ) + = @_; + my ( $tok, $end_tok ); + my $i = $i_beg - 1; + my $quoted_string = ""; + + TOKENIZER_DEBUG_FLAG_QUOTE && do { + print STDOUT +"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; + }; + + # get the corresponding end token + if ( $beginning_tok !~ /^\s*$/ ) { + $end_tok = matching_end_token($beginning_tok); + } + + # a blank token means we must find and use the first non-blank one + else { + my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a + + while ( $i < $max_token_index ) { + $tok = $rtokens->[ ++$i ]; + + if ( $tok !~ /^\s*$/ ) { + + if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { + $i = $max_token_index; + } + else { + + if ( length($tok) > 1 ) { + if ( $quote_pos <= 0 ) { $quote_pos = 1 } + $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); + } + else { + $beginning_tok = $tok; + $quote_pos = 0; + } + $end_tok = matching_end_token($beginning_tok); + $quote_depth = 1; + last; + } + } + else { + $allow_quote_comments = 1; + } + } + } + + # There are two different loops which search for the ending quote + # character. In the rare case of an alphanumeric quote delimiter, we + # have to look through alphanumeric tokens character-by-character, since + # the pre-tokenization process combines multiple alphanumeric + # characters, whereas for a non-alphanumeric delimiter, only tokens of + # length 1 can match. + + ################################################################### + # Case 1 (rare): loop for case of alphanumeric quote delimiter.. + # "quote_pos" is the position the current word to begin searching + ################################################################### + if ( $beginning_tok =~ /\w/ ) { + + # Note this because it is not recommended practice except + # for obfuscated perl contests + if ( $in_quote == 1 ) { + write_logfile_entry( + "Note: alphanumeric quote delimiter ($beginning_tok) \n"); + } + + while ( $i < $max_token_index ) { + + if ( $quote_pos == 0 || ( $i < 0 ) ) { + $tok = $rtokens->[ ++$i ]; + + if ( $tok eq '\\' ) { + + # retain backslash unless it hides the end token + $quoted_string .= $tok + unless $rtokens->[ $i + 1 ] eq $end_tok; + $quote_pos++; + last if ( $i >= $max_token_index ); + $tok = $rtokens->[ ++$i ]; + } + } + my $old_pos = $quote_pos; + + unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) + { + + } + $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); + + if ( $quote_pos > 0 ) { + + $quoted_string .= + substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); + + $quote_depth--; + + if ( $quote_depth == 0 ) { + $in_quote--; + last; + } + } + else { + $quoted_string .= substr( $tok, $old_pos ); + } + } + } + + ######################################################################## + # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. + ######################################################################## + else { + + while ( $i < $max_token_index ) { + $tok = $rtokens->[ ++$i ]; + + if ( $tok eq $end_tok ) { + $quote_depth--; + + if ( $quote_depth == 0 ) { + $in_quote--; + last; + } + } + elsif ( $tok eq $beginning_tok ) { + $quote_depth++; + } + elsif ( $tok eq '\\' ) { + + # retain backslash unless it hides the beginning or end token + $tok = $rtokens->[ ++$i ]; + $quoted_string .= '\\' + unless ( $tok eq $end_tok || $tok eq $beginning_tok ); + } + $quoted_string .= $tok; + } + } + if ( $i > $max_token_index ) { $i = $max_token_index } + return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, + $quoted_string ); +} + +sub indicate_error { + my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; + interrupt_logfile(); + warning($msg); + write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); + resume_logfile(); + return; +} + +sub write_error_indicator_pair { + my ( $line_number, $input_line, $pos, $carrat ) = @_; + my ( $offset, $numbered_line, $underline ) = + make_numbered_line( $line_number, $input_line, $pos ); + $underline = write_on_underline( $underline, $pos - $offset, $carrat ); + warning( $numbered_line . "\n" ); + $underline =~ s/\s*$//; + warning( $underline . "\n" ); + return; +} + +sub make_numbered_line { + + # Given an input line, its line number, and a character position of + # interest, create a string not longer than 80 characters of the form + # $lineno: sub_string + # such that the sub_string of $str contains the position of interest + # + # Here is an example of what we want, in this case we add trailing + # '...' because the line is long. + # + # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... + # + # Here is another example, this time in which we used leading '...' + # because of excessive length: + # + # 2: ... er of the World Wide Web Consortium's + # + # input parameters are: + # $lineno = line number + # $str = the text of the line + # $pos = position of interest (the error) : 0 = first character + # + # We return : + # - $offset = an offset which corrects the position in case we only + # display part of a line, such that $pos-$offset is the effective + # position from the start of the displayed line. + # - $numbered_line = the numbered line as above, + # - $underline = a blank 'underline' which is all spaces with the same + # number of characters as the numbered line. + + my ( $lineno, $str, $pos ) = @_; + my $offset = ( $pos < 60 ) ? 0 : $pos - 40; + my $excess = length($str) - $offset - 68; + my $numc = ( $excess > 0 ) ? 68 : undef; + + if ( defined($numc) ) { + if ( $offset == 0 ) { + $str = substr( $str, $offset, $numc - 4 ) . " ..."; + } + else { + $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; + } + } + else { + + if ( $offset == 0 ) { + } + else { + $str = "... " . substr( $str, $offset + 4 ); + } + } + + my $numbered_line = sprintf( "%d: ", $lineno ); + $offset -= length($numbered_line); + $numbered_line .= $str; + my $underline = " " x length($numbered_line); + return ( $offset, $numbered_line, $underline ); +} + +sub write_on_underline { + + # The "underline" is a string that shows where an error is; it starts + # out as a string of blanks with the same length as the numbered line of + # code above it, and we have to add marking to show where an error is. + # In the example below, we want to write the string '--^' just below + # the line of bad code: + # + # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... + # ---^ + # We are given the current underline string, plus a position and a + # string to write on it. + # + # In the above example, there will be 2 calls to do this: + # First call: $pos=19, pos_chr=^ + # Second call: $pos=16, pos_chr=--- + # + # This is a trivial thing to do with substr, but there is some + # checking to do. + + my ( $underline, $pos, $pos_chr ) = @_; + + # check for error..shouldn't happen + unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { + return $underline; + } + my $excess = length($pos_chr) + $pos - length($underline); + if ( $excess > 0 ) { + $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); + } + substr( $underline, $pos, length($pos_chr) ) = $pos_chr; + return ($underline); +} + +sub pre_tokenize { + + # Break a string, $str, into a sequence of preliminary tokens. We + # are interested in these types of tokens: + # words (type='w'), example: 'max_tokens_wanted' + # digits (type = 'd'), example: '0755' + # whitespace (type = 'b'), example: ' ' + # any other single character (i.e. punct; type = the character itself). + # We cannot do better than this yet because we might be in a quoted + # string or pattern. Caller sets $max_tokens_wanted to 0 to get all + # tokens. + my ( $str, $max_tokens_wanted ) = @_; + + # we return references to these 3 arrays: + my @tokens = (); # array of the tokens themselves + my @token_map = (0); # string position of start of each token + my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct + + do { + + # whitespace + if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } + + # numbers + # note that this must come before words! + elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } + + # words + elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } + + # single-character punctuation + elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } + + # that's all.. + else { + return ( \@tokens, \@token_map, \@type ); + } + + push @tokens, $1; + push @token_map, pos($str); + + } while ( --$max_tokens_wanted != 0 ); + + return ( \@tokens, \@token_map, \@type ); +} + +sub show_tokens { + + # this is an old debug routine + # not called, but saved for reference + my ( $rtokens, $rtoken_map ) = @_; + my $num = scalar( @{$rtokens} ); + + foreach my $i ( 0 .. $num - 1 ) { + my $len = length( $rtokens->[$i] ); + print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n"; + } + return; +} + +{ + my %matching_end_token; + + BEGIN { + %matching_end_token = ( + '{' => '}', + '(' => ')', + '[' => ']', + '<' => '>', + ); + } + + sub matching_end_token { + + # return closing character for a pattern + my $beginning_token = shift; + if ( $matching_end_token{$beginning_token} ) { + return $matching_end_token{$beginning_token}; + } + return ($beginning_token); + } +} + +sub dump_token_types { + my ( $class, $fh ) = @_; + + # This should be the latest list of token types in use + # adding NEW_TOKENS: add a comment here + print $fh <<'END_OF_LIST'; + +Here is a list of the token types currently used for lines of type 'CODE'. +For the following tokens, the "type" of a token is just the token itself. + +.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> +( ) <= >= == =~ !~ != ++ -- /= x= +... **= <<= >>= &&= ||= //= <=> +, + - / * | % ! x ~ = \ ? : . < > ^ & + +The following additional token types are defined: + + type meaning + b blank (white space) + { indent: opening structural curly brace or square bracket or paren + (code block, anonymous hash reference, or anonymous array reference) + } outdent: right structural curly brace or square bracket or paren + [ left non-structural square bracket (enclosing an array index) + ] right non-structural square bracket + ( left non-structural paren (all but a list right of an =) + ) right non-structural paren + L left non-structural curly brace (enclosing a key) + R right non-structural curly brace + ; terminal semicolon + f indicates a semicolon in a "for" statement + h here_doc operator << + # a comment + Q indicates a quote or pattern + q indicates a qw quote block + k a perl keyword + C user-defined constant or constant function (with void prototype = ()) + U user-defined function taking parameters + G user-defined function taking block parameter (like grep/map/eval) + M (unused, but reserved for subroutine definition name) + P (unused, but -html uses it to label pod text) + t type indicater such as %,$,@,*,&,sub + w bare word (perhaps a subroutine call) + i identifier of some type (with leading %, $, @, *, &, sub, -> ) + n a number + v a v-string + F a file test operator (like -e) + Y File handle + Z identifier in indirect object slot: may be file handle, object + J LABEL: code block label + j LABEL after next, last, redo, goto + p unary + + m unary - + pp pre-increment operator ++ + mm pre-decrement operator -- + A : used as attribute separator + + Here are the '_line_type' codes used internally: + SYSTEM - system-specific code before hash-bang line + CODE - line of perl code (including comments) + POD_START - line starting pod, such as '=head' + POD - pod documentation text + POD_END - last line of pod section, '=cut' + HERE - text of here-document + HERE_END - last line of here-doc (target word) + FORMAT - format section + FORMAT_END - last line of format section, '.' + DATA_START - __DATA__ line + DATA - unidentified text following __DATA__ + END_START - __END__ line + END - unidentified text following __END__ + ERROR - we are in big trouble, probably not a perl script +END_OF_LIST + + return; +} + +BEGIN { + + # These names are used in error messages + @opening_brace_names = qw# '{' '[' '(' '?' #; + @closing_brace_names = qw# '}' ']' ')' ':' #; + + my @q; + + my @digraphs = qw( + .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> + <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. + ); + @is_digraph{@digraphs} = (1) x scalar(@digraphs); + + my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~); + @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); + + my @tetragraphs = qw( <<>> ); + @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs); + + # make a hash of all valid token types for self-checking the tokenizer + # (adding NEW_TOKENS : select a new character and add to this list) + my @valid_token_types = qw# + A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v + { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & + #; + push( @valid_token_types, @digraphs ); + push( @valid_token_types, @trigraphs ); + push( @valid_token_types, @tetragraphs ); + push( @valid_token_types, ( '#', ',', 'CORE::' ) ); + @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); + + # a list of file test letters, as in -e (Table 3-4 of 'camel 3') + my @file_test_operators = + qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); + @is_file_test_operator{@file_test_operators} = + (1) x scalar(@file_test_operators); + + # these functions have prototypes of the form (&), so when they are + # followed by a block, that block MAY BE followed by an operator. + # Smartmatch operator ~~ may be followed by anonymous hash or array ref + @q = qw( do eval ); + @is_block_operator{@q} = (1) x scalar(@q); + + # these functions allow an identifier in the indirect object slot + @q = qw( print printf sort exec system say); + @is_indirect_object_taker{@q} = (1) x scalar(@q); + + # These tokens may precede a code block + # patched for SWITCH/CASE/CATCH. Actually these could be removed + # now and we could let the extended-syntax coding handle them + @q = + qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless do while until eval for foreach map grep sort + switch case given when catch try finally); + @is_code_block_token{@q} = (1) x scalar(@q); + + # I'll build the list of keywords incrementally + my @Keywords = (); + + # keywords and tokens after which a value or pattern is expected, + # but not an operator. In other words, these should consume terms + # to their right, or at least they are not expected to be followed + # immediately by operators. + my @value_requestor = qw( + AUTOLOAD + BEGIN + CHECK + DESTROY + END + EQ + GE + GT + INIT + LE + LT + NE + UNITCHECK + abs + accept + alarm + and + atan2 + bind + binmode + bless + break + caller + chdir + chmod + chomp + chop + chown + chr + chroot + close + closedir + cmp + connect + continue + cos + crypt + dbmclose + dbmopen + defined + delete + die + dump + each + else + elsif + eof + eq + exec + exists + exit + exp + fcntl + fileno + flock + for + foreach + formline + ge + getc + getgrgid + getgrnam + gethostbyaddr + gethostbyname + getnetbyaddr + getnetbyname + getpeername + getpgrp + getpriority + getprotobyname + getprotobynumber + getpwnam + getpwuid + getservbyname + getservbyport + getsockname + getsockopt + glob + gmtime + goto + grep + gt + hex + if + index + int + ioctl + join + keys + kill + last + lc + lcfirst + le + length + link + listen + local + localtime + lock + log + lstat + lt + map + mkdir + msgctl + msgget + msgrcv + msgsnd + my + ne + next + no + not + oct + open + opendir + or + ord + our + pack + pipe + pop + pos + print + printf + prototype + push + quotemeta + rand + read + readdir + readlink + readline + readpipe + recv + redo + ref + rename + require + reset + return + reverse + rewinddir + rindex + rmdir + scalar + seek + seekdir + select + semctl + semget + semop + send + sethostent + setnetent + setpgrp + setpriority + setprotoent + setservent + setsockopt + shift + shmctl + shmget + shmread + shmwrite + shutdown + sin + sleep + socket + socketpair + sort + splice + split + sprintf + sqrt + srand + stat + study + substr + symlink + syscall + sysopen + sysread + sysseek + system + syswrite + tell + telldir + tie + tied + truncate + uc + ucfirst + umask + undef + unless + unlink + unpack + unshift + untie + until + use + utime + values + vec + waitpid + warn + while + write + xor + + switch + case + given + when + err + say + + catch + ); + + # patched above for SWITCH/CASE given/when err say + # 'err' is a fairly safe addition. + # TODO: 'default' still needed if appropriate + # 'use feature' seen, but perltidy works ok without it. + # Concerned that 'default' could break code. + push( @Keywords, @value_requestor ); + + # These are treated the same but are not keywords: + my @extra_vr = qw( + constant + vars + ); + push( @value_requestor, @extra_vr ); + + @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); + + # this list contains keywords which do not look for arguments, + # so that they might be followed by an operator, or at least + # not a term. + my @operator_requestor = qw( + endgrent + endhostent + endnetent + endprotoent + endpwent + endservent + fork + getgrent + gethostent + getlogin + getnetent + getppid + getprotoent + getpwent + getservent + setgrent + setpwent + time + times + wait + wantarray + ); + + push( @Keywords, @operator_requestor ); + + # These are treated the same but are not considered keywords: + my @extra_or = qw( + STDERR + STDIN + STDOUT + ); + + push( @operator_requestor, @extra_or ); + + @expecting_operator_token{@operator_requestor} = + (1) x scalar(@operator_requestor); + + # these token TYPES expect trailing operator but not a term + # note: ++ and -- are post-increment and decrement, 'C' = constant + my @operator_requestor_types = qw( ++ -- C <> q ); + @expecting_operator_types{@operator_requestor_types} = + (1) x scalar(@operator_requestor_types); + + # these token TYPES consume values (terms) + # note: pp and mm are pre-increment and decrement + # f=semicolon in for, F=file test operator + my @value_requestor_type = qw# + L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x + **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= + <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ + f F pp mm Y p m U J G j >> << ^ t + ~. ^. |. &. ^.= |.= &.= + #; + push( @value_requestor_type, ',' ) + ; # (perl doesn't like a ',' in a qw block) + @expecting_term_types{@value_requestor_type} = + (1) x scalar(@value_requestor_type); + + # Note: the following valid token types are not assigned here to + # hashes requesting to be followed by values or terms, but are + # instead currently hard-coded into sub operator_expected: + # ) -> :: Q R Z ] b h i k n v w } # + + # For simple syntax checking, it is nice to have a list of operators which + # will really be unhappy if not followed by a term. This includes most + # of the above... + %really_want_term = %expecting_term_types; + + # with these exceptions... + delete $really_want_term{'U'}; # user sub, depends on prototype + delete $really_want_term{'F'}; # file test works on $_ if no following term + delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; + # let perl do it + + @q = qw(q qq qw qx qr s y tr m); + @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); + + # These keywords are handled specially in the tokenizer code: + my @special_keywords = qw( + do + eval + format + m + package + q + qq + qr + qw + qx + s + sub + tr + y + ); + push( @Keywords, @special_keywords ); + + # Keywords after which list formatting may be used + # WARNING: do not include |map|grep|eval or perl may die on + # syntax errors (map1.t). + my @keyword_taking_list = qw( + and + chmod + chomp + chop + chown + dbmopen + die + elsif + exec + fcntl + for + foreach + formline + getsockopt + if + index + ioctl + join + kill + local + msgctl + msgrcv + msgsnd + my + open + or + our + pack + print + printf + push + read + readpipe + recv + return + reverse + rindex + seek + select + semctl + semget + send + setpriority + setsockopt + shmctl + shmget + shmread + shmwrite + socket + socketpair + sort + splice + split + sprintf + substr + syscall + sysopen + sysread + sysseek + system + syswrite + tie + unless + unlink + unpack + unshift + until + vec + warn + while + given + when + ); + @is_keyword_taking_list{@keyword_taking_list} = + (1) x scalar(@keyword_taking_list); + + # perl functions which may be unary operators + my @keyword_taking_optional_args = qw( + chomp + eof + eval + lc + pop + shift + uc + undef + ); + @is_keyword_taking_optional_args{@keyword_taking_optional_args} = + (1) x scalar(@keyword_taking_optional_args); + + # These are not used in any way yet + # my @unused_keywords = qw( + # __FILE__ + # __LINE__ + # __PACKAGE__ + # ); + + # The list of keywords was originally extracted from function 'keyword' in + # perl file toke.c version 5.005.03, using this utility, plus a + # little editing: (file getkwd.pl): + # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } + # Add 'get' prefix where necessary, then split into the above lists. + # This list should be updated as necessary. + # The list should not contain these special variables: + # ARGV DATA ENV SIG STDERR STDIN STDOUT + # __DATA__ __END__ + + @is_keyword{@Keywords} = (1) x scalar(@Keywords); +} +1; + diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm new file mode 100644 index 0000000..2ae6e19 --- /dev/null +++ b/lib/Perl/Tidy/VerticalAligner.pm @@ -0,0 +1,3267 @@ +package Perl::Tidy::VerticalAligner; +use strict; +use warnings; +our $VERSION = '20190601'; + +use Perl::Tidy::VerticalAligner::Alignment; +use Perl::Tidy::VerticalAligner::Line; + +# The Perl::Tidy::VerticalAligner package collects output lines and +# attempts to line up certain common tokens, such as => and #, which are +# identified by the calling routine. +# +# There are two main routines: valign_input and flush. Append acts as a +# storage buffer, collecting lines into a group which can be vertically +# aligned. When alignment is no longer possible or desirable, it dumps +# the group to flush. +# +# valign_input -----> flush +# +# collects writes +# vertical one +# groups group + +BEGIN { + + # Caution: these debug flags produce a lot of output + # They should all be 0 except when debugging small scripts + + use constant VALIGN_DEBUG_FLAG_APPEND => 0; + use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; + use constant VALIGN_DEBUG_FLAG_TERNARY => 0; + use constant VALIGN_DEBUG_FLAG_TABS => 0; + + my $debug_warning = sub { + print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; + return; + }; + + VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); + VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); + VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); + VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); + +} + +use vars qw( + $vertical_aligner_self + $maximum_alignment_index + $ralignment_list + $maximum_jmax_seen + $minimum_jmax_seen + $previous_minimum_jmax_seen + $previous_maximum_jmax_seen + @group_lines + $group_level + $group_type + $group_maximum_gap + $marginal_match + $last_level_written + $last_leading_space_count + $extra_indent_ok + $zero_count + $last_comment_column + $last_side_comment_line_number + $last_side_comment_length + $last_side_comment_level + $outdented_line_count + $first_outdented_line_at + $last_outdented_line_at + $diagnostics_object + $logger_object + $file_writer_object + @side_comment_history + $comment_leading_space_count + $is_matching_terminal_line + $consecutive_block_comments + + $cached_line_text + $cached_line_type + $cached_line_flag + $cached_seqno + $cached_line_valid + $cached_line_leading_space_count + $cached_seqno_string + + $valign_buffer_filling + @valign_buffer + + $seqno_string + $last_nonblank_seqno_string + + $rOpts + + $rOpts_maximum_line_length + $rOpts_variable_maximum_line_length + $rOpts_continuation_indentation + $rOpts_indent_columns + $rOpts_tabs + $rOpts_entab_leading_whitespace + $rOpts_valign + + $rOpts_fixed_position_side_comment + $rOpts_minimum_space_to_comment + +); + +sub initialize { + + ( + my $class, $rOpts, $file_writer_object, $logger_object, + $diagnostics_object + ) = @_; + + # variables describing the entire space group: + $ralignment_list = []; + $group_level = 0; + $last_level_written = -1; + $extra_indent_ok = 0; # can we move all lines to the right? + $last_side_comment_length = 0; + $maximum_jmax_seen = 0; + $minimum_jmax_seen = 0; + $previous_minimum_jmax_seen = 0; + $previous_maximum_jmax_seen = 0; + + # variables describing each line of the group + @group_lines = (); # list of all lines in group + + $outdented_line_count = 0; + $first_outdented_line_at = 0; + $last_outdented_line_at = 0; + $last_side_comment_line_number = 0; + $last_side_comment_level = -1; + $is_matching_terminal_line = 0; + + # most recent 3 side comments; [ line number, column ] + $side_comment_history[0] = [ -300, 0 ]; + $side_comment_history[1] = [ -200, 0 ]; + $side_comment_history[2] = [ -100, 0 ]; + + # valign_output_step_B cache: + $cached_line_text = ""; + $cached_line_type = 0; + $cached_line_flag = 0; + $cached_seqno = 0; + $cached_line_valid = 0; + $cached_line_leading_space_count = 0; + $cached_seqno_string = ""; + + # string of sequence numbers joined together + $seqno_string = ""; + $last_nonblank_seqno_string = ""; + + # frequently used parameters + $rOpts_indent_columns = $rOpts->{'indent-columns'}; + $rOpts_tabs = $rOpts->{'tabs'}; + $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; + $rOpts_fixed_position_side_comment = + $rOpts->{'fixed-position-side-comment'}; + $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; + $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; + $rOpts_variable_maximum_line_length = + $rOpts->{'variable-maximum-line-length'}; + $rOpts_valign = $rOpts->{'valign'}; + + $consecutive_block_comments = 0; + forget_side_comment(); + + initialize_for_new_group(); + + $vertical_aligner_self = {}; + bless $vertical_aligner_self, $class; + return $vertical_aligner_self; +} + +sub initialize_for_new_group { + @group_lines = (); + $maximum_alignment_index = -1; # alignments in current group + $zero_count = 0; # count consecutive lines without tokens + $group_maximum_gap = 0; # largest gap introduced + $group_type = ""; + $marginal_match = 0; + $comment_leading_space_count = 0; + $last_leading_space_count = 0; + return; +} + +# interface to Perl::Tidy::Diagnostics routines +sub write_diagnostics { + my $msg = shift; + if ($diagnostics_object) { + $diagnostics_object->write_diagnostics($msg); + } + return; +} + +# interface to Perl::Tidy::Logger routines +sub warning { + my ($msg) = @_; + if ($logger_object) { + $logger_object->warning($msg); + } + return; +} + +sub write_logfile_entry { + my ($msg) = @_; + if ($logger_object) { + $logger_object->write_logfile_entry($msg); + } + return; +} + +sub report_definite_bug { + if ($logger_object) { + $logger_object->report_definite_bug(); + } + return; +} + +sub get_cached_line_count { + my $self = shift; + return @group_lines + ( $cached_line_type ? 1 : 0 ); +} + +sub get_spaces { + + # return the number of leading spaces associated with an indentation + # variable $indentation is either a constant number of spaces or an + # object with a get_spaces method. + my $indentation = shift; + return ref($indentation) ? $indentation->get_spaces() : $indentation; +} + +sub get_recoverable_spaces { + + # return the number of spaces (+ means shift right, - means shift left) + # that we would like to shift a group of lines with the same indentation + # to get them to line up with their opening parens + my $indentation = shift; + return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; +} + +sub get_stack_depth { + + my $indentation = shift; + return ref($indentation) ? $indentation->get_stack_depth() : 0; +} + +sub make_alignment { + my ( $col, $token ) = @_; + + # make one new alignment at column $col which aligns token $token + ++$maximum_alignment_index; + + #my $alignment = new Perl::Tidy::VerticalAligner::Alignment( + my $nlines = @group_lines; + my $alignment = Perl::Tidy::VerticalAligner::Alignment->new( + column => $col, + starting_column => $col, + matching_token => $token, + starting_line => $nlines - 1, + ending_line => $nlines - 1, + serial_number => $maximum_alignment_index, + ); + $ralignment_list->[$maximum_alignment_index] = $alignment; + return $alignment; +} + +sub dump_alignments { + print STDOUT +"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; + for my $i ( 0 .. $maximum_alignment_index ) { + my $column = $ralignment_list->[$i]->get_column(); + my $starting_column = $ralignment_list->[$i]->get_starting_column(); + my $matching_token = $ralignment_list->[$i]->get_matching_token(); + my $starting_line = $ralignment_list->[$i]->get_starting_line(); + my $ending_line = $ralignment_list->[$i]->get_ending_line(); + print STDOUT +"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; + } + return; +} + +sub save_alignment_columns { + for my $i ( 0 .. $maximum_alignment_index ) { + $ralignment_list->[$i]->save_column(); + } + return; +} + +sub restore_alignment_columns { + for my $i ( 0 .. $maximum_alignment_index ) { + $ralignment_list->[$i]->restore_column(); + } + return; +} + +sub forget_side_comment { + $last_comment_column = 0; + return; +} + +sub maximum_line_length_for_level { + + # return maximum line length for line starting with a given level + my $maximum_line_length = $rOpts_maximum_line_length; + if ($rOpts_variable_maximum_line_length) { + my $level = shift; + if ( $level < 0 ) { $level = 0 } + $maximum_line_length += $level * $rOpts_indent_columns; + } + return $maximum_line_length; +} + +sub push_group_line { + + my ($new_line) = @_; + push @group_lines, $new_line; + return; +} + +sub valign_input { + + # Place one line in the current vertical group. + # + # The input parameters are: + # $level = indentation level of this line + # $rfields = reference to array of fields + # $rpatterns = reference to array of patterns, one per field + # $rtokens = reference to array of tokens starting fields 1,2,.. + # + # Here is an example of what this package does. In this example, + # we are trying to line up both the '=>' and the '#'. + # + # '18' => 'grave', # \` + # '19' => 'acute', # `' + # '20' => 'caron', # \v + # <-tabs-><--field 2 ---><-f3-> + # | | | | + # | | | | + # col1 col2 col3 col4 + # + # The calling routine has already broken the entire line into 3 fields as + # indicated. (So the work of identifying promising common tokens has + # already been done). + # + # In this example, there will be 2 tokens being matched: '=>' and '#'. + # They are the leading parts of fields 2 and 3, but we do need to know + # what they are so that we can dump a group of lines when these tokens + # change. + # + # The fields contain the actual characters of each field. The patterns + # are like the fields, but they contain mainly token types instead + # of tokens, so they have fewer characters. They are used to be + # sure we are matching fields of similar type. + # + # In this example, there will be 4 column indexes being adjusted. The + # first one is always at zero. The interior columns are at the start of + # the matching tokens, and the last one tracks the maximum line length. + # + # Each time a new line comes in, it joins the current vertical + # group if possible. Otherwise it causes the current group to be dumped + # and a new group is started. + # + # For each new group member, the column locations are increased, as + # necessary, to make room for the new fields. When the group is finally + # output, these column numbers are used to compute the amount of spaces of + # padding needed for each field. + # + # Programming note: the fields are assumed not to have any tab characters. + # Tabs have been previously removed except for tabs in quoted strings and + # side comments. Tabs in these fields can mess up the column counting. + # The log file warns the user if there are any such tabs. + + my ( $rline_hash, $rfields, $rtokens, $rpatterns ) = @_; + my $level = $rline_hash->{level}; + my $level_end = $rline_hash->{level_end}; + my $indentation = $rline_hash->{indentation}; + my $is_forced_break = $rline_hash->{is_forced_break}; + my $outdent_long_lines = $rline_hash->{outdent_long_lines}; + my $is_terminal_ternary = $rline_hash->{is_terminal_ternary}; + my $is_terminal_statement = $rline_hash->{is_terminal_statement}; + my $do_not_pad = $rline_hash->{do_not_pad}; + my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags}; + my $level_jump = $rline_hash->{level_jump}; + + # number of fields is $jmax + # number of tokens between fields is $jmax-1 + my $jmax = @{$rfields} - 1; + + my $leading_space_count = get_spaces($indentation); + + # set outdented flag to be sure we either align within statements or + # across statement boundaries, but not both. + my $is_outdented = $last_leading_space_count > $leading_space_count; + $last_leading_space_count = $leading_space_count; + + # Patch: undo for hanging side comment + my $is_hanging_side_comment = + ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); + $is_outdented = 0 if $is_hanging_side_comment; + + # Forget side comment alignment after seeing 2 or more block comments + my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); + if ($is_block_comment) { + $consecutive_block_comments++; + } + else { + if ( $consecutive_block_comments > 1 ) { forget_side_comment() } + $consecutive_block_comments = 0; + } + + VALIGN_DEBUG_FLAG_APPEND0 && do { + my $nlines = @group_lines; + print STDOUT +"APPEND0: entering lines=$nlines new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n"; + }; + + # Validate cached line if necessary: If we can produce a container + # with just 2 lines total by combining an existing cached opening + # token with the closing token to follow, then we will mark both + # cached flags as valid. + if ($rvertical_tightness_flags) { + if ( @group_lines <= 1 + && $cached_line_type + && $cached_seqno + && $rvertical_tightness_flags->[2] + && $rvertical_tightness_flags->[2] == $cached_seqno ) + { + $rvertical_tightness_flags->[3] ||= 1; + $cached_line_valid ||= 1; + } + } + + # do not join an opening block brace with an unbalanced line + # unless requested with a flag value of 2 + if ( $cached_line_type == 3 + && !@group_lines + && $cached_line_flag < 2 + && $level_jump != 0 ) + { + $cached_line_valid = 0; + } + + # patch until new aligner is finished + if ($do_not_pad) { my_flush() } + + # shouldn't happen: + if ( $level < 0 ) { $level = 0 } + + # do not align code across indentation level changes + # or if vertical alignment is turned off for debugging + if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { + + # we are allowed to shift a group of lines to the right if its + # level is greater than the previous and next group + $extra_indent_ok = + ( $level < $group_level && $last_level_written < $group_level ); + + my_flush(); + + # If we know that this line will get flushed out by itself because + # of level changes, we can leave the extra_indent_ok flag set. + # That way, if we get an external flush call, we will still be + # able to do some -lp alignment if necessary. + $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); + + $group_level = $level; + + # wait until after the above flush to get the leading space + # count because it may have been changed if the -icp flag is in + # effect + $leading_space_count = get_spaces($indentation); + + } + + # -------------------------------------------------------------------- + # Collect outdentable block COMMENTS + # -------------------------------------------------------------------- + my $is_blank_line = ""; + if ( $group_type eq 'COMMENT' ) { + if ( + ( + $is_block_comment + && $outdent_long_lines + && $leading_space_count == $comment_leading_space_count + ) + || $is_blank_line + ) + { + push_group_line( $rfields->[0] ); + return; + } + else { + my_flush(); + } + } + + # -------------------------------------------------------------------- + # add dummy fields for terminal ternary + # -------------------------------------------------------------------- + my $j_terminal_match; + + if ( $is_terminal_ternary && @group_lines ) { + $j_terminal_match = + fix_terminal_ternary( $group_lines[-1], $rfields, $rtokens, + $rpatterns ); + $jmax = @{$rfields} - 1; + } + + # -------------------------------------------------------------------- + # add dummy fields for else statement + # -------------------------------------------------------------------- + + if ( $rfields->[0] =~ /^else\s*$/ + && @group_lines + && $level_jump == 0 ) + { + + $j_terminal_match = + fix_terminal_else( $group_lines[-1], $rfields, $rtokens, $rpatterns ); + $jmax = @{$rfields} - 1; + } + + # -------------------------------------------------------------------- + # Handle simple line of code with no fields to match. + # -------------------------------------------------------------------- + if ( $jmax <= 0 ) { + $zero_count++; + + if ( @group_lines + && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) ) + { + + # flush the current group if it has some aligned columns.. + if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } + + # flush current group if we are just collecting side comments.. + elsif ( + + # ...and we haven't seen a comment lately + ( $zero_count > 3 ) + + # ..or if this new line doesn't fit to the left of the comments + || ( ( $leading_space_count + length( $rfields->[0] ) ) > + $group_lines[0]->get_column(0) ) + ) + { + my_flush(); + } + } + + # start new COMMENT group if this comment may be outdented + if ( $is_block_comment + && $outdent_long_lines + && !@group_lines ) + { + $group_type = 'COMMENT'; + $comment_leading_space_count = $leading_space_count; + push_group_line( $rfields->[0] ); + return; + } + + # just write this line directly if no current group, no side comment, + # and no space recovery is needed. + if ( !@group_lines && !get_recoverable_spaces($indentation) ) { + valign_output_step_B( $leading_space_count, $rfields->[0], 0, + $outdent_long_lines, $rvertical_tightness_flags, $level ); + return; + } + } + else { + $zero_count = 0; + } + + # programming check: (shouldn't happen) + # an error here implies an incorrect call was made + if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) { + my $nt = @{$rtokens}; + my $nf = @{$rfields}; + warning( +"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $nt should be one less than number of fields: $nf)\n" + ); + report_definite_bug(); + } + my $maximum_line_length_for_level = maximum_line_length_for_level($level); + + # -------------------------------------------------------------------- + # create an object to hold this line + # -------------------------------------------------------------------- + my $new_line = Perl::Tidy::VerticalAligner::Line->new( + jmax => $jmax, + jmax_original_line => $jmax, + rtokens => $rtokens, + rfields => $rfields, + rpatterns => $rpatterns, + indentation => $indentation, + leading_space_count => $leading_space_count, + outdent_long_lines => $outdent_long_lines, + list_type => "", + is_hanging_side_comment => $is_hanging_side_comment, + maximum_line_length => $maximum_line_length_for_level, + rvertical_tightness_flags => $rvertical_tightness_flags, + is_terminal_ternary => $is_terminal_ternary, + j_terminal_match => $j_terminal_match, + ); + + # -------------------------------------------------------------------- + # It simplifies things to create a zero length side comment + # if none exists. + # -------------------------------------------------------------------- + make_side_comment( $new_line, $level_end ); + + # -------------------------------------------------------------------- + # Decide if this is a simple list of items. + # There are 3 list types: none, comma, comma-arrow. + # We use this below to be less restrictive in deciding what to align. + # -------------------------------------------------------------------- + if ($is_forced_break) { + decide_if_list($new_line); + } + + # -------------------------------------------------------------------- + # Append this line to the current group (or start new group) + # -------------------------------------------------------------------- + if ( !@group_lines ) { + add_to_group($new_line); + } + else { + push_group_line($new_line); + } + + # output this group if it ends in a terminal else or ternary line + if ( defined($j_terminal_match) ) { + my_flush(); + } + + # Force break after jump to lower level + if ( $level_jump < 0 ) { + my_flush(); + } + + # -------------------------------------------------------------------- + # Some old debugging stuff + # -------------------------------------------------------------------- + VALIGN_DEBUG_FLAG_APPEND && do { + print STDOUT "APPEND fields:"; + dump_array( @{$rfields} ); + print STDOUT "APPEND tokens:"; + dump_array( @{$rtokens} ); + print STDOUT "APPEND patterns:"; + dump_array( @{$rpatterns} ); + dump_alignments(); + }; + + return; +} + +sub join_hanging_comment { + + my $line = shift; + my $jmax = $line->get_jmax(); + return 0 unless $jmax == 1; # must be 2 fields + my $rtokens = $line->get_rtokens(); + return 0 unless $rtokens->[0] eq '#'; # the second field is a comment.. + my $rfields = $line->get_rfields(); + return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty... + my $old_line = shift; + my $maximum_field_index = $old_line->get_jmax(); + return 0 + unless $maximum_field_index > $jmax; # the current line has more fields + my $rpatterns = $line->get_rpatterns(); + + $line->set_is_hanging_side_comment(1); + $jmax = $maximum_field_index; + $line->set_jmax($jmax); + $rfields->[$jmax] = $rfields->[1]; + $rtokens->[ $jmax - 1 ] = $rtokens->[0]; + $rpatterns->[ $jmax - 1 ] = $rpatterns->[0]; + foreach my $j ( 1 .. $jmax - 1 ) { + $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why? + $rtokens->[ $j - 1 ] = ""; + $rpatterns->[ $j - 1 ] = ""; + } + return 1; +} + +sub eliminate_old_fields { + + my $new_line = shift; + my $jmax = $new_line->get_jmax(); + if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } + if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } + + # there must be one previous line + return unless ( @group_lines == 1 ); + + my $old_line = shift; + my $maximum_field_index = $old_line->get_jmax(); + + ############################################### + # Moved below to allow new coding for => matches + # return unless $maximum_field_index > $jmax; + ############################################### + + # Identify specific cases where field elimination is allowed: + # case=1: both lines have comma-separated lists, and the first + # line has an equals + # case=2: both lines have leading equals + + # case 1 is the default + my $case = 1; + + # See if case 2: both lines have leading '=' + # We'll require similar leading patterns in this case + my $old_rtokens = $old_line->get_rtokens(); + my $rtokens = $new_line->get_rtokens(); + my $rpatterns = $new_line->get_rpatterns(); + my $old_rpatterns = $old_line->get_rpatterns(); + if ( $rtokens->[0] =~ /^=>?\d*$/ + && $old_rtokens->[0] eq $rtokens->[0] + && $old_rpatterns->[0] eq $rpatterns->[0] ) + { + $case = 2; + } + + # not too many fewer fields in new line for case 1 + return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); + + # case 1 must have side comment + my $old_rfields = $old_line->get_rfields(); + return + if ( $case == 1 + && length( $old_rfields->[$maximum_field_index] ) == 0 ); + + my $rfields = $new_line->get_rfields(); + + my $hid_equals = 0; + + my @new_alignments = (); + my @new_fields = (); + my @new_matching_patterns = (); + my @new_matching_tokens = (); + + my $j = 0; + my $current_field = ''; + my $current_pattern = ''; + + # loop over all old tokens + my $in_match = 0; + foreach my $k ( 0 .. $maximum_field_index - 1 ) { + $current_field .= $old_rfields->[$k]; + $current_pattern .= $old_rpatterns->[$k]; + last if ( $j > $jmax - 1 ); + + if ( $old_rtokens->[$k] eq $rtokens->[$j] ) { + $in_match = 1; + $new_fields[$j] = $current_field; + $new_matching_patterns[$j] = $current_pattern; + $current_field = ''; + $current_pattern = ''; + $new_matching_tokens[$j] = $old_rtokens->[$k]; + $new_alignments[$j] = $old_line->get_alignment($k); + $j++; + } + else { + + if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) { + last if ( $case == 2 ); # avoid problems with stuff + # like: $a=$b=$c=$d; + $hid_equals = 1; + } + last + if ( $in_match && $case == 1 ) + ; # disallow gaps in matching field types in case 1 + } + } + + # Modify the current state if we are successful. + # We must exactly reach the ends of the new list for success, and the old + # pattern must have more fields. Here is an example where the first and + # second lines have the same number, and we should not align: + # my @a = map chr, 0 .. 255; + # my @b = grep /\W/, @a; + # my @c = grep /[^\w]/, @a; + + # Otherwise, we would get all of the commas aligned, which doesn't work as + # well: + # my @a = map chr, 0 .. 255; + # my @b = grep /\W/, @a; + # my @c = grep /[^\w]/, @a; + + if ( ( $j == $jmax ) + && ( $current_field eq '' ) + && ( $case != 1 || $hid_equals ) + && ( $maximum_field_index > $jmax ) ) + { + my $k = $maximum_field_index; + $current_field .= $old_rfields->[$k]; + $current_pattern .= $old_rpatterns->[$k]; + $new_fields[$j] = $current_field; + $new_matching_patterns[$j] = $current_pattern; + + $new_alignments[$j] = $old_line->get_alignment($k); + $maximum_field_index = $j; + + $old_line->set_alignments(@new_alignments); + $old_line->set_jmax($jmax); + $old_line->set_rtokens( \@new_matching_tokens ); + $old_line->set_rfields( \@new_fields ); + $old_line->set_rpatterns( \@{$rpatterns} ); + } + + # Dumb Down starting match if necessary: + # + # Consider the following two lines: + # + # { + # $a => 20 > 3 ? 1 : 0, + # $xyz => 5, + # } + + # We would like to get alignment regardless of the order of the two lines. + # If the lines come in in this order, then we will simplify the patterns of + # the first line in sub eliminate_new_fields. If the lines come in reverse + # order, then we achieve this with eliminate_new_fields. + + # This update is currently restricted to leading '=>' matches. Although we + # could do this for both '=' and '=>', overall the results for '=' come out + # better without this step because this step can eliminate some other good + # matches. For example, with the '=' we get: + +# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); +# my @dsf = map "$_\x{FFFE}Fred", @disilva; +# my @dsj = map "$_\x{FFFE}John", @disilva; +# my @dsJ = map "$_ John", @disilva; + + # without including '=' we get: + +# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); +# my @dsf = map "$_\x{FFFE}Fred", @disilva; +# my @dsj = map "$_\x{FFFE}John", @disilva; +# my @dsJ = map "$_ John", @disilva; + elsif ( + $case == 2 + + && @new_matching_tokens == 1 + ##&& $new_matching_tokens[0] =~ /^=/ # see note above + && $new_matching_tokens[0] =~ /^=>/ + && $maximum_field_index > 2 + ) + { + my $jmaxm = $jmax - 1; + my $kmaxm = $maximum_field_index - 1; + my $have_side_comment = $old_rtokens->[$kmaxm] eq '#'; + + # We need to reduce the group pattern to be just two tokens, + # the leading equality or => and the final side comment + + my $mid_field = join "", + @{$old_rfields}[ 1 .. $maximum_field_index - 1 ]; + my $mid_patterns = join "", + @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ]; + my @new_alignments = ( + $old_line->get_alignment(0), + $old_line->get_alignment( $maximum_field_index - 1 ) + ); + my @new_tokens = + ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] ); + my @new_fields = ( + $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index] + ); + my @new_patterns = ( + $old_rpatterns->[0], $mid_patterns, + $old_rpatterns->[$maximum_field_index] + ); + + $maximum_field_index = 2; + $old_line->set_jmax($maximum_field_index); + $old_line->set_rtokens( \@new_tokens ); + $old_line->set_rfields( \@new_fields ); + $old_line->set_rpatterns( \@new_patterns ); + + initialize_for_new_group(); + add_to_group($old_line); + } + return; +} + +# create an empty side comment if none exists +sub make_side_comment { + my ( $new_line, $level_end ) = @_; + my $jmax = $new_line->get_jmax(); + my $rtokens = $new_line->get_rtokens(); + + # if line does not have a side comment... + if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) { + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + $rtokens->[$jmax] = '#'; + $rfields->[ ++$jmax ] = ''; + $rpatterns->[$jmax] = '#'; + $new_line->set_jmax($jmax); + $new_line->set_jmax_original_line($jmax); + } + + # line has a side comment.. + else { + + # don't remember old side comment location for very long + my $line_number = $vertical_aligner_self->get_output_line_number(); + my $rfields = $new_line->get_rfields(); + if ( + $line_number - $last_side_comment_line_number > 12 + + # and don't remember comment location across block level changes + || ( $level_end < $last_side_comment_level + && $rfields->[0] =~ /^}/ ) + ) + { + forget_side_comment(); + } + $last_side_comment_line_number = $line_number; + $last_side_comment_level = $level_end; + } + return; +} + +sub decide_if_list { + + my $line = shift; + + # A list will be taken to be a line with a forced break in which all + # of the field separators are commas or comma-arrows (except for the + # trailing #) + + # List separator tokens are things like ',3' or '=>2', + # where the trailing digit is the nesting depth. Allow braces + # to allow nested list items. + my $rtokens = $line->get_rtokens(); + my $test_token = $rtokens->[0]; + if ( $test_token =~ /^(\,|=>)/ ) { + my $list_type = $test_token; + my $jmax = $line->get_jmax(); + + foreach ( 1 .. $jmax - 2 ) { + if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) { + $list_type = ""; + last; + } + } + $line->set_list_type($list_type); + } + return; +} + +sub eliminate_new_fields { + + my ( $new_line, $old_line ) = @_; + return unless (@group_lines); + my $jmax = $new_line->get_jmax(); + + my $old_rtokens = $old_line->get_rtokens(); + my $rtokens = $new_line->get_rtokens(); + my $is_assignment = + ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); + + # must be monotonic variation + return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); + + # must be more fields in the new line + my $maximum_field_index = $old_line->get_jmax(); + return unless ( $maximum_field_index < $jmax ); + + unless ($is_assignment) { + return + unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) + ; # only if monotonic + + # never combine fields of a comma list + return + unless ( $maximum_field_index > 1 ) + && ( $new_line->get_list_type() !~ /^,/ ); + } + + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + my $old_rpatterns = $old_line->get_rpatterns(); + + # loop over all OLD tokens except comment and check match + my $match = 1; + foreach my $k ( 0 .. $maximum_field_index - 2 ) { + if ( ( $old_rtokens->[$k] ne $rtokens->[$k] ) + || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) ) + { + $match = 0; + last; + } + } + + # first tokens agree, so combine extra new tokens + if ($match) { + foreach my $k ( $maximum_field_index .. $jmax - 1 ) { + + $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k]; + $rfields->[$k] = ""; + $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k]; + $rpatterns->[$k] = ""; + } + + $rtokens->[ $maximum_field_index - 1 ] = '#'; + $rfields->[$maximum_field_index] = $rfields->[$jmax]; + $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax]; + $jmax = $maximum_field_index; + } + $new_line->set_jmax($jmax); + return; +} + +sub fix_terminal_ternary { + + # Add empty fields as necessary to align a ternary term + # like this: + # + # my $leapyear = + # $year % 4 ? 0 + # : $year % 100 ? 1 + # : $year % 400 ? 0 + # : 1; + # + # returns 1 if the terminal item should be indented + + my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_; + return unless ($old_line); + +## FUTURE CODING +## my ( $old_line, $end_line ) = @_; +## return unless ( $old_line && $end_line ); +## +## my $rfields = $end_line->get_rfields(); +## my $rpatterns = $end_line->get_rpatterns(); +## my $rtokens = $end_line->get_rtokens(); + + my $jmax = @{$rfields} - 1; + my $rfields_old = $old_line->get_rfields(); + + my $rpatterns_old = $old_line->get_rpatterns(); + my $rtokens_old = $old_line->get_rtokens(); + my $maximum_field_index = $old_line->get_jmax(); + + # look for the question mark after the : + my ($jquestion); + my $depth_question; + my $pad = ""; + foreach my $j ( 0 .. $maximum_field_index - 1 ) { + my $tok = $rtokens_old->[$j]; + if ( $tok =~ /^\?(\d+)$/ ) { + $depth_question = $1; + + # depth must be correct + next unless ( $depth_question eq $group_level ); + + $jquestion = $j; + if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { + $pad = " " x length($1); + } + else { + return; # shouldn't happen + } + last; + } + } + return unless ( defined($jquestion) ); # shouldn't happen + + # Now splice the tokens and patterns of the previous line + # into the else line to insure a match. Add empty fields + # as necessary. + my $jadd = $jquestion; + + # Work on copies of the actual arrays in case we have + # to return due to an error + my @fields = @{$rfields}; + my @patterns = @{$rpatterns}; + my @tokens = @{$rtokens}; + + VALIGN_DEBUG_FLAG_TERNARY && do { + local $" = '><'; + print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; + print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; + print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; + print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n"; + print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n"; + print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; + }; + + # handle cases of leading colon on this line + if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { + + my ( $colon, $therest ) = ( $1, $2 ); + + # Handle sub-case of first field with leading colon plus additional code + # This is the usual situation as at the '1' below: + # ... + # : $year % 400 ? 0 + # : 1; + if ($therest) { + + # Split the first field after the leading colon and insert padding. + # Note that this padding will remain even if the terminal value goes + # out on a separate line. This does not seem to look to bad, so no + # mechanism has been included to undo it. + my $field1 = shift @fields; + unshift @fields, ( $colon, $pad . $therest ); + + # change the leading pattern from : to ? + return unless ( $patterns[0] =~ s/^\:/?/ ); + + # install leading tokens and patterns of existing line + unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); + unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); + + # insert appropriate number of empty fields + splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + } + + # handle sub-case of first field just equal to leading colon. + # This can happen for example in the example below where + # the leading '(' would create a new alignment token + # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) + # : ( $mname = $name . '->' ); + else { + + return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen + + # prepend a leading ? onto the second pattern + $patterns[1] = "?b" . $patterns[1]; + + # pad the second field + $fields[1] = $pad . $fields[1]; + + # install leading tokens and patterns of existing line, replacing + # leading token and inserting appropriate number of empty fields + splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); + splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); + splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + } + } + + # Handle case of no leading colon on this line. This will + # be the case when -wba=':' is used. For example, + # $year % 400 ? 0 : + # 1; + else { + + # install leading tokens and patterns of existing line + $patterns[0] = '?' . 'b' . $patterns[0]; + unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); + unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); + + # insert appropriate number of empty fields + $jadd = $jquestion + 1; + $fields[0] = $pad . $fields[0]; + splice( @fields, 0, 0, ('') x $jadd ) if $jadd; + } + + VALIGN_DEBUG_FLAG_TERNARY && do { + local $" = '><'; + print STDOUT "MODIFIED TOKENS=<@tokens>\n"; + print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; + print STDOUT "MODIFIED FIELDS=<@fields>\n"; + }; + + # all ok .. update the arrays + @{$rfields} = @fields; + @{$rtokens} = @tokens; + @{$rpatterns} = @patterns; +## FUTURE CODING +## $end_line->set_rfields( \@fields ); +## $end_line->set_rtokens( \@tokens ); +## $end_line->set_rpatterns( \@patterns ); + + # force a flush after this line + return $jquestion; +} + +sub fix_terminal_else { + + # Add empty fields as necessary to align a balanced terminal + # else block to a previous if/elsif/unless block, + # like this: + # + # if ( 1 || $x ) { print "ok 13\n"; } + # else { print "not ok 13\n"; } + # + # returns a positive value if the else block should be indented + # + my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_; + return unless ($old_line); + my $jmax = @{$rfields} - 1; + return unless ( $jmax > 0 ); + + #my $old_line = $group_lines[-1]; + + # check for balanced else block following if/elsif/unless + my $rfields_old = $old_line->get_rfields(); + + # TBD: add handling for 'case' + return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); + + # look for the opening brace after the else, and extract the depth + my $tok_brace = $rtokens->[0]; + my $depth_brace; + if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } + + # probably: "else # side_comment" + else { return } + + my $rpatterns_old = $old_line->get_rpatterns(); + my $rtokens_old = $old_line->get_rtokens(); + my $maximum_field_index = $old_line->get_jmax(); + + # be sure the previous if/elsif is followed by an opening paren + my $jparen = 0; + my $tok_paren = '(' . $depth_brace; + my $tok_test = $rtokens_old->[$jparen]; + return unless ( $tok_test eq $tok_paren ); # shouldn't happen + + # Now find the opening block brace + my ($jbrace); + foreach my $j ( 1 .. $maximum_field_index - 1 ) { + my $tok = $rtokens_old->[$j]; + if ( $tok eq $tok_brace ) { + $jbrace = $j; + last; + } + } + return unless ( defined($jbrace) ); # shouldn't happen + + # Now splice the tokens and patterns of the previous line + # into the else line to insure a match. Add empty fields + # as necessary. + my $jadd = $jbrace - $jparen; + splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); + splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); + splice( @{$rfields}, 1, 0, ('') x $jadd ); + + # force a flush after this line if it does not follow a case + if ( $rfields_old->[0] =~ /^case\s*$/ ) { return } + else { return $jbrace } +} + +{ # sub check_match + my %is_good_alignment; + + BEGIN { + + # Vertically aligning on certain "good" tokens is usually okay + # so we can be less restrictive in marginal cases. + my @q = qw( { ? => = ); + push @q, (','); + @is_good_alignment{@q} = (1) x scalar(@q); + } + + sub check_match { + + # See if the current line matches the current vertical alignment group. + # If not, flush the current group. + my ( $new_line, $old_line ) = @_; + + # uses global variables: + # $previous_minimum_jmax_seen + # $maximum_jmax_seen + # $marginal_match + my $jmax = $new_line->get_jmax(); + my $maximum_field_index = $old_line->get_jmax(); + + # flush if this line has too many fields + # variable $GoToLoc indicates goto branch point, for debugging + my $GoToLoc = 1; + if ( $jmax > $maximum_field_index ) { goto NO_MATCH } + + # flush if adding this line would make a non-monotonic field count + if ( + ( $maximum_field_index > $jmax ) # this has too few fields + && ( + ( $previous_minimum_jmax_seen < + $jmax ) # and wouldn't be monotonic + || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) + ) + ) + { + $GoToLoc = 2; + goto NO_MATCH; + } + + # otherwise see if this line matches the current group + my $jmax_original_line = $new_line->get_jmax_original_line(); + my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); + my $rtokens = $new_line->get_rtokens(); + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + my $list_type = $new_line->get_list_type(); + + my $group_list_type = $old_line->get_list_type(); + my $old_rpatterns = $old_line->get_rpatterns(); + my $old_rtokens = $old_line->get_rtokens(); + + my $jlimit = $jmax - 1; + if ( $maximum_field_index > $jmax ) { + $jlimit = $jmax_original_line; + --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); + } + + # handle comma-separated lists .. + if ( $group_list_type && ( $list_type eq $group_list_type ) ) { + for my $j ( 0 .. $jlimit ) { + my $old_tok = $old_rtokens->[$j]; + next unless $old_tok; + my $new_tok = $rtokens->[$j]; + next unless $new_tok; + + # lists always match ... + # unless they would align any '=>'s with ','s + $GoToLoc = 3; + goto NO_MATCH + if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ + || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); + } + } + + # do detailed check for everything else except hanging side comments + elsif ( !$is_hanging_side_comment ) { + + my $leading_space_count = $new_line->get_leading_space_count(); + + my $max_pad = 0; + my $min_pad = 0; + my $saw_good_alignment; + + for my $j ( 0 .. $jlimit ) { + + my $old_tok = $old_rtokens->[$j]; + my $new_tok = $rtokens->[$j]; + + # Note on encoding used for alignment tokens: + # ------------------------------------------- + # Tokens are "decorated" with information which can help + # prevent unwanted alignments. Consider for example the + # following two lines: + # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); + # local ( $i, $f ) = &'bdiv( $xn, $xd ); + # There are three alignment tokens in each line, a comma, + # an =, and a comma. In the first line these three tokens + # are encoded as: + # ,4+local-18 =3 ,4+split-7 + # and in the second line they are encoded as + # ,4+local-18 =3 ,4+&'bdiv-8 + # Tokens always at least have token name and nesting + # depth. So in this example the ='s are at depth 3 and + # the ,'s are at depth 4. This prevents aligning tokens + # of different depths. Commas contain additional + # information, as follows: + # , {depth} + {container name} - {spaces to opening paren} + # This allows us to reject matching the rightmost commas + # in the above two lines, since they are for different + # function calls. This encoding is done in + # 'sub send_lines_to_vertical_aligner'. + + # Pick off actual token. + # Everything up to the first digit is the actual token. + my $alignment_token = $new_tok; + if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } + + # see if the decorated tokens match + my $tokens_match = $new_tok eq $old_tok + + # Exception for matching terminal : of ternary statement.. + # consider containers prefixed by ? and : a match + || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); + + # No match if the alignment tokens differ... + if ( !$tokens_match ) { + + # ...Unless this is a side comment + if ( + $j == $jlimit + + # and there is either at least one alignment token + # or this is a single item following a list. This + # latter rule is required for 'December' to join + # the following list: + # my (@months) = ( + # '', 'January', 'February', 'March', + # 'April', 'May', 'June', 'July', + # 'August', 'September', 'October', 'November', + # 'December' + # ); + # If it doesn't then the -lp formatting will fail. + && ( $j > 0 || $old_tok =~ /^,/ ) + ) + { + $marginal_match = 1 + if ( $marginal_match == 0 + && @group_lines == 1 ); + last; + } + + $GoToLoc = 4; + goto NO_MATCH; + } + + # Calculate amount of padding required to fit this in. + # $pad is the number of spaces by which we must increase + # the current field to squeeze in this field. + my $pad = + length( $rfields->[$j] ) - $old_line->current_field_width($j); + if ( $j == 0 ) { $pad += $leading_space_count; } + + # remember max pads to limit marginal cases + if ( $alignment_token ne '#' ) { + if ( $pad > $max_pad ) { $max_pad = $pad } + if ( $pad < $min_pad ) { $min_pad = $pad } + } + if ( $is_good_alignment{$alignment_token} ) { + $saw_good_alignment = 1; + } + + # If patterns don't match, we have to be careful... + if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) { + + # flag this as a marginal match since patterns differ + $marginal_match = 1 + if ( $marginal_match == 0 && @group_lines == 1 ); + + # We have to be very careful about aligning commas + # when the pattern's don't match, because it can be + # worse to create an alignment where none is needed + # than to omit one. Here's an example where the ','s + # are not in named containers. The first line below + # should not match the next two: + # ( $a, $b ) = ( $b, $r ); + # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); + # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); + if ( $alignment_token eq ',' ) { + + # do not align commas unless they are in named containers + $GoToLoc = 5; + goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); + } + + # do not align parens unless patterns match; + # large ugly spaces can occur in math expressions. + elsif ( $alignment_token eq '(' ) { + + # But we can allow a match if the parens don't + # require any padding. + $GoToLoc = 6; + if ( $pad != 0 ) { goto NO_MATCH } + } + + # Handle an '=' alignment with different patterns to + # the left. + elsif ( $alignment_token eq '=' ) { + + # It is best to be a little restrictive when + # aligning '=' tokens. Here is an example of + # two lines that we will not align: + # my $variable=6; + # $bb=4; + # The problem is that one is a 'my' declaration, + # and the other isn't, so they're not very similar. + # We will filter these out by comparing the first + # letter of the pattern. This is crude, but works + # well enough. + if ( + substr( $old_rpatterns->[$j], 0, 1 ) ne + substr( $rpatterns->[$j], 0, 1 ) ) + { + $GoToLoc = 7; + goto NO_MATCH; + } + + # If we pass that test, we'll call it a marginal match. + # Here is an example of a marginal match: + # $done{$$op} = 1; + # $op = compile_bblock($op); + # The left tokens are both identifiers, but + # one accesses a hash and the other doesn't. + # We'll let this be a tentative match and undo + # it later if we don't find more than 2 lines + # in the group. + elsif ( @group_lines == 1 ) { + $marginal_match = + 2; # =2 prevents being undone below + } + } + } + + # Don't let line with fewer fields increase column widths + # ( align3.t ) + if ( $maximum_field_index > $jmax ) { + + # Exception: suspend this rule to allow last lines to join + $GoToLoc = 8; + if ( $pad > 0 ) { goto NO_MATCH; } + } + } ## end for my $j ( 0 .. $jlimit) + + # Turn off the "marginal match" flag in some cases... + # A "marginal match" occurs when the alignment tokens agree + # but there are differences in the other tokens (patterns). + # If we leave the marginal match flag set, then the rule is that we + # will align only if there are more than two lines in the group. + # We will turn of the flag if we almost have a match + # and either we have seen a good alignment token or we + # just need a small pad (2 spaces) to fit. These rules are + # the result of experimentation. Tokens which misaligned by just + # one or two characters are annoying. On the other hand, + # large gaps to less important alignment tokens are also annoying. + if ( $marginal_match == 1 + && $jmax == $maximum_field_index + && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) + ) + { + $marginal_match = 0; + } + ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; + } + + # We have a match (even if marginal). + # If the current line has fewer fields than the current group + # but otherwise matches, copy the remaining group fields to + # make it a perfect match. + if ( $maximum_field_index > $jmax ) { + + ########################################################## + # FIXME: The previous version had a bug which made side comments + # become regular fields, so for now the program does not allow a + # line with side comment to match. This should eventually be done. + # The best test file for experimenting is 'lista.t' + ########################################################## + + my $comment = $rfields->[$jmax]; + $GoToLoc = 9; + goto NO_MATCH if ($comment); + + # Corrected loop + for my $jj ( $jlimit .. $maximum_field_index ) { + $rtokens->[$jj] = $old_rtokens->[$jj]; + $rfields->[ $jj + 1 ] = ''; + $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ]; + } + +## THESE DO NOT GIVE CORRECT RESULTS +## $rfields->[$jmax] = $comment; +## $new_line->set_jmax($jmax); + + } + return; + + NO_MATCH: + + # variable $GoToLoc is for debugging + #print "no match from $GoToLoc\n"; + + # Make one last effort to retain a match of certain statements + my $match = salvage_equality_matches( $new_line, $old_line ); + my_flush_code() unless ($match); + return; + } +} + +sub salvage_equality_matches { + my ( $new_line, $old_line ) = @_; + + # Reduce the complexity of the two lines if it will allow us to retain + # alignment of some common alignments, including '=' and '=>'. We will + # convert both lines to have just two matching tokens, the equality and the + # side comment. + + # return 0 or undef if unsuccessful + # return 1 if successful + + # Here is a very simple example of two lines where we could at least + # align the equals: + # $x = $class->_sub( $x, $delta ); + # $xpownm1 = $class->_pow( $class->_copy($x), $nm1 ); # x(i)^(n-1) + + # We will only do this if there is one old line (and one new line) + return unless ( @group_lines == 1 ); + return if ($is_matching_terminal_line); + + # We are only looking for equality type statements + my $old_rtokens = $old_line->get_rtokens(); + my $rtokens = $new_line->get_rtokens(); + my $is_equals = + ( $rtokens->[0] =~ /=/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); + return unless ($is_equals); + + # The leading patterns must match + my $old_rpatterns = $old_line->get_rpatterns(); + my $rpatterns = $new_line->get_rpatterns(); + return if ( $old_rpatterns->[0] ne $rpatterns->[0] ); + + # Both should have side comment fields (should always be true) + my $jmax_old = $old_line->get_jmax(); + my $jmax_new = $new_line->get_jmax(); + my $end_tok_old = $old_rtokens->[ $jmax_old - 1 ]; + my $end_tok_new = $rtokens->[ $jmax_new - 1 ]; + my $have_side_comments = + defined($end_tok_old) + && $end_tok_old eq '#' + && defined($end_tok_new) + && $end_tok_new eq '#'; + if ( !$have_side_comments ) { return; } + + # Do not match if any remaining tokens in new line include '?', 'if', + # 'unless','||', '&&'. The reason is that (1) this isn't a great match, and + # (2) we will prevent possibly better matchs to follow. Here is an + # example. The match of the first two lines is rejected, and this allows + # the second and third lines to match. + # my $type = shift || "o"; + # my $fname = ( $type eq 'oo' ? 'orte_city' : 'orte' ); + # my $suffix = ( $coord_system eq 'standard' ? '' : '-orig' ); + # This logic can cause some unwanted losses of alignments, but it can retain + # long runs of multiple-token alignments, so overall it is worthwhile. + # If we had a peek at the subsequent line we could make a much better + # decision here, but for now this is not available. + for ( my $j = 1 ; $j < $jmax_new - 1 ; $j++ ) { + my $new_tok = $rtokens->[$j]; + my $is_good_alignment = ( $new_tok =~ /^(=|\?|if|unless|\|\||\&\&)/ ); + return if ($is_good_alignment); + } + + my $squeeze_line = sub { + my ($line_obj) = @_; + + # reduce a line down to the three fields surrounding + # the two tokens, an '=' of some sort and a '#' at the end + + my $jmax = $line_obj->get_jmax(); + my $jmax_new = 2; + return unless $jmax > $jmax_new; + my $rfields = $line_obj->get_rfields(); + my $rpatterns = $line_obj->get_rpatterns(); + my $rtokens = $line_obj->get_rtokens(); + my $rfields_new = [ + $rfields->[0], join( '', @{$rfields}[ 1 .. $jmax - 1 ] ), + $rfields->[$jmax] + ]; + my $rpatterns_new = [ + $rpatterns->[0], join( '', @{$rpatterns}[ 1 .. $jmax - 1 ] ), + $rpatterns->[$jmax] + ]; + my $rtokens_new = [ $rtokens->[0], $rtokens->[ $jmax - 1 ] ]; + $line_obj->{_rfields} = $rfields_new; + $line_obj->{_rpatterns} = $rpatterns_new; + $line_obj->{_rtokens} = $rtokens_new; + $line_obj->set_jmax($jmax_new); + }; + + # Okay, we will force a match at the equals-like token. We will fix both + # lines to have just 2 tokens and 3 fields: + $squeeze_line->($new_line); + $squeeze_line->($old_line); + + # start over with a new group + initialize_for_new_group(); + add_to_group($old_line); + return 1; +} + +sub check_fit { + + my ( $new_line, $old_line ) = @_; + return unless (@group_lines); + + my $jmax = $new_line->get_jmax(); + my $leading_space_count = $new_line->get_leading_space_count(); + my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); + my $rtokens = $new_line->get_rtokens(); + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + + my $group_list_type = $group_lines[0]->get_list_type(); + + my $padding_so_far = 0; + my $padding_available = $old_line->get_available_space_on_right(); + + # save current columns in case this doesn't work + save_alignment_columns(); + + my $maximum_field_index = $old_line->get_jmax(); + for my $j ( 0 .. $jmax ) { + + my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j); + + if ( $j == 0 ) { + $pad += $leading_space_count; + } + + # remember largest gap of the group, excluding gap to side comment + if ( $pad < 0 + && $group_maximum_gap < -$pad + && $j > 0 + && $j < $jmax - 1 ) + { + $group_maximum_gap = -$pad; + } + + next if $pad < 0; + + ## OLD NOTES: + ## This patch helps sometimes, but it doesn't check to see if + ## the line is too long even without the side comment. It needs + ## to be reworked. + ##don't let a long token with no trailing side comment push + ##side comments out, or end a group. (sidecmt1.t) + ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0); + + # BEGIN PATCH for keith1.txt. + # If the group began matching multiple tokens but later this got + # reduced to a fewer number of matching tokens, then the fields + # of the later lines will still have to fit into their corresponding + # fields. So a large later field will "push" the other fields to + # the right, including previous side comments, and if there is no room + # then there is no match. + # For example, look at the last line in the following snippet: + + # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false; + # my $env = ($b_prod_db) ? "prd" : "val"; + # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL"; + # my $task = $OPT{t}; + # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash"; + + # The long term will push the '?' to the right to fit in, and in this + # case there is not enough room so it will not match the equals unless + # we do something special. + + # Usually it looks good to keep an initial alignment of '=' going, and + # we can do this if the long term can fit in the space taken up by the + # remaining fields (the ? : fields here). + + # Allowing any matching token for now, but it could be restricted + # to an '='-like token if necessary. + + if ( + $pad > $padding_available + && $jmax == 2 # matching one thing (plus #) + && $j == $jmax - 1 # at last field + && @group_lines > 1 # more than 1 line in group now + && $jmax < $maximum_field_index # other lines have more fields + && length( $rfields->[$jmax] ) == 0 # no side comment + + # Uncomment to match only equals (but this does not seem necessary) + # && $rtokens->[0] =~ /^=\d/ # matching an equals + ) + { + my $extra_padding = 0; + foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) { + $extra_padding += $old_line->current_field_width($jj); + } + + next if ( $pad <= $padding_available + $extra_padding ); + } + + # END PATCH for keith1.pl + + # This line will need space; lets see if we want to accept it.. + if ( + + # not if this won't fit + ( $pad > $padding_available ) + + # previously, there were upper bounds placed on padding here + # (maximum_whitespace_columns), but they were not really helpful + + ) + { + + # revert to starting state then flush; things didn't work out + restore_alignment_columns(); + my_flush_code(); + last; + } + + # patch to avoid excessive gaps in previous lines, + # due to a line of fewer fields. + # return join( ".", + # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, + # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); + next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); + + # looks ok, squeeze this field in + $old_line->increase_field_width( $j, $pad ); + $padding_available -= $pad; + + # remember largest gap of the group, excluding gap to side comment + if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { + $group_maximum_gap = $pad; + } + } + return; +} + +sub add_to_group { + + # The current line either starts a new alignment group or is + # accepted into the current alignment group. + my ($new_line) = @_; + push_group_line($new_line); + + # initialize field lengths if starting new group + if ( @group_lines == 1 ) { + + my $jmax = $new_line->get_jmax(); + my $rfields = $new_line->get_rfields(); + my $rtokens = $new_line->get_rtokens(); + my $col = $new_line->get_leading_space_count(); + + for my $j ( 0 .. $jmax ) { + $col += length( $rfields->[$j] ); + + # create initial alignments for the new group + my $token = ""; + if ( $j < $jmax ) { $token = $rtokens->[$j] } + my $alignment = make_alignment( $col, $token ); + $new_line->set_alignment( $j, $alignment ); + } + + $maximum_jmax_seen = $jmax; + $minimum_jmax_seen = $jmax; + } + + # use previous alignments otherwise + else { + my @new_alignments = $group_lines[-2]->get_alignments(); + $new_line->set_alignments(@new_alignments); + } + + # remember group jmax extremes for next call to valign_input + $previous_minimum_jmax_seen = $minimum_jmax_seen; + $previous_maximum_jmax_seen = $maximum_jmax_seen; + return; +} + +sub dump_array { + + # debug routine to dump array contents + local $" = ')('; + print STDOUT "(@_)\n"; + return; +} + +# flush() sends the current Perl::Tidy::VerticalAligner group down the +# pipeline to Perl::Tidy::FileWriter. + +# This is the external flush, which also empties the buffer and cache +sub flush { + + # the buffer must be emptied first, then any cached text + dump_valign_buffer(); + + if (@group_lines) { + my_flush(); + } + else { + if ($cached_line_type) { + $seqno_string = $cached_seqno_string; + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + $cached_line_type = 0; + $cached_line_text = ""; + $cached_seqno_string = ""; + } + } + return; +} + +sub reduce_valign_buffer_indentation { + + my ($diff) = @_; + if ( $valign_buffer_filling && $diff ) { + my $max_valign_buffer = @valign_buffer; + foreach my $i ( 0 .. $max_valign_buffer - 1 ) { + my ( $line, $leading_space_count, $level ) = + @{ $valign_buffer[$i] }; + my $ws = substr( $line, 0, $diff ); + if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { + $line = substr( $line, $diff ); + } + if ( $leading_space_count >= $diff ) { + $leading_space_count -= $diff; + $level = level_change( $leading_space_count, $diff, $level ); + } + $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; + } + } + return; +} + +sub level_change { + + # compute decrease in level when we remove $diff spaces from the + # leading spaces + my ( $leading_space_count, $diff, $level ) = @_; + if ($rOpts_indent_columns) { + my $olev = + int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); + my $nlev = int( $leading_space_count / $rOpts_indent_columns ); + $level -= ( $olev - $nlev ); + if ( $level < 0 ) { $level = 0 } + } + return $level; +} + +sub dump_valign_buffer { + if (@valign_buffer) { + foreach (@valign_buffer) { + valign_output_step_D( @{$_} ); + } + @valign_buffer = (); + } + $valign_buffer_filling = ""; + return; +} + +sub my_flush_comment { + + # Output a group of COMMENT lines + + return unless (@group_lines); + my $leading_space_count = $comment_leading_space_count; + my $leading_string = get_leading_string($leading_space_count); + + # look for excessively long lines + my $max_excess = 0; + foreach my $str (@group_lines) { + my $excess = + length($str) + + $leading_space_count - + maximum_line_length_for_level($group_level); + if ( $excess > $max_excess ) { + $max_excess = $excess; + } + } + + # zero leading space count if any lines are too long + if ( $max_excess > 0 ) { + $leading_space_count -= $max_excess; + if ( $leading_space_count < 0 ) { $leading_space_count = 0 } + $last_outdented_line_at = $file_writer_object->get_output_line_number(); + unless ($outdented_line_count) { + $first_outdented_line_at = $last_outdented_line_at; + } + my $nlines = @group_lines; + $outdented_line_count += $nlines; + } + + # write the lines + my $outdent_long_lines = 0; + foreach my $line (@group_lines) { + valign_output_step_B( $leading_space_count, $line, 0, + $outdent_long_lines, "", $group_level ); + } + + initialize_for_new_group(); + return; +} + +sub my_flush_code { + + # Output a group of CODE lines + + return unless (@group_lines); + + VALIGN_DEBUG_FLAG_APPEND0 + && do { + my $group_list_type = $group_lines[0]->get_list_type(); + my ( $a, $b, $c ) = caller(); + my $nlines = @group_lines; + my $maximum_field_index = $group_lines[0]->get_jmax(); + my $rfields_old = $group_lines[0]->get_rfields(); + my $tok = $rfields_old->[0]; + print STDOUT +"APPEND0: my_flush_code called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$nlines extra=$extra_indent_ok first tok=$tok;\n"; + + }; + + # some small groups are best left unaligned + my $do_not_align = decide_if_aligned_pair(); + + # optimize side comment location + $do_not_align = adjust_side_comment($do_not_align); + + # recover spaces for -lp option if possible + my $extra_leading_spaces = get_extra_leading_spaces(); + + # all lines of this group have the same basic leading spacing + my $group_leader_length = $group_lines[0]->get_leading_space_count(); + + # add extra leading spaces if helpful + # NOTE: Use zero; this did not work well + my $min_ci_gap = 0; + + # output the lines + foreach my $line (@group_lines) { + valign_output_step_A( $line, $min_ci_gap, $do_not_align, + $group_leader_length, $extra_leading_spaces ); + } + + initialize_for_new_group(); + return; +} + +sub my_flush { + + # This is the vertical aligner internal flush, which leaves the cache + # intact + return unless (@group_lines); + + VALIGN_DEBUG_FLAG_APPEND0 && do { + my ( $a, $b, $c ) = caller(); + my $nlines = @group_lines; + print STDOUT +"APPEND0: my_flush called from $a $b $c lines=$nlines, type=$group_type \n"; + }; + + # handle a group of COMMENT lines + if ( $group_type eq 'COMMENT' ) { my_flush_comment() } + + # handle a single line of CODE + elsif ( @group_lines == 1 ) { my_flush_code() } + + # handle group(s) of CODE lines + else { + + # LP FIX PART 1 + # If we are trying to add extra indentation for -lp formatting, + # then we need to try to keep the group intact. But we have + # to set the $extra_indent_ok flag to zero in case some lines + # are output separately. We fix things up at the bottom. + # NOTE: this is a workaround but is tentative; we should really look to + # see if if extra indentation is possible. + my $rOpt_lp = $rOpts->{'line-up-parentheses'}; + my $keep_group_intact = $rOpt_lp && $extra_indent_ok; + my $extra_indent_ok_save = $extra_indent_ok; + $extra_indent_ok = 0; + + # we will rebuild alignment line group(s); + my @new_lines = @group_lines; + initialize_for_new_group(); + + ##my $has_terminal_ternary = $new_lines[-1]->{_is_terminal_ternary}; + + # remove unmatched tokens in all lines + delete_unmatched_tokens( \@new_lines ); + + foreach my $new_line (@new_lines) { + + # Start a new group if necessary + if ( !@group_lines ) { + add_to_group($new_line); + + next; + } + + my $j_terminal_match = $new_line->get_j_terminal_match(); + my $base_line = $group_lines[0]; + + # Initialize a global flag saying if the last line of the group + # should match end of group and also terminate the group. There + # should be no returns between here and where the flag is handled + # at the bottom. + my $col_matching_terminal = 0; + if ( defined($j_terminal_match) ) { + + # remember the column of the terminal ? or { to match with + $col_matching_terminal = + $base_line->get_column($j_terminal_match); + + # set global flag for sub decide_if_aligned_pair + $is_matching_terminal_line = 1; + } + + # ------------------------------------------------------------- + # Allow hanging side comment to join current group, if any. This + # will help keep side comments aligned, because otherwise we + # will have to start a new group, making alignment less likely. + # ------------------------------------------------------------- + + if ( $new_line->get_is_hanging_side_comment() ) { + join_hanging_comment( $new_line, $base_line ); + } + + # If this line has no matching tokens, then flush out the lines + # BEFORE this line unless both it and the previous line have side + # comments. This prevents this line from pushing side coments out + # to the right. + ##elsif ( $new_line->get_jmax() == 1 ) { + elsif ( $new_line->get_jmax() == 1 && !$keep_group_intact ) { + + # There are no matching tokens, so now check side comments: + my $prev_comment = $group_lines[-1]->get_rfields()->[-1]; + my $side_comment = $new_line->get_rfields()->[-1]; + my_flush_code() unless ( $side_comment && $prev_comment ); + + } + + # ------------------------------------------------------------- + # If there is just one previous line, and it has more fields + # than the new line, try to join fields together to get a match + # with the new line. At the present time, only a single + # leading '=' is allowed to be compressed out. This is useful + # in rare cases where a table is forced to use old breakpoints + # because of side comments, + # and the table starts out something like this: + # my %MonthChars = ('0', 'Jan', # side comment + # '1', 'Feb', + # '2', 'Mar', + # Eliminating the '=' field will allow the remaining fields to + # line up. This situation does not occur if there are no side + # comments because scan_list would put a break after the + # opening '('. + # ------------------------------------------------------------- + + eliminate_old_fields( $new_line, $base_line ); + + # ------------------------------------------------------------- + # If the new line has more fields than the current group, + # see if we can match the first fields and combine the remaining + # fields of the new line. + # ------------------------------------------------------------- + + eliminate_new_fields( $new_line, $base_line ); + + # ------------------------------------------------------------- + # Flush previous group unless all common tokens and patterns + # match.. + + check_match( $new_line, $base_line ); + + # ------------------------------------------------------------- + # See if there is space for this line in the current group (if + # any) + # ------------------------------------------------------------- + if (@group_lines) { + check_fit( $new_line, $base_line ); + } + + add_to_group($new_line); + + if ( defined($j_terminal_match) ) { + + # if there is only one line in the group (maybe due to failure + # to match perfectly with previous lines), then align the ? or + # { of this terminal line with the previous one unless that + # would make the line too long + if ( @group_lines == 1 ) { + $base_line = $group_lines[0]; + my $col_now = $base_line->get_column($j_terminal_match); + my $pad = $col_matching_terminal - $col_now; + my $padding_available = + $base_line->get_available_space_on_right(); + if ( $pad > 0 && $pad <= $padding_available ) { + $base_line->increase_field_width( $j_terminal_match, + $pad ); + } + } + my_flush_code(); + $is_matching_terminal_line = 0; + } + + # Optional optimization; end the group if we know we cannot match + # next line. + elsif ( $new_line->{_end_group} ) { + my_flush_code(); + } + } + + # LP FIX PART 2 + # if we managed to keep the group intact for -lp formatting, + # restore the flag which allows extra indentation + if ( $keep_group_intact && @group_lines == @new_lines ) { + $extra_indent_ok = $extra_indent_ok_save; + } + my_flush_code(); + } + return; +} + +sub delete_selected_tokens { + + my ( $line_obj, $ridel ) = @_; + + # remove an unused alignment token(s) to improve alignment chances + return unless ( defined($line_obj) && defined($ridel) && @{$ridel} ); + + my $jmax_old = $line_obj->get_jmax(); + my $rfields_old = $line_obj->get_rfields(); + my $rpatterns_old = $line_obj->get_rpatterns(); + my $rtokens_old = $line_obj->get_rtokens(); + + local $" = '> <'; + 0 && print < +old jmax: $jmax_old +old tokens: <@{$rtokens_old}> +old patterns: <@{$rpatterns_old}> +old fields: <@{$rfields_old}> +EOM + + my $rfields_new = []; + my $rpatterns_new = []; + my $rtokens_new = []; + + my $kmax = @{$ridel} - 1; + my $k = 0; + my $jdel_next = $ridel->[$k]; + + # FIXME: + if ( $jdel_next < 0 ) { print STDERR "bad jdel_next=$jdel_next\n"; return } + my $pattern = $rpatterns_old->[0]; + my $field = $rfields_old->[0]; + push @{$rfields_new}, $field; + push @{$rpatterns_new}, $pattern; + for ( my $j = 0 ; $j < $jmax_old ; $j++ ) { + my $token = $rtokens_old->[$j]; + my $field = $rfields_old->[ $j + 1 ]; + my $pattern = $rpatterns_old->[ $j + 1 ]; + if ( $k > $kmax || $j < $jdel_next ) { + push @{$rtokens_new}, $token; + push @{$rfields_new}, $field; + push @{$rpatterns_new}, $pattern; + } + elsif ( $j == $jdel_next ) { + $rfields_new->[-1] .= $field; + $rpatterns_new->[-1] .= $pattern; + if ( ++$k <= $kmax ) { + my $jdel_last = $jdel_next; + $jdel_next = $ridel->[$k]; + if ( $jdel_next < $jdel_last ) { + + # FIXME: + print STDERR "bad jdel_next=$jdel_next\n"; + return; + } + } + } + } + + # ----- x ------ x ------ x ------ + #t 0 1 2 <- token indexing + #f 0 1 2 3 <- field and pattern + + my $jmax_new = @{$rfields_new} - 1; + $line_obj->set_rtokens($rtokens_new); + $line_obj->set_rpatterns($rpatterns_new); + $line_obj->set_rfields($rfields_new); + $line_obj->set_jmax($jmax_new); + + 0 && print < +new patterns: <@{$rpatterns_new}> +new fields: <@{$rfields_new}> +EOM + return; +} + +{ # sub is_deletable_token + + my %is_deletable_equals; + + BEGIN { + my @q; + + # These tokens with = may be deleted for vertical aligmnemt + @q = qw( + <= >= == =~ != <=> + ); + @is_deletable_equals{@q} = (1) x scalar(@q); + + } + + sub is_deletable_token { + + # Determine if an token with no match possibility can be removed to + # improve chances of making an alignment. + my ( $token, $i, $imax, $jline, $i_eq ) = @_; + + # Strip off the level and other stuff appended to the token. + # Tokens have a trailing decimal level and optional tag (for commas): + # For example, the first comma in the following line + # sub banner { crlf; report( shift, '/', shift ); crlf } + # is decorated as follows: + # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6) + my ( $tok, $lev, $tag ) = ( $token, 0, "" ); + if ( $tok =~ /^(\D+)(\d+)(.*)$/ ) { $tok = $1; $lev = $2; $tag = $3 } + ##print "$token >> $tok $lev $tag\n"; + + # only remove lower level commas + ##if ( $tok eq ',' ) { return unless $lev > $group_level; } + if ( $tok eq ',' ) { + + #print "tok=$tok, lev=$lev, gl=$group_level, i=$i, ieq=$i_eq\n"; + return if ( defined($i_eq) && $i < $i_eq ); + return if ( $lev >= $group_level ); + } + + # most operators with an equals sign should be retained if at + # same level as this statement + elsif ( $tok =~ /=/ ) { + return unless ( $lev > $group_level || $is_deletable_equals{$tok} ); + } + + # otherwise, ok to delete the token + return 1; + } +} + +sub delete_unmatched_tokens { + my ($rlines) = @_; + + # We will look at each line of a collection and compare its alignment + # tokens with its neighbors. If it has alignment tokens which do not match + # either neighbor, then we will usually remove them. This will + # simplify later work and improve chances of aligning. + + return unless @{$rlines}; + my $has_terminal_match = $rlines->[-1]->get_j_terminal_match(); + + # ignore hanging side comments + my @filtered = grep { !$_->{_is_hanging_side_comment} } @{$rlines}; + my $rnew_lines = \@filtered; + my @i_equals; + + # Step 1: create a hash of tokens for each line + my $rline_hashes = []; + foreach my $line ( @{$rnew_lines} ) { + my $rhash = {}; + my $rtokens = $line->get_rtokens(); + my $i = 0; + my $i_eq; + foreach my $tok ( @{$rtokens} ) { + $rhash->{$tok} = [ $i, undef, undef ]; + + # remember the first equals at line level + if ( !defined($i_eq) && $tok =~ /^=(\d+)/ ) { + my $lev = $1; + if ( $lev eq $group_level ) { $i_eq = $i } + } + $i++; + } + push @{$rline_hashes}, $rhash; + push @i_equals, $i_eq; + } + + # Step 2: compare each line pair and record matches + for ( my $jl = 0 ; $jl < @{$rline_hashes} - 1 ; $jl++ ) { + my $jr = $jl + 1; + my $rhash_l = $rline_hashes->[$jl]; + my $rhash_r = $rline_hashes->[$jr]; + my $count = 0; + my $ntoks = 0; + foreach my $tok ( keys %{$rhash_l} ) { + $ntoks++; + if ( defined( $rhash_r->{$tok} ) ) { + if ( $tok ne '#' ) { $count++; } + my $il = $rhash_l->{$tok}->[0]; + my $ir = $rhash_r->{$tok}->[0]; + $rhash_l->{$tok}->[2] = $ir; + $rhash_r->{$tok}->[1] = $il; + } + } + } + + # Step 3: remove unmatched tokens + my $jj = 0; + my $jmax = @{$rnew_lines} - 1; + foreach my $line ( @{$rnew_lines} ) { + my $rtokens = $line->get_rtokens(); + my $rhash = $rline_hashes->[$jj]; + my $i = 0; + my $nl = 0; + my $nr = 0; + my $i_eq = $i_equals[$jj]; + my @idel; + my $imax = @{$rtokens} - 2; + + for ( my $i = 0 ; $i <= $imax ; $i++ ) { + my $tok = $rtokens->[$i]; + next if ( $tok eq '#' ); # shouldn't happen + my ( $il, $ir ) = @{ $rhash->{$tok} }[ 1, 2 ]; + $nl++ if defined($il); + $nr++ if defined($ir); + if ( + !defined($il) + && !defined($ir) + && is_deletable_token( $tok, $i, $imax, $jj, $i_eq ) + + # Patch: do not touch the first line of a terminal match, + # such as below, because j_terminal has already been set. + # if ($tag) { $tago = "<$tag>"; $tagc = ""; } + # else { $tago = $tagc = ''; } + # But see snippets 'else1.t' and 'else2.t' + && !( $jj == 0 && $has_terminal_match && $jmax == 1 ) + + ) + { + push @idel, $i; + } + } + + if (@idel) { delete_selected_tokens( $line, \@idel ) } + + # set a break if this is an interior line with possible left matches + # but no matches to the right. We do not do this for the last line + # because it could be followed by hanging side comments filtered out + # above. + if ( $nr == 0 && $nl > 0 && $jj < @{$rnew_lines} - 1 ) { + $rnew_lines->[$jj]->{_end_group} = 1; + } + $jj++; + } + + #use Data::Dumper; + #print Data::Dumper->Dump( [$rline_hashes] ); + return; +} + +sub decide_if_aligned_pair { + + # Do not try to align two lines which are not really similar + return unless ( @group_lines == 2 ); + return if ($is_matching_terminal_line); + + my $group_list_type = $group_lines[0]->get_list_type(); + + my $rtokens = $group_lines[0]->get_rtokens(); + my $leading_equals = ( $rtokens->[0] =~ /=/ ); + + # A marginal match is a match which has different patterns. Normally, we + # should not allow exactly two lines to match if marginal. But we will modify + # this rule for two lines with a leading equals-like operator such that we + # match if the patterns to the left of the equals are the same. So for + # example the following two lines are a marginal match but have the same + # left side patterns, so we will align the equals. + # my $orig = my $format = "^<<<<< ~~\n"; + # my $abc = "abc"; + # But these have a different left pattern so they will not be aligned + # $xmldoc .= $`; + # $self->{'leftovers'} .= "get_rpatterns(); + my $rpatterns1 = $group_lines[1]->get_rpatterns(); + my $pat0 = $rpatterns0->[0]; + my $pat1 = $rpatterns1->[0]; + $is_marginal = $pat0 ne $pat1; + } + + my $do_not_align = ( + + # always align lists + !$group_list_type + + && ( + + # don't align if it was just a marginal match + $is_marginal ##$marginal_match + + # don't align two lines with big gap + # NOTE: I am not sure if this test is actually functional any longer + || $group_maximum_gap > 12 + + # or lines with differing number of alignment tokens + || ( $previous_maximum_jmax_seen != $previous_minimum_jmax_seen + && !$leading_equals ) + ) + ); + + # But try to convert them into a simple comment group if the first line + # a has side comment + my $rfields = $group_lines[0]->get_rfields(); + my $maximum_field_index = $group_lines[0]->get_jmax(); + if ( $do_not_align + && ( length( $rfields->[$maximum_field_index] ) > 0 ) ) + { + combine_fields(); + $do_not_align = 0; + } + return $do_not_align; +} + +sub adjust_side_comment { + + my $do_not_align = shift; + + # let's see if we can move the side comment field out a little + # to improve readability (the last field is always a side comment field) + my $have_side_comment = 0; + my $first_side_comment_line = -1; + my $maximum_field_index = $group_lines[0]->get_jmax(); + my $i = 0; + foreach my $line (@group_lines) { + if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { + $have_side_comment = 1; + $first_side_comment_line = $i; + last; + } + $i++; + } + + my $kmax = $maximum_field_index + 1; + + if ($have_side_comment) { + + my $line = $group_lines[0]; + + # the maximum space without exceeding the line length: + my $avail = $line->get_available_space_on_right(); + + # try to use the previous comment column + my $side_comment_column = $line->get_column( $kmax - 2 ); + my $move = $last_comment_column - $side_comment_column; + +## my $sc_line0 = $side_comment_history[0]->[0]; +## my $sc_col0 = $side_comment_history[0]->[1]; +## my $sc_line1 = $side_comment_history[1]->[0]; +## my $sc_col1 = $side_comment_history[1]->[1]; +## my $sc_line2 = $side_comment_history[2]->[0]; +## my $sc_col2 = $side_comment_history[2]->[1]; +## +## # FUTURE UPDATES: +## # Be sure to ignore 'do not align' and '} # end comments' +## # Find first $move > 0 and $move <= $avail as follows: +## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 +## # 2. try sc_col2 if (line-sc_line2) < 12 +## # 3. try min possible space, plus up to 8, +## # 4. try min possible space + + if ( $kmax > 0 && !$do_not_align ) { + + # but if this doesn't work, give up and use the minimum space + if ( $move > $avail ) { + $move = $rOpts_minimum_space_to_comment - 1; + } + + # but we want some minimum space to the comment + my $min_move = $rOpts_minimum_space_to_comment - 1; + if ( $move >= 0 + && $last_side_comment_length > 0 + && ( $first_side_comment_line == 0 ) + && $group_level == $last_level_written ) + { + $min_move = 0; + } + + if ( $move < $min_move ) { + $move = $min_move; + } + + # previously, an upper bound was placed on $move here, + # (maximum_space_to_comment), but it was not helpful + + # don't exceed the available space + if ( $move > $avail ) { $move = $avail } + + # we can only increase space, never decrease + if ( $move > 0 ) { + $line->increase_field_width( $maximum_field_index - 1, $move ); + } + + # remember this column for the next group + $last_comment_column = $line->get_column( $kmax - 2 ); + } + else { + + # try to at least line up the existing side comment location + if ( $kmax > 0 && $move > 0 && $move < $avail ) { + $line->increase_field_width( $maximum_field_index - 1, $move ); + $do_not_align = 0; + } + + # reset side comment column if we can't align + else { + forget_side_comment(); + } + } + } + return $do_not_align; +} + +sub valign_output_step_A { + + ############################################################### + # This is Step A in writing vertically aligned lines. + # The line is prepared according to the alignments which have + # been found. Then it is shipped to the next step. + ############################################################### + + my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, + $extra_leading_spaces ) + = @_; + my $rfields = $line->get_rfields(); + my $leading_space_count = $line->get_leading_space_count(); + my $outdent_long_lines = $line->get_outdent_long_lines(); + my $maximum_field_index = $line->get_jmax(); + my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); + + # add any extra spaces + if ( $leading_space_count > $group_leader_length ) { + $leading_space_count += $min_ci_gap; + } + + my $str = $rfields->[0]; + + # loop to concatenate all fields of this line and needed padding + my $total_pad_count = 0; + for my $j ( 1 .. $maximum_field_index ) { + + # skip zero-length side comments + last + if ( + ( $j == $maximum_field_index ) + && ( !defined( $rfields->[$j] ) + || ( length( $rfields->[$j] ) == 0 ) ) + ); + + # compute spaces of padding before this field + my $col = $line->get_column( $j - 1 ); + my $pad = $col - ( length($str) + $leading_space_count ); + + if ($do_not_align) { + $pad = + ( $j < $maximum_field_index ) + ? 0 + : $rOpts_minimum_space_to_comment - 1; + } + + # if the -fpsc flag is set, move the side comment to the selected + # column if and only if it is possible, ignoring constraints on + # line length and minimum space to comment + if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) + { + my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; + if ( $newpad >= 0 ) { $pad = $newpad; } + } + + # accumulate the padding + if ( $pad > 0 ) { $total_pad_count += $pad; } + + # add this field + if ( !defined $rfields->[$j] ) { + write_diagnostics("UNDEFined field at j=$j\n"); + } + + # only add padding when we have a finite field; + # this avoids extra terminal spaces if we have empty fields + if ( length( $rfields->[$j] ) > 0 ) { + $str .= ' ' x $total_pad_count; + $total_pad_count = 0; + $str .= $rfields->[$j]; + } + else { + $total_pad_count = 0; + } + + # update side comment history buffer + if ( $j == $maximum_field_index ) { + my $lineno = $file_writer_object->get_output_line_number(); + shift @side_comment_history; + push @side_comment_history, [ $lineno, $col ]; + } + } + + my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) ); + + # ship this line off + valign_output_step_B( $leading_space_count + $extra_leading_spaces, + $str, $side_comment_length, $outdent_long_lines, + $rvertical_tightness_flags, $group_level ); + return; +} + +sub get_extra_leading_spaces { + + #---------------------------------------------------------- + # Define any extra indentation space (for the -lp option). + # Here is why: + # If a list has side comments, sub scan_list must dump the + # list before it sees everything. When this happens, it sets + # the indentation to the standard scheme, but notes how + # many spaces it would have liked to use. We may be able + # to recover that space here in the event that all of the + # lines of a list are back together again. + #---------------------------------------------------------- + + my $extra_leading_spaces = 0; + if ($extra_indent_ok) { + my $object = $group_lines[0]->get_indentation(); + if ( ref($object) ) { + my $extra_indentation_spaces_wanted = + get_recoverable_spaces($object); + + # all indentation objects must be the same + for my $i ( 1 .. @group_lines - 1 ) { + if ( $object != $group_lines[$i]->get_indentation() ) { + $extra_indentation_spaces_wanted = 0; + last; + } + } + + if ($extra_indentation_spaces_wanted) { + + # the maximum space without exceeding the line length: + my $avail = $group_lines[0]->get_available_space_on_right(); + $extra_leading_spaces = + ( $avail > $extra_indentation_spaces_wanted ) + ? $extra_indentation_spaces_wanted + : $avail; + + # update the indentation object because with -icp the terminal + # ');' will use the same adjustment. + $object->permanently_decrease_available_spaces( + -$extra_leading_spaces ); + } + } + } + return $extra_leading_spaces; +} + +sub combine_fields { + + # combine all fields except for the comment field ( sidecmt.t ) + # Uses global variables: + # @group_lines + my $maximum_field_index = $group_lines[0]->get_jmax(); + foreach my $line (@group_lines) { + my $rfields = $line->get_rfields(); + foreach ( 1 .. $maximum_field_index - 1 ) { + $rfields->[0] .= $rfields->[$_]; + } + $rfields->[1] = $rfields->[$maximum_field_index]; + + $line->set_jmax(1); + $line->set_column( 0, 0 ); + $line->set_column( 1, 0 ); + + } + $maximum_field_index = 1; + + foreach my $line (@group_lines) { + my $rfields = $line->get_rfields(); + for my $k ( 0 .. $maximum_field_index ) { + my $pad = length( $rfields->[$k] ) - $line->current_field_width($k); + if ( $k == 0 ) { + $pad += $line->get_leading_space_count(); + } + + if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } + + } + } + return; +} + +sub get_output_line_number { + + # the output line number reported to a caller is the number of items + # written plus the number of items in the buffer + my $self = shift; + my $nlines = @group_lines; + return $nlines + $file_writer_object->get_output_line_number(); +} + +sub valign_output_step_B { + + ############################################################### + # This is Step B in writing vertically aligned lines. + # Vertical tightness is applied according to preset flags. + # In particular this routine handles stacking of opening + # and closing tokens. + ############################################################### + + my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, + $rvertical_tightness_flags, $level ) + = @_; + + # handle outdenting of long lines: + if ($outdent_long_lines) { + my $excess = + length($str) - + $side_comment_length + + $leading_space_count - + maximum_line_length_for_level($level); + if ( $excess > 0 ) { + $leading_space_count = 0; + $last_outdented_line_at = + $file_writer_object->get_output_line_number(); + + unless ($outdented_line_count) { + $first_outdented_line_at = $last_outdented_line_at; + } + $outdented_line_count++; + } + } + + # Make preliminary leading whitespace. It could get changed + # later by entabbing, so we have to keep track of any changes + # to the leading_space_count from here on. + my $leading_string = + $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; + + # Unpack any recombination data; it was packed by + # sub send_lines_to_vertical_aligner. Contents: + # + # [0] type: 1=opening non-block 2=closing non-block + # 3=opening block brace 4=closing block brace + # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok + # if closing: spaces of padding to use + # [2] sequence number of container + # [3] valid flag: do not append if this flag is false + # + my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, + $seqno_end ); + if ($rvertical_tightness_flags) { + ( + $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, + $seqno_end + ) = @{$rvertical_tightness_flags}; + } + + $seqno_string = $seqno_end; + + # handle any cached line .. + # either append this line to it or write it out + if ( length($cached_line_text) ) { + + # Dump an invalid cached line + if ( !$cached_line_valid ) { + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + } + + # Handle cached line ending in OPENING tokens + elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { + + my $gap = $leading_space_count - length($cached_line_text); + + # handle option of just one tight opening per line: + if ( $cached_line_flag == 1 ) { + if ( defined($open_or_close) && $open_or_close == 1 ) { + $gap = -1; + } + } + + if ( $gap >= 0 && defined($seqno_beg) ) { + $leading_string = $cached_line_text . ' ' x $gap; + $leading_space_count = $cached_line_leading_space_count; + $seqno_string = $cached_seqno_string . ':' . $seqno_beg; + $level = $last_level_written; + } + else { + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + } + } + + # Handle cached line ending in CLOSING tokens + else { + my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; + if ( + + # The new line must start with container + $seqno_beg + + # The container combination must be okay.. + && ( + + # okay to combine like types + ( $open_or_close == $cached_line_type ) + + # closing block brace may append to non-block + || ( $cached_line_type == 2 && $open_or_close == 4 ) + + # something like ');' + || ( !$open_or_close && $cached_line_type == 2 ) + + ) + + # The combined line must fit + && ( + length($test_line) <= + maximum_line_length_for_level($last_level_written) ) + ) + { + + $seqno_string = $cached_seqno_string . ':' . $seqno_beg; + + # Patch to outdent closing tokens ending # in ');' + # If we are joining a line like ');' to a previous stacked + # set of closing tokens, then decide if we may outdent the + # combined stack to the indentation of the ');'. Since we + # should not normally outdent any of the other tokens more than + # the indentation of the lines that contained them, we will + # only do this if all of the corresponding opening + # tokens were on the same line. This can happen with + # -sot and -sct. For example, it is ok here: + # __PACKAGE__->load_components( qw( + # PK::Auto + # Core + # )); + # + # But, for example, we do not outdent in this example because + # that would put the closing sub brace out farther than the + # opening sub brace: + # + # perltidy -sot -sct + # $c->Tk::bind( + # '' => sub { + # my ($c) = @_; + # my $e = $c->XEvent; + # itemsUnderArea $c; + # } ); + # + if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { + + # The way to tell this is if the stacked sequence numbers + # of this output line are the reverse of the stacked + # sequence numbers of the previous non-blank line of + # sequence numbers. So we can join if the previous + # nonblank string of tokens is the mirror image. For + # example if stack )}] is 13:8:6 then we are looking for a + # leading stack like [{( which is 6:8:13 We only need to + # check the two ends, because the intermediate tokens must + # fall in order. Note on speed: having to split on colons + # and eliminate multiple colons might appear to be slow, + # but it's not an issue because we almost never come + # through here. In a typical file we don't. + $seqno_string =~ s/^:+//; + $last_nonblank_seqno_string =~ s/^:+//; + $seqno_string =~ s/:+/:/g; + $last_nonblank_seqno_string =~ s/:+/:/g; + + # how many spaces can we outdent? + my $diff = + $cached_line_leading_space_count - $leading_space_count; + if ( $diff > 0 + && length($seqno_string) + && length($last_nonblank_seqno_string) == + length($seqno_string) ) + { + my @seqno_last = + ( split /:/, $last_nonblank_seqno_string ); + my @seqno_now = ( split /:/, $seqno_string ); + if ( $seqno_now[-1] == $seqno_last[0] + && $seqno_now[0] == $seqno_last[-1] ) + { + + # OK to outdent .. + # for absolute safety, be sure we only remove + # whitespace + my $ws = substr( $test_line, 0, $diff ); + if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { + + $test_line = substr( $test_line, $diff ); + $cached_line_leading_space_count -= $diff; + $last_level_written = + level_change( + $cached_line_leading_space_count, + $diff, $last_level_written ); + reduce_valign_buffer_indentation($diff); + } + + # shouldn't happen, but not critical: + ##else { + ## ERROR transferring indentation here + ##} + } + } + } + + $str = $test_line; + $leading_string = ""; + $leading_space_count = $cached_line_leading_space_count; + $level = $last_level_written; + } + else { + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + } + } + } + $cached_line_type = 0; + $cached_line_text = ""; + + # make the line to be written + my $line = $leading_string . $str; + + # write or cache this line + if ( !$open_or_close || $side_comment_length > 0 ) { + valign_output_step_C( $line, $leading_space_count, $level ); + } + else { + $cached_line_text = $line; + $cached_line_type = $open_or_close; + $cached_line_flag = $tightness_flag; + $cached_seqno = $seqno; + $cached_line_valid = $valid; + $cached_line_leading_space_count = $leading_space_count; + $cached_seqno_string = $seqno_string; + } + + $last_level_written = $level; + $last_side_comment_length = $side_comment_length; + $extra_indent_ok = 0; + return; +} + +sub valign_output_step_C { + + ############################################################### + # This is Step C in writing vertically aligned lines. + # Lines are either stored in a buffer or passed along to the next step. + # The reason for storing lines is that we may later want to reduce their + # indentation when -sot and -sct are both used. + ############################################################### + my @args = @_; + + # Dump any saved lines if we see a line with an unbalanced opening or + # closing token. + dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); + + # Either store or write this line + if ($valign_buffer_filling) { + push @valign_buffer, [@args]; + } + else { + valign_output_step_D(@args); + } + + # For lines starting or ending with opening or closing tokens.. + if ($seqno_string) { + $last_nonblank_seqno_string = $seqno_string; + + # Start storing lines when we see a line with multiple stacked opening + # tokens. + # patch for RT #94354, requested by Colin Williams + if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ ) + { + + # This test is efficient but a little subtle: The first test says + # that we have multiple sequence numbers and hence multiple opening + # or closing tokens in this line. The second part of the test + # rejects stacked closing and ternary tokens. So if we get here + # then we should have stacked unbalanced opening tokens. + + # Here is a complex example: + + # Foo($Bar[0], { # (side comment) + # baz => 1, + # }); + + # The first line has sequence 6::4. It does not begin with + # a closing token or ternary, so it passes the test and must be + # stacked opening tokens. + + # The last line has sequence 4:6 but is a stack of closing tokens, + # so it gets rejected. + + # Note that the sequence number of an opening token for a qw quote + # is a negative number and will be rejected. + # For example, for the following line: + # skip_symbols([qw( + # $seqno_string='10:5:-1'. It would be okay to accept it but + # I decided not to do this after testing. + + $valign_buffer_filling = $seqno_string; + + } + } + return; +} + +sub valign_output_step_D { + + ############################################################### + # This is Step D in writing vertically aligned lines. + # Write one vertically aligned line of code to the output object. + ############################################################### + + my ( $line, $leading_space_count, $level ) = @_; + + # The line is currently correct if there is no tabbing (recommended!) + # We may have to lop off some leading spaces and replace with tabs. + if ( $leading_space_count > 0 ) { + + # Nothing to do if no tabs + if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) + || $rOpts_indent_columns <= 0 ) + { + + # nothing to do + } + + # Handle entab option + elsif ($rOpts_entab_leading_whitespace) { + + # Patch 12-nov-2018 based on report from Glenn. Extra padding was + # not correctly entabbed, nor were side comments: + # Increase leading space count for a padded line to get correct tabbing + if ( $line =~ /^(\s+)(.*)$/ ) { + my $spaces = length($1); + if ( $spaces > $leading_space_count ) { + $leading_space_count = $spaces; + } + } + + my $space_count = + $leading_space_count % $rOpts_entab_leading_whitespace; + my $tab_count = + int( $leading_space_count / $rOpts_entab_leading_whitespace ); + my $leading_string = "\t" x $tab_count . ' ' x $space_count; + if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { + substr( $line, 0, $leading_space_count ) = $leading_string; + } + else { + + # shouldn't happen - program error counting whitespace + # - skip entabbing + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" + ); + } + } + + # Handle option of one tab per level + else { + my $leading_string = ( "\t" x $level ); + my $space_count = + $leading_space_count - $level * $rOpts_indent_columns; + + # shouldn't happen: + if ( $space_count < 0 ) { + + # But it could be an outdented comment + if ( $line !~ /^\s*#/ ) { + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" + ); + } + $leading_string = ( ' ' x $leading_space_count ); + } + else { + $leading_string .= ( ' ' x $space_count ); + } + if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { + substr( $line, 0, $leading_space_count ) = $leading_string; + } + else { + + # shouldn't happen - program error counting whitespace + # we'll skip entabbing + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" + ); + } + } + } + $file_writer_object->write_code_line( $line . "\n" ); + return; +} + +{ # begin get_leading_string + + my @leading_string_cache; + + sub get_leading_string { + + # define the leading whitespace string for this line.. + my $leading_whitespace_count = shift; + + # Handle case of zero whitespace, which includes multi-line quotes + # (which may have a finite level; this prevents tab problems) + if ( $leading_whitespace_count <= 0 ) { + return ""; + } + + # look for previous result + elsif ( $leading_string_cache[$leading_whitespace_count] ) { + return $leading_string_cache[$leading_whitespace_count]; + } + + # must compute a string for this number of spaces + my $leading_string; + + # Handle simple case of no tabs + if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) + || $rOpts_indent_columns <= 0 ) + { + $leading_string = ( ' ' x $leading_whitespace_count ); + } + + # Handle entab option + elsif ($rOpts_entab_leading_whitespace) { + my $space_count = + $leading_whitespace_count % $rOpts_entab_leading_whitespace; + my $tab_count = int( + $leading_whitespace_count / $rOpts_entab_leading_whitespace ); + $leading_string = "\t" x $tab_count . ' ' x $space_count; + } + + # Handle option of one tab per level + else { + $leading_string = ( "\t" x $group_level ); + my $space_count = + $leading_whitespace_count - $group_level * $rOpts_indent_columns; + + # shouldn't happen: + if ( $space_count < 0 ) { + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" + ); + + # -- skip entabbing + $leading_string = ( ' ' x $leading_whitespace_count ); + } + else { + $leading_string .= ( ' ' x $space_count ); + } + } + $leading_string_cache[$leading_whitespace_count] = $leading_string; + return $leading_string; + } +} # end get_leading_string + +sub report_anything_unusual { + my $self = shift; + if ( $outdented_line_count > 0 ) { + write_logfile_entry( + "$outdented_line_count long lines were outdented:\n"); + write_logfile_entry( + " First at output line $first_outdented_line_at\n"); + + if ( $outdented_line_count > 1 ) { + write_logfile_entry( + " Last at output line $last_outdented_line_at\n"); + } + write_logfile_entry( + " use -noll to prevent outdenting, -l=n to increase line length\n" + ); + write_logfile_entry("\n"); + } + return; +} +1; diff --git a/lib/Perl/Tidy/VerticalAligner/Alignment.pm b/lib/Perl/Tidy/VerticalAligner/Alignment.pm new file mode 100644 index 0000000..8732e96 --- /dev/null +++ b/lib/Perl/Tidy/VerticalAligner/Alignment.pm @@ -0,0 +1,123 @@ +##################################################################### +# +# the Perl::Tidy::VerticalAligner::Alignment class holds information +# on a single column being aligned +# +##################################################################### +package Perl::Tidy::VerticalAligner::Alignment; +use strict; +use warnings; +our $VERSION = '20190601'; + +{ + + #use Carp; + + # _column # the current column number + # _starting_column # column number when created + # _matching_token # what token we are matching + # _starting_line # the line index of creation + # _ending_line + # the most recent line to use it + # _saved_column + # _serial_number # unique number for this alignment + + my %default_data = ( + column => undef, + starting_column => undef, + matching_token => undef, + starting_line => undef, + ending_line => undef, + saved_column => undef, + serial_number => undef, + ); + + # class population count + { + my $_count = 0; + sub get_count { return $_count } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } + } + + # constructor + sub new { + my ( $caller, %arg ) = @_; + my $caller_is_obj = ref($caller); + my $class = $caller_is_obj || $caller; + ##no strict "refs"; + my $self = bless {}, $class; + + foreach my $key ( keys %default_data ) { + my $_key = '_' . $key; + if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} } + elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} } + else { $self->{$_key} = $default_data{$_key} } + } + $self->_increment_count(); + return $self; + } + + sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; + } + + sub get_column { my $self = shift; return $self->{_column} } + + sub get_starting_column { + my $self = shift; + return $self->{_starting_column}; + } + sub get_matching_token { my $self = shift; return $self->{_matching_token} } + sub get_starting_line { my $self = shift; return $self->{_starting_line} } + sub get_ending_line { my $self = shift; return $self->{_ending_line} } + sub get_serial_number { my $self = shift; return $self->{_serial_number} } + + sub set_column { my ( $self, $val ) = @_; $self->{_column} = $val; return } + + sub set_starting_column { + my ( $self, $val ) = @_; + $self->{_starting_column} = $val; + return; + } + + sub set_matching_token { + my ( $self, $val ) = @_; + $self->{_matching_token} = $val; + return; + } + + sub set_starting_line { + my ( $self, $val ) = @_; + $self->{_starting_line} = $val; + return; + } + + sub set_ending_line { + my ( $self, $val ) = @_; + $self->{_ending_line} = $val; + return; + } + + sub increment_column { + my ( $self, $val ) = @_; + $self->{_column} += $val; + return; + } + + sub save_column { + my $self = shift; + $self->{_saved_column} = $self->{_column}; + return; + } + + sub restore_column { + my $self = shift; + $self->{_column} = $self->{_saved_column}; + return; + } +} + +1; diff --git a/lib/Perl/Tidy/VerticalAligner/Line.pm b/lib/Perl/Tidy/VerticalAligner/Line.pm new file mode 100644 index 0000000..373896c --- /dev/null +++ b/lib/Perl/Tidy/VerticalAligner/Line.pm @@ -0,0 +1,261 @@ +##################################################################### +# +# the Perl::Tidy::VerticalAligner::Line class supplies an object to +# contain a single output line +# +##################################################################### + +package Perl::Tidy::VerticalAligner::Line; +use strict; +use warnings; +our $VERSION = '20190601'; + +{ + + ##use Carp; + + my %default_data = ( + jmax => undef, + jmax_original_line => undef, + rtokens => undef, + rfields => undef, + rpatterns => undef, + indentation => undef, + leading_space_count => undef, + outdent_long_lines => undef, + list_type => undef, + is_hanging_side_comment => undef, + ralignments => [], + maximum_line_length => undef, + rvertical_tightness_flags => undef, + is_terminal_ternary => undef, + is_terminal_else => undef, + j_terminal_match => undef, + ); + { + + # methods to count object population + my $_count = 0; + sub get_count { return $_count; } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } + } + + # Constructor may be called as a class method + sub new { + my ( $caller, %arg ) = @_; + my $caller_is_obj = ref($caller); + my $class = $caller_is_obj || $caller; + ##no strict "refs"; + my $self = bless {}, $class; + + $self->{_ralignments} = []; + + foreach my $key ( keys %default_data ) { + my $_key = '_' . $key; + + # Caller keys do not have an underscore + if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} } + elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} } + else { $self->{$_key} = $default_data{$_key} } + } + + $self->_increment_count(); + return $self; + } + + sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; + } + + sub get_jmax { my $self = shift; return $self->{_jmax} } + + sub get_jmax_original_line { + my $self = shift; + return $self->{_jmax_original_line}; + } + sub get_rtokens { my $self = shift; return $self->{_rtokens} } + sub get_rfields { my $self = shift; return $self->{_rfields} } + sub get_rpatterns { my $self = shift; return $self->{_rpatterns} } + sub get_indentation { my $self = shift; return $self->{_indentation} } + + sub get_j_terminal_match { + my $self = shift; + return $self->{_j_terminal_match}; + } + + sub get_is_terminal_else { + my $self = shift; + return $self->{_is_terminal_else}; + } + + sub get_is_terminal_ternary { + my $self = shift; + return $self->{_is_terminal_ternary}; + } + + sub get_leading_space_count { + my $self = shift; + return $self->{_leading_space_count}; + } + + sub get_outdent_long_lines { + my $self = shift; + return $self->{_outdent_long_lines}; + } + sub get_list_type { my $self = shift; return $self->{_list_type} } + + sub get_is_hanging_side_comment { + my $self = shift; + return $self->{_is_hanging_side_comment}; + } + + sub get_rvertical_tightness_flags { + my $self = shift; + return $self->{_rvertical_tightness_flags}; + } + + sub set_column { + ## FIXME: does caller ever supply $val?? + my ( $self, $j, $val ) = @_; + return $self->{_ralignments}->[$j]->set_column($val); + } + + sub get_alignment { + my ( $self, $j ) = @_; + return $self->{_ralignments}->[$j]; + } + sub get_alignments { my $self = shift; return @{ $self->{_ralignments} } } + + sub get_column { + my ( $self, $j ) = @_; + return $self->{_ralignments}->[$j]->get_column(); + } + + sub get_starting_column { + my ( $self, $j ) = @_; + return $self->{_ralignments}->[$j]->get_starting_column(); + } + + sub increment_column { + my ( $self, $k, $pad ) = @_; + $self->{_ralignments}->[$k]->increment_column($pad); + return; + } + + sub set_alignments { + my ( $self, @args ) = @_; + @{ $self->{_ralignments} } = @args; + return; + } + + sub current_field_width { + my ( $self, $j ) = @_; + if ( $j == 0 ) { + return $self->get_column($j); + } + else { + return $self->get_column($j) - $self->get_column( $j - 1 ); + } + } + + sub field_width_growth { + my ( $self, $j ) = @_; + return $self->get_column($j) - $self->get_starting_column($j); + } + + sub starting_field_width { + my ( $self, $j ) = @_; + if ( $j == 0 ) { + return $self->get_starting_column($j); + } + else { + return $self->get_starting_column($j) - + $self->get_starting_column( $j - 1 ); + } + } + + sub increase_field_width { + + my ( $self, $j, $pad ) = @_; + my $jmax = $self->get_jmax(); + for my $k ( $j .. $jmax ) { + $self->increment_column( $k, $pad ); + } + return; + } + + sub get_available_space_on_right { + my $self = shift; + my $jmax = $self->get_jmax(); + return $self->{_maximum_line_length} - $self->get_column($jmax); + } + + sub set_jmax { my ( $self, $val ) = @_; $self->{_jmax} = $val; return } + + sub set_jmax_original_line { + my ( $self, $val ) = @_; + $self->{_jmax_original_line} = $val; + return; + } + + sub set_rtokens { + my ( $self, $val ) = @_; + $self->{_rtokens} = $val; + return; + } + + sub set_rfields { + my ( $self, $val ) = @_; + $self->{_rfields} = $val; + return; + } + + sub set_rpatterns { + my ( $self, $val ) = @_; + $self->{_rpatterns} = $val; + return; + } + + sub set_indentation { + my ( $self, $val ) = @_; + $self->{_indentation} = $val; + return; + } + + sub set_leading_space_count { + my ( $self, $val ) = @_; + $self->{_leading_space_count} = $val; + return; + } + + sub set_outdent_long_lines { + my ( $self, $val ) = @_; + $self->{_outdent_long_lines} = $val; + return; + } + + sub set_list_type { + my ( $self, $val ) = @_; + $self->{_list_type} = $val; + return; + } + + sub set_is_hanging_side_comment { + my ( $self, $val ) = @_; + $self->{_is_hanging_side_comment} = $val; + return; + } + + sub set_alignment { + my ( $self, $j, $val ) = @_; + $self->{_ralignments}->[$j] = $val; + return; + } + +} + +1; + diff --git a/pm2pl b/pm2pl index 5c49260..b62a6ef 100755 --- a/pm2pl +++ b/pm2pl @@ -1,38 +1,58 @@ #!/usr/bin/env perl use strict; -# This script will recombine the perltidy binary script and all of its -# modules into a single, monolithic script, which is how it was -# originally distributed. It might be useful for users who have -# difficulty installing modules, or prefer not to. +# This script will recombine the perltidy binary script and all of its modules +# into a single, monolithic script. I use it for making a temporary "sandbox" +# for debugging. + +# This is also useful for making a copy of previous versions for parallel +# debugging sessions. # usage: # perl pm2pl # Run this from the perltidy main installation directory. It reads -# bin/perltidy and lib/*.pm and writes a file 'perltidy' in the +# bin/perltidy and lib/*.pm and writes a file 'perltidy-VERSION.pl' in the # current directory. -# Then, put the file 'perltidy' in your path and it should work (You -# will also need to put the batch file 'perltidy.bat' in your path -# under msdos/windows) - -# For unix systems, a sample Makefile is included as Makefile.npm - # This should work for a system with File::Spec, # and for older Windows/Unix systems without File::Spec. -my $script = 'bin/perltidy'; -my $module = 'lib/Perl/Tidy.pm'; +my $script = 'bin/perltidy'; +my @modules = qw( + lib/Perl/Tidy.pm + lib/Perl/Tidy/Debugger.pm + lib/Perl/Tidy/DevNull.pm + lib/Perl/Tidy/Diagnostics.pm + lib/Perl/Tidy/FileWriter.pm + lib/Perl/Tidy/Formatter.pm + lib/Perl/Tidy/HtmlWriter.pm + lib/Perl/Tidy/IOScalar.pm + lib/Perl/Tidy/IOScalarArray.pm + lib/Perl/Tidy/IndentationItem.pm + lib/Perl/Tidy/LineBuffer.pm + lib/Perl/Tidy/LineSink.pm + lib/Perl/Tidy/LineSource.pm + lib/Perl/Tidy/Logger.pm + lib/Perl/Tidy/Tokenizer.pm + lib/Perl/Tidy/VerticalAligner.pm + lib/Perl/Tidy/VerticalAligner/Alignment.pm + lib/Perl/Tidy/VerticalAligner/Line.pm +); + +# try to make the pathnames system independent eval "use File::Spec;"; my $missing_file_spec = $@; unless ($missing_file_spec) { - $script = File::Spec->catfile( 'bin', 'perltidy' ); - $module = File::Spec->catfile( 'lib', 'Perl', 'Tidy.pm' ); + $script = File::Spec->catfile( split '/', $script ); + foreach my $module (@modules) { + $module = File::Spec->catfile( split '/', $module ); + } } -my $outfile = "perltidy"; +my $VERSION = get_version("lib/Perl/Tidy.pm"); +my $outfile = "perltidy-$VERSION.pl"; open OUTFILE, "> $outfile" or die "can't open file '$outfile' : $!\n"; -print "Creating file '$outfile' ....\n "; +print "Creating standalone formatter script '$outfile' ....\n "; # first, open the script and copy the first (hash-bang) line # (Note: forward slashes in file names here will work in Windows) @@ -40,45 +60,52 @@ open SCRIPT, "< $script" or die "can't open script file '$script' : $!\n"; my $hash_bang =