From: Steve Hancock Date: Sat, 7 Apr 2018 23:42:42 +0000 (-0700) Subject: Initial commit X-Git-Tag: 20181117~32 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=c3edc321210b78a8828f2445f028ecf80693f68e;p=perltidy.git Initial commit --- c3edc321210b78a8828f2445f028ecf80693f68e diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 00000000..ccdfb290 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,12 @@ +skip_tags: true +cache: + - C:\strawberry -> appveyor.yml +install: + - if not exist "C:\strawberry" cinst strawberryperl + - set PATH=C:\strawberry\perl\bin;C:\strawberry\perl\site\bin;C:\strawberry\c\bin;%PATH% + - cd C:\projects\%APPVEYOR_PROJECT_NAME% +build_script: + - perl Makefile.PL + - gmake + - dir + - gmake test diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..555fb111 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +*.DEBUG +*.ERR +*.LOG +*.bak +*.log +*.sw? +*.tar.gz +*.tdy +*.tmp* +*.zip +*~ +.tidyall.d/ +/Makefile +/NOTES.txt +MANIFEST.bak +MYMETA.json +MYMETA.yml +RUNME.sh +RUNME.sh +blib/ +archive/ +diff.txt +junk* +local_docs/*.txt +perltidy-*.pl +perltidy.pl +pm_to_blib +tmp*/ diff --git a/.perlcriticrc b/.perlcriticrc new file mode 100644 index 00000000..2440ac13 --- /dev/null +++ b/.perlcriticrc @@ -0,0 +1,42 @@ +severity = 4 + +# Following is a list of policies to be skipped: + +# There is a localization in Tokenizer.pm that is essential +[-Variables::ProhibitLocalVars] + +# immediate initialization of locals is not appropriate where used +# in the Tokenizer.pm module +[-Variables::RequireInitializationForLocalVars] + +# C-style for loops are essential when working with multiple indexed +# arrays +[-ControlStructures::ProhibitCStyleForLoops] + +# There is a stringy eval in Formatter.pm which is essential. +[-BuiltinFunctions::ProhibitStringyEval] + +# maintaining VERSION numbers for all internal modules introduces +# needless maintenance issues +[-Modules::RequireVersionVar] + +# Tidy.pm exports 'perltidy'. Changing this could break existing scripts. +[-Modules::ProhibitAutomaticExportation] + +# print and close homonyms are appropriate where they are used. +[-Subroutines::ProhibitBuiltinHomonyms] + +# Nested subs are needed for error handling in Tidy.pm. +[-Subroutines::ProhibitNestedSubs] + +# constants are needed for clean array indexing. +[-ValuesAndExpressions::ProhibitConstantPragma] + +# Adding quotes on here doc terminators causes needless "line noise" in the +# source code. My editor uses color to make it clear if interpolation is in +# effect. +[-ValuesAndExpressions::RequireQuotedHeredocTerminator] + +# Perlcritic doesn't know ARGV actually is localized +[-Variables::RequireLocalizedPunctuationVars] + diff --git a/.tidyallrc b/.tidyallrc new file mode 100644 index 00000000..2ad74dab --- /dev/null +++ b/.tidyallrc @@ -0,0 +1,10 @@ +[PerlTidy] +select = lib/**/*.pm +select = bin/perltidy + +[PerlCritic] +select = lib/**/*.pm +argv = --severity 4 --exclude=nowarnings + +[SortLines] +select = .gitignore diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..2557cefb --- /dev/null +++ b/.travis.yml @@ -0,0 +1,11 @@ +language: perl +perl: + - "5.24" + - "5.22" + - "5.20" + - "5.18" + - "5.16" + - "5.14" + - "5.12" + - "5.10" + - "5.08" diff --git a/BUGS.md b/BUGS.md new file mode 100644 index 00000000..1ec2b487 --- /dev/null +++ b/BUGS.md @@ -0,0 +1,45 @@ +# Perltidy open BUGS and LIMITATIONS + +You can help perltidy evolve into a better program. If you think you +have hit a bug or weird behavior, or have a suggested improvement, +please send a note to perltidy at users.sourceforge.net. + +This file only lists open bugs. For bugs which have been fixed, +see the ChangeLog. + +## The --extrude and --mangle options can produce code with syntax errors + +The --extrude tries to put as many newlines in the formatted code as possible. +The --mangle tries to remove as many newlines as possible. These options are +very useful for stress testing perltidy (and Perl) but not so much for normal +formatting. Occasionally they will produce code which Perl considers to have a +syntax error. These problems often involve code where Perl is having to guess +the tokenization based on whitespace. The given/when and switch/case +statements are also particularly vulnerable to unusual line breaks and +whitespace. This type of error should not normally occur in practice, but if +it does it should be easy to fix the problem by rerunning perltidy with more +normal parameters or by manually changing whitespace or newlines. + +## The Pod:Html module has some bugs + +For the most part Pod::Html works very well and is very convenient because +it part of the standard Perl distribution. But for example the following line + + =item B<< = Session->new_cflt_deck; >> + +which uses double brackets to contain single brackets does not render correctly. + +## Two iterations are sometimes needed + +Usually the code produced by perltidy on the first pass does not change if it +is run again, but sometimes a second pass will produce some small additional +change. This mainly happens if a major style change is made, particularly when +perltidy is untangling complex ternary statements. Use the iteration parameter +**-it=2** if it is important that the results be unchanged on subsequent passes, +but note that this doubles the run time. + +## Latest Bug and Wishlist at CPAN: + +For the latest list of bugs and feature requests at CPAN see: + +https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 00000000..4d026e41 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,3268 @@ +# Perltidy Change Log + +## 2018 02 20.01 + + - Fixed RT #81852: Stacked containers and quoting operators. Quoted words + (qw) delimited by container tokens ('{', '[', '(', '<') are now included in + the --weld-nested (-wn) flag: + + # perltidy -wn + use_all_ok( qw{ + PPI + PPI::Tokenizer + PPI::Lexer + PPI::Dumper + PPI::Find + PPI::Normal + PPI::Util + PPI::Cache + } ); + + - The cuddled-else (-ce) coding was merged with the new cuddled-block (-cb) + coding. The change is backward compatible and simplifies input. + The --cuddled-block-option=n (-cbo=n) flag now applies to both -ce and -cb + formatting. In fact the -cb flag is just an alias for -ce now. + + - Fixed RT #124594, license text desc. changed from 'GPL-2.0+' to 'gpl_2' + + - Numerous installation test snippets have been added. + + - added option --notimestamp or -nts to eliminate any time stamps in output + files. This is used to prevent differences in test scripts from causing + failure at installation. For example, the -cscw option will put a date + stamp on certain closing side comments. We need to avoid this in order + to test this feature in an installation test. + + - The packaging for this version has changed. The Tidy.pm file has + been split into a smaller Tidy.pm file plus supporting modules in the path + Perl/Tidy/*. + +## 2018 02 20 + + - RT #124469, #124494, perltidy often making empty files. The previous had + an index error causing it to fail, particularly in version 5.18 of Perl. + + Please avoid version 20180219. + +## 2018 02 19 + + - RT #79947, cuddled-else generalization. A new flag -cb provides + 'cuddled-else' type formatting for an arbitrary type of block chain. The + default is try-catch-finally, but this can be modified with the + parameter -cbl. + + - Fixed RT #124298: add space after ! operator without breaking !! secret + operator + + - RT #123749: numerous minor improvements to the -wn flag were made. + + - Fixed a problem with convergence tests in which iterations were stopping + prematurely. + + - Here doc targets for <<~ type here-docs may now have leading whitespace. + + - Fixed RT #124354. The '-indent-only' flag was not working correctly in the + previous release. A bug in version 20180101 caused extra blank lines + to be output. + + - Issue RT #124114. Some improvements were made in vertical alignment + involving 'fat commas'. + +## 2018 01 01 + + - Added new flag -wn (--weld-nested-containers) which addresses these issues: + RT #123749: Problem with promises; + RT #119970: opening token stacking strange behavior; + RT #81853: Can't stack block braces + + This option causes closely nested pairs of opening and closing containers + to be "welded" together and essentially be formatted as a single unit, + with just one level of indentation. + + Since this is a new flag it is set to be "off" by default but it has given + excellent results in testing. + + EXAMPLE 1, multiple blocks, default formatting: + do { + { + next if $x == $y; # do something here + } + } until $x++ > $z; + + perltidy -wn + do { { + next if $x == $y; + } } until $x++ > $z; + + EXAMPLE 2, three levels of wrapped function calls, default formatting: + p( + em( + conjug( + translate( param('verb') ), param('tense'), + param('person') + ) + ) + ); + + # perltidy -wn + p( em( conjug( + translate( param('verb') ), + param('tense'), param('person') + ) ) ); + + # EXAMPLE 3, chained method calls, default formatting: + get('http://mojolicious.org')->then( + sub { + my $mojo = shift; + say $mojo->res->code; + return get('http://metacpan.org'); + } + )->then( + sub { + my $cpan = shift; + say $cpan->res->code; + } + )->catch( + sub { + my $err = shift; + warn "Something went wrong: $err"; + } + )->wait; + + # perltidy -wn + get('http://mojolicious.org')->then( sub { + my $mojo = shift; + say $mojo->res->code; + return get('http://metacpan.org'); + } )->then( sub { + my $cpan = shift; + say $cpan->res->code; + } )->catch( sub { + my $err = shift; + warn "Something went wrong: $err"; + } )->wait; + + + - Fixed RT #114359: Missparsing of "print $x ** 0.5; + + - Deactivated the --check-syntax flag for better security. It will be + ignored if set. + + - Corrected minimum perl version from 5.004 to 5.008 based on perlver + report. The change is required for coding involving wide characters. + + - For certain severe errors, the source file will be copied directly to the + output without formatting. These include ending in a quote, ending in a + here doc, and encountering an unidentified character. + +## 2017 12 14 + + - RT #123749, partial fix. "Continuation indentation" is removed from lines + with leading closing parens which are part of a call chain. + For example, the call to pack() is is now outdented to the starting + indentation in the following experession: + + # OLD + $mw->Button( + -text => "New Document", + -command => \&new_document + )->pack( + -side => 'bottom', + -anchor => 'e' + ); + + # NEW + $mw->Button( + -text => "New Document", + -command => \&new_document + )->pack( + -side => 'bottom', + -anchor => 'e' + ); + + This modification improves readability of complex expressions, especially + when the user uses the same value for continuation indentation (-ci=n) and + normal indentation (-i=n). Perltidy was already programmed to + do this but a minor bug was preventing it. + + - RT #123774, added flag to control space between a backslash and a single or + double quote, requested by Robert Rothenberg. The issue is that lines like + + $str1=\"string1"; + $str2=\'string2'; + + confuse syntax highlighters unless a space is left between the backslash and + the quote. + + The new flag to control this is -sbq=n (--space-backslash-quote=n), + where n=0 means no space, n=1 means follow existing code, n=2 means always + space. The default is n=1, meaning that a space will be retained if there + is one in the source code. + + - Fixed RT #123492, support added for indented here doc operator <<~ added + in v5.26. Thanks to Chris Weyl for the report. + + - Fixed docs; --closing-side-comment-list-string should have been just + --closing-side-comment-list. Thanks to F.Li. + + - Added patch RT #122030] Perl::Tidy sometimes does not call binmode. + Thanks to Irilis Aelae. + + - Fixed RT #121959, PERLTIDY doesn't honor the 'three dot' notation for + locating a config file using environment variables. Thanks to John + Wittkowski. + + - Minor improvements to formatting, in which some additional vertical + aligmnemt is done. Thanks to Keith Neargarder. + + - RT #119588. Vertical alignment is no longer done for // operator. + +## 2017 05 21 + + - Fixed debian #862667: failure to check for perltidy.ERR deletion can lead + to overwriting abritrary files by symlink attack. Perltidy was continuing + to write files after an unlink failure. Thanks to Don Armstrong + for a patch. + + - Fixed RT #116344, perltidy fails on certain anonymous hash references: + in the following code snippet the '?' was misparsed as a pattern + delimiter rather than a ternary operator. + return ref {} ? 1 : 0; + + - Fixed RT #113792: misparsing of a fat comma (=>) right after + the __END__ or __DATA__ tokens. These keywords were getting + incorrectly quoted by the following => operator. + + - Fixed RT #118558. Custom Getopt::Long configuration breaks parsing + of perltidyrc. Perltidy was resetting the users configuration too soon. + + - Fixed RT #119140, failure to parse double diamond operator. Code to + handle this new operator has been added. + + - Fixed RT #120968. Fixed problem where -enc=utf8 didn't work + with --backup-and-modify-in-place. Thanks to Heinz Knutzen for this patch. + + - Fixed minor formatting issue where one-line blocks for subs with signatures + were unnecesarily broken + + - RT #32905, patch to fix utf-8 error when output was STDOUT. + + - RT #79947, improved spacing of try/catch/finally blocks. Thanks to qsimpleq + for a patch. + + - Fixed #114909, Anonymous subs with signatures and prototypes misparsed as + broken ternaries, in which a statement such as this was not being parsed + correctly: + return sub ( $fh, $out ) : prototype(*$) { ... } + + - Implemented RT #113689, option to introduces spaces after an opening block + brace and before a closing block brace. Four new optional controls are + added. The first two define the minimum number of blank lines to be + inserted + + -blao=i or --blank-lines-after-opening-block=i + -blbc=i or --blank-lines-before-closing-block=i + + where i is an integer, the number of lines (the default is 0). + + The second two define the types of blocks to which the first two apply + + -blaol=s or --blank-lines-after-opening-block-list=s + -blbcl=s or --blank-lines-before-closing-block-list=s + + where s is a string of possible block keywords (default is just 'sub', + meaning a named subroutine). + + For more information please see the documentation. + + - The method for specifying block types for certain input parameters has + been generalized to distinguish between normal named subroutines and + anonymous subs. The keyword for normal subroutines remains 'sub', and + the new keyword for anonymous subs is 'asub'. + + - Minor documentation changes. The BUGS sections now have a link + to CPAN where most open bugs and issues can be reviewed and bug reports + can be submitted. The information in the AUTHOR and CREDITS sections of + the man pages have been removed from the man pages to streamline the + documentation. This information is still in the source code. + +## 2016 03 02 + + - RT #112534. Corrected a minor problem in which an unwanted newline + was placed before the closing brace of an anonymous sub with + a signature, if it was in a list. Thanks to Dmytro Zagashev. + + - Corrected a minor problem in which occasional extra indentation was + given to the closing brace of an anonymous sub in a list when the -lp + parameter was set. + +## 2016 03 01 + + - RT #104427. Added support for signatures. + + - RT #111512. Changed global warning flag $^W = 1 to use warnings; + Thanks to Dmytro Zagashev. + + - RT #110297, added support for new regexp modifier /n + Thanks to Dmytro Zagashev. + + - RT #111519. The -io (--indent-only) and -dac (--delete-all-comments) + can now both be used in one pass. Thanks to Dmitry Veltishev. + + - Patch to avoid error message with 'catch' used by TryCatch, as in + catch($err){ + # do something + } + Thanks to Nick Tonkin. + + - RT #32905, UTF-8 coding is now more robust. Thanks to qsimpleq + and Dmytro for patches. + + - RT #106885. Added string bitwise operators ^. &. |. ~. ^.= &.= |.= + + - Fixed RT #107832 and #106492, lack of vertical alignment of two lines + when -boc flag (break at old commas) is set. This bug was + inadvertantly introduced in previous bug fix RT #98902. + + - Some common extensions to Perl syntax are handled better. + In particular, the following snippet is now foratted cleanly: + + method deposit( Num $amount) { + $self->balance( $self->balance + $amount ); + } + + A new flag -xs (--extended-syntax) was added to enable this, and the default + is to use -xs. + + In previous versions, and now only when -nxs is set, this snippet of code + generates the following error message: + + "syntax error at ') {', didn't see one of: case elsif for foreach given if switch unless until when while" + +## 2015 08 15 + + - Fixed RT# 105484, Invalid warning about 'else' in 'switch' statement. The + warning happened if a 'case' statement did not use parens. + + - Fixed RT# 101547, misparse of // caused error message. Also.. + + - Fixed RT# 102371, misparse of // caused unwated space in //= + + - Fixed RT# 100871, "silent failure of HTML Output on Windows". + Changed calls to tempfile() from: + my ( $fh_tmp, $tmpfile ) = tempfile(); + to have the full path name: + my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile() + because of problems in the Windows version reported by Dean Pearce. + + - Fixed RT# 99514, calling the perltidy module multiple times with + a .perltidyrc file containing the parameter --output-line-ending + caused a crash. This was a glitch in the memoization logic. + + - Fixed RT#99961, multiple lines inside a cast block caused unwanted + continuation indentation. + + - RT# 32905, broken handling of UTF-8 strings. + A new flag -utf8 causes perltidy assume UTF-8 encoding for input and + output of an io stream. Thanks to Sebastian Podjasek for a patch. + This feature may not work correctly in older versions of Perl. + It worked in a linux version 5.10.1 but not in a Windows version 5.8.3 (but + otherwise perltidy ran correctly). + + - Warning files now report perltidy VERSION. Suggested by John Karr. + + - Fixed long flag --nostack-closing-tokens (-nsct has always worked though). + This was due to a typo. This also fixed --nostack-opening-tokens to + behave correctly. Thanks to Rob Dixon. + +## 2014 07 11 + + - Fixed RT #94902: abbreviation parsing in .perltidyrc files was not + working for multi-line abbreviations. Thanks to Eric Fung for + supplying a patch. + + - Fixed RT #95708, misparsing of a hash when the first key was a perl + keyword, causing a semicolon to be incorrectly added. + + - Fixed RT #94338 for-loop in a parenthesized block-map. A code block within + parentheses of a map, sort, or grep function was being mistokenized. In + rare cases this could produce in an incorrect error message. The fix will + produce some minor formatting changes. Thanks to Daniel Trizen + discovering and documenting this. + + - Fixed RT #94354, excess indentation for stacked tokens. Thanks to + Colin Williams for supplying a patch. + + - Added support for experimental postfix dereferencing notation introduced in + perl 5.20. RT #96021. + + - Updated documentation to clarify the behavior of the -io flag + in response to RT #95709. You can add -noll or -l=0 to prevent + long comments from being outdented when -io is used. + + - Added a check to prevent a problem reported in RT #81866, where large + scripts which had been compressed to a single line could not be formatted + because of a check for VERSION for MakeMaker. The workaround was to + use -nvpl, but this shouldn't be necessary now. + + - Fixed RT #96101; Closing brace of anonymous sub in a list was being + indented. For example, the closing brace of the anonymous sub below + will now be lined up with the word 'callback'. This problem + occured if there was no comma after the closing brace of the anonymous sub. + This update may cause minor changes to formatting of code with lists + of anonymous subs, especially TK code. + + # OLD + my @menu_items = ( + + #... + { + path => '/_Operate/Transcode and split', + callback => sub { + return 1 if not $self->project_opened; + $self->comp('project')->transcode( split => 1 ); + } + } + ); + + # NEW + my @menu_items = ( + + #... + { + path => '/_Operate/Transcode and split', + callback => sub { + return 1 if not $self->project_opened; + $self->comp('project')->transcode( split => 1 ); + } + } + ); + +## 2014 03 28 + + - Fixed RT #94190 and debian Bug #742004: perltidy.LOG file left behind. + Thanks to George Hartzell for debugging this. The problem was + caused by the memoization speedup patch in version 20121207. An + unwanted flag was being set which caused a LOG to be written if + perltidy was called multiple times. + + - New default behavior for LOG files: If the source is from an array or + string (through a call to the perltidy module) then a LOG output is only + possible if a logfile stream is specified. This is to prevent + unexpected perltidy.LOG files. + + - Fixed debian Bug #740670, insecure temporary file usage. File::Temp is now + used to get a temporary file. Thanks to Don Anderson for a patch. + + - Any -b (--backup-and-modify-in-place) flag is silently ignored when a + source stream, destination stream, or standard output is used. + This is because the -b flag may have been in a .perltidyrc file and + warnings break Test::NoWarnings. Thanks to Marijn Brand. + +## 2013 09 22 + + - Fixed RT #88020. --converge was not working with wide characters. + + - Fixed RT #78156. package NAMESPACE VERSION syntax not accepted. + + - First attempt to fix RT #88588. INDEX END tag change in pod2html breaks + perltidy -html. I put in a patch which should work but I don't yet have + a way of testing it. + +## 2013 08 06 + + - Fixed RT #87107, spelling + +## 2013 08 05 + + - Fixed RT #87502, incorrect of parsing of smartmatch before hash brace + + - Added feature request RT #87330, trim whitespace after POD. + The flag -trp (--trim-pod) will trim trailing whitespace from lines of POD + +## 2013 07 17 + + - Fixed RT #86929, #86930, missing lhs of assignment. + + - Fixed RT #84922, moved pod from Tidy.pm into Tidy.pod + +## 2012 12 07 + + - The flag -cab=n or --comma-arrow-breakpoints=n has been generalized + to give better control over breaking open short containers. The + possible values are now: + + n=0 break at all commas after => + n=1 stable: break at all commas after => if container is open, + EXCEPT FOR one-line containers + n=2 break at all commas after =>, BUT try to form the maximum + maximum one-line container lengths + n=3 do not treat commas after => specially at all + n=4 break everything: like n=0 but also break a short container with + a => not followed by a comma + n=5 stable: like n=1 but ALSO break at open one-line containers (default) + + New values n=4 and n=5 have been added to allow short blocks to be + broken open. The new default is n=5, stable. It should more closely + follow the breaks in the input file, and previously formatted code + should remain unchanged. If this causes problems use -cab=1 to recover + the former behavior. Thanks to Tony Maszeroski for the suggestion. + + To illustrate the need for the new options, if perltidy is given + the following code, then the old default (-cab=1) was to close up + the 'index' container even if it was open in the source. The new + default (-cab=5) will keep it open if it was open in the source. + + our $fancypkg = { + 'ALL' => { + 'index' => { + 'key' => 'value', + }, + 'alpine' => { + 'one' => '+', + 'two' => '+', + 'three' => '+', + }, + } + }; + + - New debug flag --memoize (-mem). This version contains a + patch supplied by Jonathan Swartz which can significantly speed up + repeated calls to Perl::Tidy::perltidy in a single process by caching + the result of parsing the formatting parameters. A factor of up to 10 + speedup was achieved for masontidy (https://metacpan.org/module/masontidy). + The memoization patch is on by default but can be deactivated for + testing with -nmem (or --no-memoize). + + - New flag -tso (--tight-secret-operators) causes certain perl operator + sequences (secret operators) to be formatted "tightly" (without spaces). + The most common of these are 0 + and + 0 which become 0+ and +0. The + operators currently modified by this flag are: + =( )= 0+ +0 ()x!! ~~<> ,=> + Suggested by by Philippe Bruhat. See https://metacpan.org/module/perlsecret + This flag is off by default. + + - New flag -vmll (--variable-maximum-line-length) makes the maximum + line length increase with the nesting depth of a line of code. + Basically, it causes the length of leading whitespace to be ignored when + setting line breaks, so the formatting of a block of code is independent + of its nesting depth. Try this option if you have deeply nested + code or data structures, perhaps in conjunction with the -wc flag + described next. The default is not todo this. + + - New flag -wc=n (--whitespace-cycle=n) also addresses problems with + very deeply nested code and data structures. When this parameter is + used and the nesting depth exceeds the value n, the leading whitespace + will be reduced and start at 1 again. The result is that deeply + nested blocks of code will shift back to the left. This occurs cyclically + to any nesting depth. This flag may be used either with or without -vmll. + The default is not to use this (-wc=0). + + - Fixed RT #78764, error parsing smartmatch operator followed by anonymous + hash or array and then a ternary operator; two examples: + + qr/3/ ~~ ['1234'] ? 1 : 0; + map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; + + - Fixed problem with specifying spaces around arrows using -wls='->' + and -wrs='->'. Thanks to Alain Valleton for documenting this problem. + + - Implemented RT #53183, wishlist, lines of code with the same indentation + level which are contained with multiple stacked opening and closing tokens + (requested with flags -sot -sct) now have reduced indentation. + + # Default + $sender->MailMsg( + { + to => $addr, + subject => $subject, + msg => $body + } + ); + + # OLD: perltidy -sot -sct + $sender->MailMsg( { + to => $addr, + subject => $subject, + msg => $body + } ); + + # NEW: perltidy -sot -sct + $sender->MailMsg( { + to => $addr, + subject => $subject, + msg => $body + } ); + + - New flag -act=n (--all-containers-tightness=n) is an abbreviation for + -pt=n -sbt=n -bt=n -bbt=n, where n=0,1, or 2. It simplifies input when all + containers have the same tightness. Using the same example: + + # NEW: perltidy -sot -sct -act=2 + $sender->MailMsg({ + to => $addr, + subject => $subject, + msg => $body + }); + + - New flag -sac (--stack-all-containers) is an abbreviation for -sot -sct + This is part of wishlist item RT #53183. Using the same example again: + + # NEW: perltidy -sac -act=2 + $sender->MailMsg({ + to => $addr, + subject => $subject, + msg => $body + }); + + - new flag -scbb (--stack-closing-block-brace) causes isolated closing + block braces to stack as in the following example. (Wishlist item RT#73788) + + DEFAULT: + for $w1 (@w1) { + for $w2 (@w2) { + for $w3 (@w3) { + for $w4 (@w4) { + push( @lines, "$w1 $w2 $w3 $w4\n" ); + } + } + } + } + + perltidy -scbb: + for $w1 (@w1) { + for $w2 (@w2) { + for $w3 (@w3) { + for $w4 (@w4) { + push( @lines, "$w1 $w2 $w3 $w4\n" ); + } } } } + + There is, at present, no flag to place these closing braces at the end + of the previous line. It seems difficult to develop good rules for + doing this for a wide variety of code and data structures. + + - Parameters defining block types may use a wildcard '*' to indicate + all block types. Previously it was not possible to include bare blocks. + + - A flag -sobb (--stack-opening-block-brace) has been introduced as an + alias for -bbvt=2 -bbvtl='*'. So for example the following test code: + + {{{{{{{ $testing }}}}}}} + + cannot be formatted as above but can at least be kept vertically compact + using perltidy -sobb -scbb + + { { { { { { { $testing + } } } } } } } + + Or even, perltidy -sobb -scbb -i=1 -bbt=2 + {{{{{{{$testing + }}}}}}} + + + - Error message improved for conflicts due to -pbp; thanks to Djun Kim. + + - Fixed RT #80645, error parsing special array name '@$' when used as + @{$} or $#{$} + + - Eliminated the -chk debug flag which was included in version 20010406 to + do a one-time check for a bug with multi-line quotes. It has not been + needed since then. + + - Numerous other minor formatting improvements. + +## 2012 07 14 + + - Added flag -iscl (--ignore-side-comment-lengths) which causes perltidy + to ignore the length of side comments when setting line breaks, + RT #71848. The default is to include the length of side comments when + breaking lines to stay within the length prescribed by the -l=n + maximum line length parameter. For example, + + Default behavior on a single line with long side comment: + $vmsfile =~ s/;[\d\-]*$// + ; # Clip off version number; we can use a newer version as well + + perltidy -iscl leaves the line intact: + + $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well + + - Fixed RT #78182, side effects with STDERR. Error handling has been + revised and the documentation has been updated. STDERR can now be + redirected to a string reference, and perltidy now returns an + error flag instead of calling die when input errors are detected. + If the error flag is set then no tidied output was produced. + See man Perl::Tidy for an example. + + - Fixed RT #78156, erroneous warning message for package VERSION syntax. + + - Added abbreviations -conv (--converge) to simplify iteration control. + -conv is equivalent to -it=4 and will insure that the tidied code is + converged to its final state with the minimum number of iterations. + + - Minor formatting modifications have been made to insure convergence. + + - Simplified and hopefully improved the method for guessing the starting + indentation level of entabbed code. Added flag -dt=n (--default_tabsize=n) + which might be helpful if the guessing method does not work well for + some editors. + + - Added support for stacked labels, upper case X/B in hex and binary, and + CORE:: namespace. + + - Eliminated warning messages for using keyword names as constants. + +## 2012 07 01 + + - Corrected problem introduced by using a chomp on scalar references, RT #77978 + + - Added support for Perl 5.14 package block syntax, RT #78114. + + - A convergence test is made if three or more iterations are requested with + the -it=n parameter to avoid wasting computer time. Several hundred Mb of + code gleaned from the internet were searched with the results that: + - It is unusual for two iterations to be required unless a major + style change is being made. + - Only one case has been found where three iterations were required. + - No cases requiring four iterations have been found with this version. + For the previous version several cases where found the results could + oscillate between two semi-stable states. This version corrects this. + + So if it is important that the code be converged it is okay to set -it=4 + with this version and it will probably stop after the second iteration. + + - Improved ability to identify and retain good line break points in the + input stream, such as at commas and equals. You can always tell + perltidy to ignore old breakpoints with -iob. + + - Fixed glitch in which a terminal closing hash brace followed by semicolon + was not outdented back to the leading line depth like other closing + tokens. Thanks to Keith Neargarder for noting this. + + OLD: + my ( $pre, $post ) = @{ + { + "pp_anonlist" => [ "[", "]" ], + "pp_anonhash" => [ "{", "}" ] + }->{ $kid->ppaddr } + }; # terminal brace + + NEW: + my ( $pre, $post ) = @{ + { + "pp_anonlist" => [ "[", "]" ], + "pp_anonhash" => [ "{", "}" ] + }->{ $kid->ppaddr } + }; # terminal brace + + - Removed extra indentation given to trailing 'if' and 'unless' clauses + without parentheses because this occasionally produced undesirable + results. This only applies where parens are not used after the if or + unless. + + OLD: + return undef + unless my ( $who, $actions ) = + $clause =~ /^($who_re)((?:$action_re)+)$/o; + + NEW: + return undef + unless my ( $who, $actions ) = + $clause =~ /^($who_re)((?:$action_re)+)$/o; + +## 2012 06 19 + + - Updated perltidy to handle all quote modifiers defined for perl 5 version 16. + + - Side comment text in perltidyrc configuration files must now begin with + at least one space before the #. Thus: + + OK: + -l=78 # Max line width is 78 cols + BAD: + -l=78# Max line width is 78 cols + + This is probably true of almost all existing perltidyrc files, + but if you get an error message about bad parameters + involving a '#' the first time you run this version, please check the side + comments in your perltidyrc file, and add a space before the # if necessary. + You can quickly see the contents your perltidyrc file, if any, with the + command: + + perltidy -dpro + + The reason for this change is that some parameters naturally involve + the # symbol, and this can get interpreted as a side comment unless the + parameter is quoted. For example, to define -sphb=# it used to be necessary + to write + -sbcp='#' + to keep the # from becoming part of a comment. This was causing + trouble for new users. Now it can also be written without quotes: + -sbcp=# + + - Fixed bug in processing some .perltidyrc files containing parameters with + an opening brace character, '{'. For example the following was + incorrectly processed: + --static-block-comment-prefix="^#{2,}[^\s#]" + Thanks to pdagosto. + + - Added flag -boa (--break-at-old-attribute-breakpoints) which retains + any existing line breaks at attribute separation ':'. This is now the + default, use -nboa to deactivate. Thanks to Daphne Phister for the patch. + For example, given the following code, the line breaks at the ':'s will be + retained: + + my @field + : field + : Default(1) + : Get('Name' => 'foo') : Set('Name'); + + whereas the previous version would have output a single line. If + the attributes are on a single line then they will remain on a single line. + + - Added new flags --blank-lines-before-subs=n (-blbs=n) and + --blank-lines-before-packages=n (-blbp=n) to put n blank lines before + subs and packages. The old flag -bbs is now equivalent to -blbs=1 -blbp=1. + and -nbbs is equivalent to -blbs=0 -blbp=0. Requested by M. Schwern and + several others. + + - Added feature -nsak='*' meaning no space between any keyword and opening + paren. This avoids listing entering a long list of keywords. Requested + by M. Schwern. + + - Added option to delete a backup of original file with in-place-modify (-b) + if there were no errors. This can be requested with the flag -bext='/'. + See documentation for details. Requested by M. Schwern and others. + + - Fixed bug where the module postfilter parameter was not applied when -b + flag was used. This was discovered during testing. + + - Fixed in-place-modify (-b) to work with symbolic links to source files. + Thanks to Ted Johnson. + + - Fixed bug where the Perl::Tidy module did not allow -b to be used + in some cases. + + - No extra blank line is added before a comment which follows + a short line ending in an opening token, for example like this: + OLD: + if ( + + # unless we follow a blank or comment line + $last_line_leading_type !~ /^[#b]$/ + ... + + NEW: + if ( + # unless we follow a blank or comment line + $last_line_leading_type !~ /^[#b]$/ + ... + + The blank is not needed for readability in these cases because there + already is already space above the comment. If a blank already + exists there it will not be removed, so this change should not + change code which has previously been formatted with perltidy. + Thanks to R.W.Stauner. + + - Likewise, no extra blank line is added above a comment consisting of a + single #, since nothing is gained in readability. + + - Fixed error in which a blank line was removed after a #>>> directive. + Thanks to Ricky Morse. + + - Unnecessary semicolons after given/when/default blocks are now removed. + + - Fixed bug where an unwanted blank line could be added before + pod text in __DATA__ or __END__ section. Thanks to jidani. + + - Changed exit flags from 1 to 0 to indicate success for -help, -version, + and all -dump commands. Also added -? as another way to dump the help. + Requested by Keith Neargarder. + + - Fixed bug where .ERR and .LOG files were not written except for -it=2 or more + + - Fixed bug where trailing blank lines at the end of a file were dropped when + -it>1. + + - Fixed bug where a line occasionally ended with an extra space. This reduces + rhe number of instances where a second iteration gives a result different + from the first. + + - Updated documentation to note that the Tidy.pm module parameter may + not be a reference to SCALAR or ARRAY; it must be a file. + + - Syntax check with perl now work when the Tidy.pm module is processing + references to arrays and strings. Thanks to Charles Alderman. + + - Zero-length files are no longer processed due to concerns for data loss + due to side effects in some scenarios. + + - block labels, if any, are now included in closing side comment text + when the -csc flag is used. Suggested by Aaron. For example, + the label L102 in the following block is now included in the -csc text: + + L102: for my $i ( 1 .. 10 ) { + ... + } ## end L102: for my $i ( 1 .. 10 ) + +## 2010 12 17 + + - added new flag -it=n or --iterations=n + This flag causes perltidy to do n complete iterations. + For most purposes the default of n=1 should be satisfactory. However n=2 + can be useful when a major style change is being made, or when code is being + beautified on check-in to a source code control system. The run time will be + approximately proportional to n, and it should seldom be necessary to use a + value greater than n=2. Thanks to Jonathan Swartz + + - A configuration file pathname begins with three dots, e.g. + ".../.perltidyrc", indicates that the file should be searched for starting + in the current directory and working upwards. This makes it easier to have + multiple projects each with their own .perltidyrc in their root directories. + Thanks to Jonathan Swartz for this patch. + + - Added flag --notidy which disables all formatting and causes the input to be + copied unchanged. This can be useful in conjunction with hierarchical + F<.perltidyrc> files to prevent unwanted tidying. + Thanks to Jonathan Swartz for this patch. + + - Added prefilters and postfilters in the call to the Tidy.pm module. + Prefilters and postfilters. The prefilter is a code reference that + will be applied to the source before tidying, and the postfilter + is a code reference to the result before outputting. + + Thanks to Jonathan Swartz for this patch. He writes: + This is useful for all manner of customizations. For example, I use + it to convert the 'method' keyword to 'sub' so that perltidy will work for + Method::Signature::Simple code: + + Perl::Tidy::perltidy( + prefilter => sub { $_ = $_[0]; s/^method (.*)/sub $1 \#__METHOD/gm; return $_ }, + postfilter => sub { $_ = $_[0]; s/^sub (.*?)\s* \#__METHOD/method $1/gm; return $_ } + ); + + - The starting indentation level of sections of code entabbed with -et=n + is correctly guessed if it was also produced with the same -et=n flag. This + keeps the indentation stable on repeated formatting passes within an editor. + Thanks to Sam Kington and Glenn. + + - Functions with prototype '&' had a space between the function and opening + peren. This space now only occurs if the flag --space-function-paren (-sfp) + is set. Thanks to Zrajm Akfohg. + + - Patch to never put spaces around a bare word in braces beginning with ^ as in: + my $before = ${^PREMATCH}; + even if requested with the -bt=0 flag because any spaces cause a syntax error in perl. + Thanks to Fabrice Dulanoy. + +## 2009 06 16 + + - Allow configuration file to be 'perltidy.ini' for Windows systems. + i.e. C:\Documents and Settings\User\perltidy.ini + and added documentation for setting configuation file under Windows in man + page. Thanks to Stuart Clark. + + - Corrected problem of unwanted semicolons in hash ref within given/when code. + Thanks to Nelo Onyiah. + + - added new flag -cscb or --closing-side-comments-balanced + When using closing-side-comments, and the closing-side-comment-maximum-text + limit is exceeded, then the comment text must be truncated. Previous + versions of perltidy terminate with three dots, and this can still be + achieved with -ncscb: + + perltidy -csc -ncscb + + } ## end foreach my $foo (sort { $b cmp $a ... + + However this causes a problem with older editors which cannot recognize + comments or are not configured to doso because they cannot "bounce" around in + the text correctly. The B<-cscb> flag tries to help them by + appending appropriate terminal balancing structure: + + perltidy -csc -cscb + + } ## end foreach my $foo (sort { $b cmp $a ... }) + + Since there is much to be gained and little to be lost by doing this, + the default is B<-cscb>. Use B<-ncscb> if you do not want this. + + Thanks to Daniel Becker for suggesting this option. + + - After an isolated closing eval block the continuation indentation will be + removed so that the braces line up more like other blocks. Thanks to Yves Orton. + + OLD: + eval { + #STUFF; + 1; # return true + } + or do { + #handle error + }; + + NEW: + eval { + #STUFF; + 1; # return true + } or do { + #handle error + }; + + -A new flag -asbl (or --opening-anonymous-sub-brace-on-new-line) has + been added to put the opening brace of anonymous sub's on a new line, + as in the following snippet: + + my $code = sub + { + my $arg = shift; + return $arg->(@_); + }; + + This was not possible before because the -sbl flag only applies to named + subs. Thanks to Benjamin Krupp. + + -Fix tokenization bug with the following snippet + print 'hi' if { x => 1, }->{x}; + which resulted in a semicolon being added after the comma. The workaround + was to use -nasc, but this is no longer necessary. Thanks to Brian Duggan. + + -Fixed problem in which an incorrect error message could be triggered + by the (unusual) combination of parameters -lp -i=0 -l=2 -ci=0 for + example. Thanks to Richard Jelinek. + + -A new flag --keep-old-blank-lines=n has been added to + give more control over the treatment of old blank lines in + a script. The manual has been revised to discuss the new + flag and clarify the treatment of old blank lines. Thanks + to Oliver Schaefer. + +## 2007 12 05 + + -Improved support for perl 5.10: New quote modifier 'p', new block type UNITCHECK, + new keyword break, improved formatting of given/when. + + -Corrected tokenization bug of something like $var{-q}. + + -Numerous minor formatting improvements. + + -Corrected list of operators controlled by -baao -bbao to include + . : ? && || and or err xor + + -Corrected very minor error in log file involving incorrect comment + regarding need for upper case of labels. + + -Fixed problem where perltidy could run for a very long time + when given certain non-perl text files. + + -Line breaks in un-parenthesized lists now try to follow + line breaks in the input file rather than trying to fill + lines. This usually works better, but if this causes + trouble you can use -iob to ignore any old line breaks. + Example for the following input snippet: + + print + "conformability (Not the same dimension)\n", + "\t", $have, " is ", text_unit($hu), "\n", + "\t", $want, " is ", text_unit($wu), "\n", + ; + + OLD: + print "conformability (Not the same dimension)\n", "\t", $have, " is ", + text_unit($hu), "\n", "\t", $want, " is ", text_unit($wu), "\n",; + + NEW: + print "conformability (Not the same dimension)\n", + "\t", $have, " is ", text_unit($hu), "\n", + "\t", $want, " is ", text_unit($wu), "\n", + ; + +## 2007 08 01 + + -Added -fpsc option (--fixed-position-side-comment). Thanks to Ueli Hugenschmidt. + For example -fpsc=40 tells perltidy to put side comments in column 40 + if possible. + + -Added -bbao and -baao options (--break-before-all-operators and + --break-after-all-operators) to simplify command lines and configuration + files. These define an initial preference for breaking at operators which can + be modified with -wba and -wbb flags. For example to break before all operators + except an = one could use --bbao -wba='=' rather than listing every + single perl operator (except =) on a -wbb flag. + + -Added -kis option (--keep-interior-semicolons). Use the B<-kis> flag + to prevent breaking at a semicolon if there was no break there in the + input file. To illustrate, consider the following input lines: + + dbmclose(%verb_delim); undef %verb_delim; + dbmclose(%expanded); undef %expanded; + dbmclose(%global); undef %global; + + Normally these would be broken into six lines, but + perltidy -kis gives: + + dbmclose(%verb_delim); undef %verb_delim; + dbmclose(%expanded); undef %expanded; + dbmclose(%global); undef %global; + + -Improved formatting of complex ternary statements, with indentation + of nested statements. + OLD: + return defined( $cw->{Selected} ) + ? (wantarray) + ? @{ $cw->{Selected} } + : $cw->{Selected}[0] + : undef; + + NEW: + return defined( $cw->{Selected} ) + ? (wantarray) + ? @{ $cw->{Selected} } + : $cw->{Selected}[0] + : undef; + + -Text following un-parenthesized if/unless/while/until statements get a + full level of indentation. Suggested by Jeff Armstorng and others. + OLD: + return $ship->chargeWeapons("phaser-canon") + if $encounter->description eq 'klingon' + and $ship->firepower >= $encounter->firepower + and $location->status ne 'neutral'; + NEW: + return $ship->chargeWeapons("phaser-canon") + if $encounter->description eq 'klingon' + and $ship->firepower >= $encounter->firepower + and $location->status ne 'neutral'; + +## 2007 05 08 + + -Fixed bug where #line directives were being indented. Thanks to + Philippe Bruhat. + +## 2007 05 04 + + -Fixed problem where an extra blank line was added after an =cut when either + (a) the =cut started (not stopped) a POD section, or (b) -mbl > 1. + Thanks to J. Robert Ray and Bill Moseley. + +## 2007 04 24 + + -ole (--output-line-ending) and -ple (--preserve-line-endings) should + now work on all systems rather than just unix systems. Thanks to Dan + Tyrell. + + -Fixed problem of a warning issued for multiple subs for BEGIN subs + and other control subs. Thanks to Heiko Eissfeldt. + + -Fixed problem where no space was introduced between a keyword or + bareword and a colon, such as: + + ( ref($result) eq 'HASH' && !%$result ) ? undef: $result; + + Thanks to Niek. + + -Added a utility program 'break_long_quotes.pl' to the examples directory of + the distribution. It breaks long quoted strings into a chain of concatenated + sub strings no longer than a selected length. Suggested by Michael Renner as + a perltidy feature but was judged to be best done in a separate program. + + -Updated docs to remove extra < and >= from list of tokens + after which breaks are made by default. Thanks to Bob Kleemann. + + -Removed improper uses of $_ to avoid conflicts with external calls, giving + error message similar to: + Modification of a read-only value attempted at + /usr/share/perl5/Perl/Tidy.pm line 6907. + Thanks to Michael Renner. + + -Fixed problem when errorfile was not a plain filename or filehandle + in a call to Tidy.pm. The call + perltidy(source => \$input, destination => \$output, errorfile => \$err); + gave the following error message: + Not a GLOB reference at /usr/share/perl5/Perl/Tidy.pm line 3827. + Thanks to Michael Renner and Phillipe Bruhat. + + -Fixed problem where -sot would not stack an opening token followed by + a side comment. Thanks to Jens Schicke. + + -improved breakpoints in complex math and other long statements. Example: + OLD: + return + log($n) + 0.577215664901532 + ( 1 / ( 2 * $n ) ) - + ( 1 / ( 12 * ( $n**2 ) ) ) + ( 1 / ( 120 * ( $n**4 ) ) ); + NEW: + return + log($n) + 0.577215664901532 + + ( 1 / ( 2 * $n ) ) - + ( 1 / ( 12 * ( $n**2 ) ) ) + + ( 1 / ( 120 * ( $n**4 ) ) ); + + -more robust vertical alignment of complex terminal else blocks and ternary + statements. + +## 2006 07 19 + + -Eliminated bug where a here-doc invoked through an 'e' modifier on a pattern + replacement text was not recognized. The tokenizer now recursively scans + replacement text (but does not reformat it). + + -improved vertical alignment of terminal else blocks and ternary statements. + Thanks to Chris for the suggestion. + + OLD: + if ( IsBitmap() ) { return GetBitmap(); } + elsif ( IsFiles() ) { return GetFiles(); } + else { return GetText(); } + + NEW: + if ( IsBitmap() ) { return GetBitmap(); } + elsif ( IsFiles() ) { return GetFiles(); } + else { return GetText(); } + + OLD: + $which_search = + $opts{"t"} ? 'title' + : $opts{"s"} ? 'subject' + : $opts{"a"} ? 'author' + : 'title'; + + NEW: + $which_search = + $opts{"t"} ? 'title' + : $opts{"s"} ? 'subject' + : $opts{"a"} ? 'author' + : 'title'; + + -improved indentation of try/catch blocks and other externally defined + functions accepting a block argument. Thanks to jae. + + -Added support for Perl 5.10 features say and smartmatch. + + -Added flag -pbp (--perl-best-practices) as an abbreviation for parameters + suggested in Damian Conway's "Perl Best Practices". -pbp is the same as: + + -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq + -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = + **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=" + + Please note that the -st here restricts input to standard input; use + -nst if necessary to override. + + -Eliminated some needless breaks at equals signs in -lp indentation. + + OLD: + $c = + Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE, + TOP + $y * (BOTTOM - TOP) / SIZE); + NEW: + $c = Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE, + TOP + $y * (BOTTOM - TOP) / SIZE); + + A break at an equals is sometimes useful for preventing complex statements + from hitting the line length limit. The decision to do this was + over-eager in some cases and has been improved. Thanks to Royce Reece. + + -qw quotes contained in braces, square brackets, and parens are being + treated more like those containers as far as stacking of tokens. Also + stack of closing tokens ending ');' will outdent to where the ');' would + have outdented if the closing stack is matched with a similar opening stack. + + OLD: perltidy -soc -sct + __PACKAGE__->load_components( + qw( + PK::Auto + Core + ) + ); + NEW: perltidy -soc -sct + __PACKAGE__->load_components( qw( + PK::Auto + Core + ) ); + Thanks to Aran Deltac + + -Eliminated some undesirable or marginally desirable vertical alignments. + These include terminal colons, opening braces, and equals, and particularly + when just two lines would be aligned. + + OLD: + my $accurate_timestamps = $Stamps{lnk}; + my $has_link = + ... + NEW: + my $accurate_timestamps = $Stamps{lnk}; + my $has_link = + + -Corrected a problem with -mangle in which a space would be removed + between a keyword and variable beginning with ::. + +## 2006 06 14 + + -Attribute argument lists are now correctly treated as quoted strings + and not formatted. This is the most important update in this version. + Thanks to Borris Zentner, Greg Ferguson, Steve Kirkup. + + -Updated to recognize the defined or operator, //, to be released in Perl 10. + Thanks to Sebastien Aperghis-Tramoni. + + -A useful utility perltidyrc_dump.pl is included in the examples section. It + will read any perltidyrc file and write it back out in a standard format + (though comments are lost). + + -Added option to have perltidy read and return a hash with the contents of a + perltidyrc file. This may be used by Leif Eriksen's tidyview code. This + feature is used by the demonstration program 'perltidyrc_dump.pl' in the + examples directory. + + -Improved error checking in perltidyrc files. Unknown bare words were not + being caught. + + -The --dump-options parameter now dumps parameters in the format required by a + perltidyrc file. + + -V-Strings with underscores are now recognized. + For example: $v = v1.2_3; + + -cti=3 option added which gives one extra indentation level to closing + tokens always. This provides more predictable closing token placement + than cti=2. If you are using cti=2 you might want to try cti=3. + + -To identify all left-adjusted comments as static block comments, use C<-sbcp='^#'>. + + -New parameters -fs, -fsb, -fse added to allow sections of code between #<<< + and #>>> to be passed through verbatim. This is enabled by default and turned + off by -nfs. Flags -fsb and -fse allow other beginning and ending markers. + Thanks to Wolfgang Werner and Marion Berryman for suggesting this. + + -added flag -skp to put a space between all Perl keywords and following paren. + The default is to only do this for certain keywords. Suggested by + H.Merijn Brand. + + -added flag -sfp to put a space between a function name and following paren. + The default is not to do this. Suggested by H.Merijn Brand. + + -Added patch to avoid breaking GetOpt::Long::Configure set by calling program. + Thanks to Philippe Bruhat. + + -An error was fixed in which certain parameters in a .perltidyrc file given + without the equals sign were not recognized. That is, + '--brace-tightness 0' gave an error but '--brace-tightness=0' worked + ok. Thanks to Zac Hansen. + + -An error preventing the -nwrs flag from working was corrected. Thanks to + Greg Ferguson. + + -Corrected some alignment problems with entab option. + + -A bug with the combination of -lp and -extrude was fixed (though this + combination doesn't really make sense). The bug was that a line with + a single zero would be dropped. Thanks to Cameron Hayne. + + -Updated Windows detection code to avoid an undefined variable. + Thanks to Joe Yates and Russ Jones. + + -Improved formatting for short trailing statements following a closing paren. + Thanks to Joe Matarazzo. + + -The handling of the -icb (indent closing block braces) flag has been changed + slightly to provide more consistent and predictable formatting of complex + structures. Instead of giving a closing block brace the indentation of the + previous line, it is now given one extra indentation level. The two methods + give the same result if the previous line was a complete statement, as in this + example: + + if ($task) { + yyy(); + } # -icb + else { + zzz(); + } + The change also fixes a problem with empty blocks such as: + + OLD, -icb: + elsif ($debug) { + } + + NEW, -icb: + elsif ($debug) { + } + + -A problem with -icb was fixed in which a closing brace was misplaced when + it followed a quote which spanned multiple lines. + + -Some improved breakpoints for -wba='&& || and or' + + -Fixed problem with misaligned cuddled else in complex statements + when the -bar flag was also used. Thanks to Alex and Royce Reese. + + -Corrected documentation to show that --outdent-long-comments is the default. + Thanks to Mario Lia. + + -New flag -otr (opening-token-right) is similar to -bar (braces-always-right) + but applies to non-structural opening tokens. + + -new flags -sot (stack-opening-token), -sct (stack-closing-token). + Suggested by Tony. + +## 2003 10 21 + + -The default has been changed to not do syntax checking with perl. + Use -syn if you want it. Perltidy is very robust now, and the -syn + flag now causes more problems than it's worth because of BEGIN blocks + (which get executed with perl -c). For example, perltidy will never + return when trying to beautify this code if -syn is used: + + BEGIN { 1 while { }; } + + Although this is an obvious error, perltidy is often run on untested + code which is more likely to have this sort of problem. A more subtle + example is: + + BEGIN { use FindBin; } + + which may hang on some systems using -syn if a shared file system is + unavailable. + + -Changed style -gnu to use -cti=1 instead of -cti=2 (see next item). + In most cases it looks better. To recover the previous format, use + '-gnu -cti=2' + + -Added flags -cti=n for finer control of closing token indentation. + -cti = 0 no extra indentation (default; same as -nicp) + -cti = 1 enough indentation so that the closing token + aligns with its opening token. + -cti = 2 one extra indentation level if the line has the form + ); ]; or }; (same as -icp). + + The new option -cti=1 works well with -lp: + + EXAMPLES: + + # perltidy -lp -cti=1 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + + # perltidy -lp -cti=2 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + This is backwards compatible with -icp. See revised manual for + details. Suggested by Mike Pennington. + + -Added flag '--preserve-line-endings' or '-ple' to cause the output + line ending to be the same as in the input file, for unix, dos, + or mac line endings. Only works under unix. Suggested by + Rainer Hochschild. + + -Added flag '--output-line-ending=s' or '-ole=s' where s=dos or win, + unix, or mac. Only works under unix. + + -Files with Mac line endings should now be handled properly under unix + and dos without being passed through a converter. + + -You may now include 'and', 'or', and 'xor' in the list following + '--want-break-after' to get line breaks after those keywords rather than + before them. Suggested by Rainer Hochschild. + + -Corrected problem with command line option for -vtc=n and -vt=n. The + equals sign was being eaten up by the Windows shell so perltidy didn't + see it. + +## 2003 07 26 + + -Corrected cause of warning message with recent versions of Perl: + "Possible precedence problem on bitwise & operator at ..." + Thanks to Jim Files. + + -fixed bug with -html with '=for pod2html' sections, in which code/pod + output order was incorrect. Thanks to Tassilo von Parseval. + + -fixed bug when the -html flag is used, in which the following error + message, plus others, appear: + did not see in pod2html output + This was caused by a change in the format of html output by pod2html + VERSION 1.04 (included with perl 5.8). Thanks to Tassilo von Parseval. + + -Fixed bug where an __END__ statement would be mistaken for a label + if it is immediately followed by a line with a leading colon. Thanks + to John Bayes. + + -Implemented guessing logic for brace types when it is ambiguous. This + has been on the TODO list a long time. Thanks to Boris Zentner for + an example. + + -Long options may now be negated either as '--nolong-option' + or '--no-long-option'. Thanks to Philip Newton for the suggestion. + + -added flag --html-entities or -hent which controls the use of + Html::Entities for html formatting. Use --nohtml-entities or -nhent to + prevent the use of Html::Entities to encode special symbols. The + default is -hent. Html::Entities when formatting perl text to escape + special symbols. This may or may not be the right thing to do, + depending on browser/language combinations. Thanks to Burak Gursoy for + this suggestion. + + -Bareword strings with leading '-', like, '-foo' now count as 1 token + for horizontal tightness. This way $a{'-foo'}, $a{foo}, and $a{-foo} + are now all treated similarly. Thus, by default, OLD: $a{ -foo } will + now be NEW: $a{-foo}. Suggested by Mark Olesen. + + -added 2 new flags to control spaces between keywords and opening parens: + -sak=s or --space-after-keyword=s, and + -nsak=s or --nospace-after-keyword=s, where 's' is a list of keywords. + + The new default list of keywords which get a space is: + + "my local our and or eq ne if else elsif until unless while for foreach + return switch case given when" + + Use -sak=s and -nsak=s to add and remove keywords from this list, + respectively. + + Explanation: Stephen Hildrey noted that perltidy was being inconsistent + in placing spaces between keywords and opening parens, and sent a patch + to give user control over this. The above list was selected as being + a reasonable default keyword list. Previously, perltidy + had a hardwired list which also included these keywords: + + push pop shift unshift join split die + + but did not have 'our'. Example: if you prefer to make perltidy behave + exactly as before, you can include the following two lines in your + .perltidyrc file: + + -sak="push pop local shift unshift join split die" + -nsak="our" + + -Corrected html error in .toc file when -frm -html is used (extra "); + browsers were tolerant of it. + + -Improved alignment of chains of binary and ?/: operators. Example: + OLD: + $leapyear = + $year % 4 ? 0 + : $year % 100 ? 1 + : $year % 400 ? 0 + : 1; + NEW: + $leapyear = + $year % 4 ? 0 + : $year % 100 ? 1 + : $year % 400 ? 0 + : 1; + + -improved breakpoint choices involving '->' + + -Corrected tokenization of things like ${#}. For example, + ${#} is valid, but ${# } is a syntax error. + + -Corrected minor tokenization errors with indirect object notation. + For example, 'new A::()' works now. + + -Minor tokenization improvements; all perl code distributed with perl 5.8 + seems to be parsed correctly except for one instance (lextest.t) + of the known bug. + +## 2002 11 30 + + -Implemented scalar attributes. Thanks to Sean Tobin for noting this. + + -Fixed glitch introduced in previous release where -pre option + was not outputting a leading html
 tag.
+
+    -Numerous minor improvements in vertical alignment, including the following:
+
+    -Improved alignment of opening braces in many cases.  Needed for improved
+    switch/case formatting, and also suggested by Mark Olesen for sort/map/grep
+    formatting.  For example:
+
+     OLD:
+       @modified =
+         map { $_->[0] }
+         sort { $a->[1] <=> $b->[1] }
+         map { [ $_, -M ] } @filenames;
+
+     NEW:
+       @modified =
+         map  { $_->[0] }
+         sort { $a->[1] <=> $b->[1] }
+         map  { [ $_, -M ] } @filenames;
+
+    -Eliminated alignments across unrelated statements. Example:
+     OLD:
+       $borrowerinfo->configure( -state => 'disabled' );
+       $borrowerinfo->grid( -col        => 1, -row => 0, -sticky => 'w' );
+
+     NEW:  
+       $borrowerinfo->configure( -state => 'disabled' );
+       $borrowerinfo->grid( -col => 1, -row => 0, -sticky => 'w' );
+
+     Thanks to Mark Olesen for suggesting this.
+
+    -Improved alignement of '='s in certain cases.
+     Thanks to Norbert Gruener for sending an example.
+
+    -Outdent-long-comments (-olc) has been re-instated as a default, since
+     it works much better now.  Use -nolc if you want to prevent it.
+
+    -Added check for 'perltidy file.pl -o file.pl', which causes file.pl
+    to be lost. (The -b option should be used instead). Thanks to mreister
+    for reporting this problem.
+
+## 2002 11 06
+
+    -Switch/case or given/when syntax is now recognized.  Its vertical alignment
+    is not great yet, but it parses ok.  The words 'switch', 'case', 'given',
+    and 'when' are now treated as keywords.  If this causes trouble with older
+    code, we could introduce a switch to deactivate it.  Thanks to Stan Brown
+    and Jochen Schneider for recommending this.
+
+    -Corrected error parsing sub attributes with call parameters.
+    Thanks to Marc Kerr for catching this.
+
+    -Sub prototypes no longer need to be on the same line as sub names.  
+
+    -a new flag -frm or --frames will cause html output to be in a
+    frame, with table of contents in the left panel and formatted source
+    in the right panel.  Try 'perltidy -html -frm somemodule.pm' for example.
+
+    -The new default for -html formatting is to pass the pod through Pod::Html.
+    The result is syntax colored code within your pod documents. This can be
+    deactivated with -npod.  Thanks to those who have written to discuss this,
+    particularly Mark Olesen and Hugh Myers.
+
+    -the -olc (--outdent-long-comments) option works much better.  It now outdents
+    groups of consecutive comments together, and by just the amount needed to
+    avoid having any one line exceeding the maximum line length.
+
+    -block comments are now trimmed of trailing whitespace.
+
+    -if a directory specified with -opath does not exist, it will be created.
+
+    -a table of contents to packages and subs is output when -html is used.
+    Use -ntoc to prevent this. 
+
+    -fixed an unusual bug in which a 'for' statement following a 'format'
+    statement was not correctly tokenized.  Thanks to Boris Zentner for
+    catching this.
+
+    -Tidy.pm is no longer dependent on modules IO::Scalar and IO::ScalarArray.  
+    There were some speed issues.  Suggested by Joerg Walter.
+
+    -The treatment of quoted wildcards (file globs) is now system-independent. 
+    For example
+
+       perltidy 'b*x.p[lm]'
+
+    would match box.pl, box.pm, brinx.pm under any operating system.  Of
+    course, anything unquoted will be subject to expansion by any shell.
+
+    -default color for keywords under -html changed from 
+    SaddleBrown (#8B4513) to magenta4 (#8B008B).
+
+    -fixed an arg parsing glitch in which something like:
+      perltidy quick-help
+    would trigger the help message and exit, rather than operate on the
+    file 'quick-help'.
+
+## 2002 09 22
+
+    -New option '-b' or '--backup-and-modify-in-place' will cause perltidy to
+    overwrite the original file with the tidied output file.  The original
+    file will be saved with a '.bak' extension (which can be changed with
+    -bext=s).  Thanks to Rudi Farkas for the suggestion.
+
+    -An index to all subs is included at the top of -html output, unless
+    only the 
 section is written.
+
+    -Anchor lines of the form  are now inserted at key points
+    in html output, such as before sub definitions, for the convenience of
+    postprocessing scripts.  Suggested by Howard Owen.
+
+    -The cuddled-else (-ce) flag now also makes cuddled continues, like
+    this:
+
+       while ( ( $pack, $file, $line ) = caller( $i++ ) ) {
+          # bla bla
+       } continue {
+           $prevpack = $pack;
+       }
+
+    Suggested by Simon Perreault.  
+
+    -Fixed bug in which an extra blank line was added before an =head or 
+    similar pod line after an __END__ or __DATA__ line each time 
+    perltidy was run.  Also, an extra blank was being added after
+    a terminal =cut.  Thanks to Mike Birdsall for reporting this.
+
+## 2002 08 26
+
+    -Fixed bug in which space was inserted in a hyphenated hash key:
+       my $val = $myhash{USER-NAME};
+     was converted to:
+       my $val = $myhash{USER -NAME}; 
+     Thanks to an anonymous bug reporter at sourceforge.
+
+    -Fixed problem with the '-io' ('--indent-only') where all lines 
+     were double spaced.  Thanks to Nick Andrew for reporting this bug.
+
+    -Fixed tokenization error in which something like '-e1' was 
+     parsed as a number. 
+
+    -Corrected a rare problem involving older perl versions, in which 
+     a line break before a bareword caused problems with 'use strict'.
+     Thanks to Wolfgang Weisselberg for noting this.
+
+    -More syntax error checking added.
+
+    -Outdenting labels (-ola) has been made the default, in order to follow the
+     perlstyle guidelines better.  It's probably a good idea in general, but
+     if you do not want this, use -nola in your .perltidyrc file.
+     
+    -Updated rules for padding logical expressions to include more cases.
+     Thanks to Wolfgang Weisselberg for helpful discussions.
+
+    -Added new flag -osbc (--outdent-static-block-comments) which will
+     outdent static block comments by 2 spaces (or whatever -ci equals).
+     Requested by Jon Robison.
+
+## 2002 04 25
+
+    -Corrected a bug, introduced in the previous release, in which some
+     closing side comments (-csc) could have incorrect text.  This is
+     annoying but will be correct the next time perltidy is run with -csc.
+
+    -Fixed bug where whitespace was being removed between 'Bar' and '()' 
+     in a use statement like:
+
+          use Foo::Bar ();
+
+    -Whenever possible, if a logical expression is broken with leading
+     '&&', '||', 'and', or 'or', then the leading line will be padded
+     with additional space to produce alignment.  This has been on the
+     todo list for a long time; thanks to Frank Steinhauer for reminding
+     me to do it.  Notice the first line after the open parens here:
+
+           OLD: perltidy -lp
+           if (
+                !param("rules.to.$linecount")
+                && !param("rules.from.$linecount")
+                && !param("rules.subject.$linecount")
+                && !(
+                      param("rules.fieldname.$linecount")
+                      && param("rules.fieldval.$linecount")
+                )
+                && !param("rules.size.$linecount")
+                && !param("rules.custom.$linecount")
+             )
+
+           NEW: perltidy -lp
+           if (
+                   !param("rules.to.$linecount")
+                && !param("rules.from.$linecount")
+                && !param("rules.subject.$linecount")
+                && !(
+                         param("rules.fieldname.$linecount")
+                      && param("rules.fieldval.$linecount")
+                )
+                && !param("rules.size.$linecount")
+                && !param("rules.custom.$linecount")
+             )
+
+## 2002 04 16
+
+    -Corrected a mistokenization of variables for a package with a name
+     equal to a perl keyword.  For example: 
+
+        my::qx();
+        package my;
+        sub qx{print "Hello from my::qx\n";}
+
+     In this case, the leading 'my' was mistokenized as a keyword, and a
+     space was being place between 'my' and '::'.  This has been
+     corrected.  Thanks to Martin Sluka for discovering this. 
+
+    -A new flag -bol (--break-at-old-logic-breakpoints)
+     has been added to control whether containers with logical expressions
+     should be broken open.  This is the default.
+
+    -A new flag -bok (--break-at-old-keyword-breakpoints)
+     has been added to follow breaks at old keywords which return lists,
+     such as sort and map.  This is the default.
+
+    -A new flag -bot (--break-at-old-trinary-breakpoints) has been added to
+     follow breaks at trinary (conditional) operators.  This is the default.
+
+    -A new flag -cab=n has been added to control breaks at commas after
+     '=>' tokens.  The default is n=1, meaning break unless this breaks
+     open an existing on-line container.
+
+    -A new flag -boc has been added to allow existing list formatting
+     to be retained.  (--break-at-old-comma-breakpoints).  See updated manual.
+
+    -A new flag -iob (--ignore-old-breakpoints) has been added to
+     prevent the locations of old breakpoints from influencing the output
+     format.
+
+    -Corrected problem where nested parentheses were not getting full
+     indentation.  This has been on the todo list for some time; thanks 
+     to Axel Rose for a snippet demonstrating this issue.
+
+               OLD: inner list is not indented
+               $this->sendnumeric(
+                   $this->server,
+                   (
+                     $ret->name,        $user->username, $user->host,
+                   $user->server->name, $user->nick,     "H"
+                   ),
+               );
+
+               NEW:
+               $this->sendnumeric(
+                   $this->server,
+                   (
+                       $ret->name,          $user->username, $user->host,
+                       $user->server->name, $user->nick,     "H"
+                   ),
+               );
+
+    -Code cleaned up by removing the following unused, undocumented flags.
+     They should not be in any .perltidyrc files because they were just
+     experimental flags which were never documented.  Most of them placed
+     artificial limits on spaces, and Wolfgang Weisselberg convinced me that
+     most of them they do more harm than good by causing unexpected results.
+
+     --maximum-continuation-indentation (-mci)
+     --maximum-whitespace-columns
+     --maximum-space-to-comment (-xsc)
+     --big-space-jump (-bsj)
+
+    -Pod file 'perltidy.pod' has been appended to the script 'perltidy', and
+     Tidy.pod has been append to the module 'Tidy.pm'.  Older MakeMaker's
+     were having trouble.
+    
+    -A new flag -isbc has been added for more control on comments. This flag
+     has the effect that if there is no leading space on the line, then the
+     comment will not be indented, and otherwise it may be.  If both -ibc and
+     -isbc are set, then -isbc takes priority.  Thanks to Frank Steinhauer
+     for suggesting this.
+
+    -A new document 'stylekey.pod' has been created to quickly guide new users
+     through the maze of perltidy style parameters.  An html version is 
+     on the perltidy web page.  Take a look! It should be very helpful.
+
+    -Parameters for controlling 'vertical tightness' have been added:
+     -vt and -vtc are the main controls, but finer control is provided
+     with -pvt, -pcvt, -bvt, -bcvt, -sbvt, -sbcvt.  Block brace vertical
+     tightness controls have also been added.
+     See updated manual and also see 'stylekey.pod'. Simple examples:
+
+       # perltidy -lp -vt=1 -vtc=1
+       @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
+
+       # perltidy -lp -vt=1 -vtc=0
+       @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+       );
+
+    -Lists which do not format well in uniform columns are now better
+     identified and formated.
+
+       OLD:
+       return $c->create( 'polygon', $x, $y, $x + $ruler_info{'size'},
+           $y + $ruler_info{'size'}, $x - $ruler_info{'size'},
+           $y + $ruler_info{'size'} );
+
+       NEW:
+       return $c->create(
+           'polygon', $x, $y,
+           $x + $ruler_info{'size'},
+           $y + $ruler_info{'size'},
+           $x - $ruler_info{'size'},
+           $y + $ruler_info{'size'}
+       );
+
+       OLD:
+         radlablist($f1, pad('Initial', $p), $b->{Init}->get_panel_ref, 'None ',
+                    'None', 'Default', 'Default', 'Simple', 'Simple');
+       NEW:
+         radlablist($f1,
+                    pad('Initial', $p),
+                    $b->{Init}->get_panel_ref,
+                    'None ', 'None', 'Default', 'Default', 'Simple', 'Simple');
+
+    -Corrected problem where an incorrect html filename was generated for 
+     external calls to Tidy.pm module.  Fixed incorrect html title when
+     Tidy.pm is called with IO::Scalar or IO::Array source.
+
+    -Output file permissons are now set as follows.  An output script file
+     gets the same permission as the input file, except that owner
+     read/write permission is added (otherwise, perltidy could not be
+     rerun).  Html output files use system defaults.  Previously chmod 0755
+     was used in all cases.  Thanks to Mark Olesen for bringing this up.
+
+    -Missing semicolons will not be added in multi-line blocks of type
+     sort, map, or grep.  This brings perltidy into closer agreement
+     with common practice.  Of course, you can still put semicolons 
+     there if you like.  Thanks to Simon Perreault for a discussion of this.
+
+    -Most instances of extra semicolons are now deleted.  This is
+     particularly important if the -csc option is used.  Thanks to Wolfgang
+     Weisselberg for noting this.  For example, the following line
+     (produced by 'h2xs' :) has an extra semicolon which will now be
+     removed:
+
+        BEGIN { plan tests => 1 };
+
+    -New parameter -csce (--closing-side-comment-else-flag) can be used
+     to control what text is appended to 'else' and 'elsif' blocks.
+     Default is to just add leading 'if' text to an 'else'.  See manual.
+
+    -The -csc option now labels 'else' blocks with additinal information
+     from the opening if statement and elsif statements, if space.
+     Thanks to Wolfgang Weisselberg for suggesting this.
+
+    -The -csc option will now remove any old closing side comments
+     below the line interval threshold. Thanks to Wolfgang Weisselberg for
+     suggesting this.
+
+    -The abbreviation feature, which was broken in the previous version,
+     is now fixed.  Thanks to Michael Cartmell for noting this.
+
+    -Vertical alignment is now done for '||='  .. somehow this was 
+     overlooked.
+
+## 2002 02 25
+
+    -This version uses modules for the first time, and a standard perl
+     Makefile.PL has been supplied.  However, perltidy may still be
+     installed as a single script, without modules.  See INSTALL for
+     details.
+
+    -The man page 'perl2web' has been merged back into the main 'perltidy'
+     man page to simplify installation.  So you may remove that man page
+     if you have an older installation.
+
+    -Added patch from Axel Rose for MacPerl.  The patch prompts the user
+     for command line arguments before calling the module 
+     Perl::Tidy::perltidy.
+
+    -Corrected bug with '-bar' which was introduced in the previous
+     version.  A closing block brace was being indented.  Thanks to
+     Alexandros M Manoussakis for reporting this.
+
+    -New parameter '--entab-leading-whitespace=n', or '-et=n', has been
+     added for those who prefer tabs.  This behaves different from the
+     existing '-t' parameter; see updated man page.  Suggested by Mark
+     Olesen.
+
+    -New parameter '--perl-syntax-check-flags=s'  or '-pcsf=s' can be
+     used to change the flags passed to perltidy in a syntax check.
+     See updated man page.  Suggested by Mark Olesen. 
+
+    -New parameter '--output-path=s'  or '-opath=s' will cause output
+     files to be placed in directory s.  See updated man page.  Thanks for
+     Mark Olesen for suggesting this.
+
+    -New parameter --dump-profile (or -dpro) will dump to
+     standard output information about the search for a
+     configuration file, the name of whatever configuration file
+     is selected, and its contents.  This should help debugging
+     config files, especially on different Windows systems.
+
+    -The -w parameter now notes possible errors of the form:
+
+           $comment = s/^\s*(\S+)\..*/$1/;   # trim whitespace
+
+    -Corrections added for a leading ':' and for leaving a leading 'tcsh'
+     line untouched.  Mark Olesen reported that lines of this form were
+     accepted by perl but not by perltidy:
+
+           : # use -*- perl -*-
+           eval 'exec perl -wS $0 "$@"'  # shell should exec 'perl'
+           unless 1;                     # but Perl should skip this one
+
+     Perl will silently swallow a leading colon on line 1 of a
+     script, and now perltidy will do likewise.  For example,
+     this is a valid script, provided that it is the first line,
+     but not otherwise:
+
+           : print "Hello World\n";
+     
+     Also, perltidy will now mark a first line with leading ':' followed by
+     '#' as type SYSTEM (just as a #!  line), not to be formatted.
+
+    -List formatting improved for certain lists with special
+     initial terms, such as occur with 'printf', 'sprintf',
+     'push', 'pack', 'join', 'chmod'.  The special initial term is
+     now placed on a line by itself.  For example, perltidy -gnu
+
+        OLD:
+           $Addr = pack(
+                        "C4",                hex($SourceAddr[0]),
+                        hex($SourceAddr[1]), hex($SourceAddr[2]),
+                        hex($SourceAddr[3])
+                        );
+
+        NEW:
+           $Addr = pack("C4",
+                        hex($SourceAddr[0]), hex($SourceAddr[1]),
+                        hex($SourceAddr[2]), hex($SourceAddr[3]));
+
+         OLD:
+               push (
+                     @{$$self{states}}, '64', '66', '68',
+                     '70',              '72', '74', '76',
+                     '78',              '80', '82', '84',
+                     '86',              '88', '90', '92',
+                     '94',              '96', '98', '100',
+                     '102',             '104'
+                     );
+
+         NEW:
+               push (
+                     @{$$self{states}},
+                     '64', '66', '68', '70', '72',  '74',  '76',
+                     '78', '80', '82', '84', '86',  '88',  '90',
+                     '92', '94', '96', '98', '100', '102', '104'
+                     );
+
+    -Lists of complex items, such as matricies, are now detected
+     and displayed with just one item per row:
+
+       OLD:
+       $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+           [ 1, tan( deg2rad($a) ), 0 ], [ tan( deg2rad($b) ), 1, 0 ],
+           [ 0, 0, 1 ]
+       );
+
+       NEW:
+       $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+           [ 1,                  tan( deg2rad($a) ), 0 ],
+           [ tan( deg2rad($b) ), 1,                  0 ],
+           [ 0,                  0,                  1 ]
+       );
+
+    -The perl syntax check will be turned off for now when input is from
+     standard input or standard output.  The reason is that this requires
+     temporary files, which has produced far too many problems during
+     Windows testing.  For example, the POSIX module under Windows XP/2000
+     creates temporary names in the root directory, to which only the
+     administrator should have permission to write.
+
+    -Merged patch sent by Yves Orton to handle appropriate
+     configuration file locations for different Windows varieties
+     (2000, NT, Me, XP, 95, 98).
+
+    -Added patch to properly handle a for/foreach loop without
+     parens around a list represented as a qw.  I didn't know this
+     was possible until Wolfgang Weisselberg pointed it out:
+
+           foreach my $key qw\Uno Due Tres Quadro\ {
+               print "Set $key\n";
+           }
+
+     But Perl will give a syntax error without the $ variable; ie this will
+     not work:
+
+           foreach qw\Uno Due Tres Quadro\ {
+               print "Set $_\n";
+           }
+
+    -Merged Windows version detection code sent by Yves Orton.  Perltidy
+     now automatically turns off syntax checking for Win 9x/ME versions,
+     and this has solved a lot of robustness problems.  These systems 
+     cannot reliably handle backtick operators.  See man page for
+     details.
+     
+    -Merged VMS filename handling patch sent by Michael Cartmell.  (Invalid
+     output filenames were being created in some cases). 
+
+    -Numerous minor improvements have been made for -lp style indentation.
+
+    -Long C-style 'for' expressions will be broken after each ';'.   
+
+     'perltidy -gnu' gives:
+
+       OLD:
+       for ($status = $db->seq($key, $value, R_CURSOR()) ; $status == 0
+            and $key eq $origkey ; $status = $db->seq($key, $value, R_NEXT())) 
+
+       NEW:
+       for ($status = $db->seq($key, $value, R_CURSOR()) ;
+            $status == 0 and $key eq $origkey ;
+            $status = $db->seq($key, $value, R_NEXT()))
+
+    -For the -lp option, a single long term within parens
+     (without commas) now has better alignment.  For example,
+     perltidy -gnu
+
+               OLD:
+               $self->throw("Must specify a known host, not $location,"
+                     . " possible values ("
+                     . join (",", sort keys %hosts) . ")");
+
+               NEW:
+               $self->throw("Must specify a known host, not $location,"
+                            . " possible values ("
+                            . join (",", sort keys %hosts) . ")");
+
+## 2001 12 31
+
+    -This version is about 20 percent faster than the previous
+     version as a result of optimization work.  The largest gain
+     came from switching to a dispatch hash table in the
+     tokenizer.
+
+    -perltidy -html will check to see if HTML::Entities is
+     installed, and if so, it will use it to encode unsafe
+     characters.
+
+    -Added flag -oext=ext to change the output file extension to
+     be different from the default ('tdy' or 'html').  For
+     example:
+
+       perltidy -html -oext=htm filename
+
+    will produce filename.htm
+
+    -Added flag -cscw to issue warnings if a closing side comment would replace
+    an existing, different side comments.  See the man page for details.
+    Thanks to Peter Masiar for helpful discussions.
+
+    -Corrected tokenization error of signed hex/octal/binary numbers. For
+    example, the first hex number below would have been parsed correctly
+    but the second one was not:
+       if ( ( $tmp >= 0x80_00_00 ) || ( $tmp < -0x80_00_00 ) ) { }
+
+    -'**=' was incorrectly tokenized as '**' and '='.  This only
+        caused a problem with the -extrude opton.
+
+    -Corrected a divide by zero when -extrude option is used
+
+    -The flag -w will now contain all errors reported by 'perl -c' on the
+    input file, but otherwise they are not reported.  The reason is that
+    perl will report lots of problems and syntax errors which are not of
+    interest when only a small snippet is being formatted (such as missing
+    modules and unknown bare words).  Perltidy will always report all
+    significant syntax errors that it finds, such as unbalanced braces,
+    unless the -q (quiet) flag is set.
+
+    -Merged modifications created by Hugh Myers into perltidy.
+     These include a 'streamhandle' routine which allows perltidy
+     as a module to operate on input and output arrays and strings
+     in addition to files.  Documentation and new packaging as a
+     module should be ready early next year; This is an elegant,
+     powerful update; many thanks to Hugh for contributing it.
+
+## 2001 11 28
+
+    -added a tentative patch which tries to keep any existing breakpoints
+    at lines with leading keywords map,sort,eval,grep. The idea is to
+    improve formatting of sequences of list operations, as in a schwartzian
+    transform.  Example:
+
+       INPUT:
+       my @sorted = map { $_->[0] }
+                    sort { $a->[1] <=> $b->[1] }
+                    map { [ $_, rand ] } @list;
+
+       OLD:
+       my @sorted =
+         map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, rand ] } @list;
+
+       NEW:
+       my @sorted = map { $_->[0] }
+         sort { $a->[1] <=> $b->[1] }
+         map { [ $_, rand ] } @list;
+
+     The new alignment is not as nice as the input, but this is an improvement.
+     Thanks to Yves Orton for this suggestion.
+
+    -modified indentation logic so that a line with leading opening paren,
+    brace, or square bracket will never have less indentation than the
+    line with the corresponding opening token.  Here's a simple example:
+
+       OLD:
+           $mw->Button(
+               -text    => "New Document",
+               -command => \&new_document
+             )->pack(
+               -side   => 'bottom',
+               -anchor => 'e'
+           );
+
+       Note how the closing ');' is lined up with the first line, even
+       though it closes a paren in the 'pack' line.  That seems wrong.
+    
+       NEW:
+           $mw->Button(
+               -text    => "New Document",
+               -command => \&new_document
+             )->pack(
+               -side   => 'bottom',
+               -anchor => 'e'
+             );
+
+      This seems nicer: you can up-arrow with an editor and arrive at the
+      opening 'pack' line.
+    
+    -corrected minor glitch in which cuddled else (-ce) did not get applied
+    to an 'unless' block, which should look like this:
+
+           unless ($test) {
+
+           } else {
+
+           }
+
+     Thanks to Jeremy Mates for reporting this.
+
+    -The man page has been reorganized to parameters easier to find.
+    
+    -Added check for multiple definitions of same subroutine.  It is easy
+     to introduce this problem when cutting and pasting. Perl does not
+     complain about it, but it can lead to disaster.
+
+    -The command -pro=filename  or -profile=filename may be used to specify a
+     configuration file which will override the default name of .perltidyrc.
+     There must not be a space on either side of the '=' sign.  I needed
+     this to be able to easily test perltidy with a variety of different
+     configuration files.
+
+    -Side comment alignment has been improved somewhat across frequent level
+     changes, as in short if/else blocks.  Thanks to Wolfgang Weisselberg 
+     for pointing out this problem.  For example:
+       
+       OLD:
+       if ( ref $self ) {    # Called as a method
+           $format = shift;
+       }
+       else {    # Regular procedure call
+           $format = $self;
+           undef $self;
+       }
+
+       NEW:
+       if ( ref $self ) {    # Called as a method
+           $format = shift;
+       }
+       else {                # Regular procedure call
+           $format = $self;
+           undef $self;
+       }
+
+    -New command -ssc (--static-side-comment) and related command allows
+     side comments to be spaced close to preceding character.  This is
+     useful for displaying commented code as side comments.
+
+    -New command -csc (--closing-side-comment) and several related
+     commands allow comments to be added to (and deleted from) any or all
+     closing block braces.  This can be useful if you have to maintain large
+     programs, especially those that you didn't write.  See updated man page.
+     Thanks to Peter Masiar for this suggestion.  For a simple example:
+
+           perltidy -csc
+
+           sub foo {
+               if ( !defined( $_[0] ) ) {
+                   print("Hello, World\n");
+               }
+               else {
+                   print( $_[0], "\n" );
+               }
+           } ## end sub foo
+
+     This added '## end sub foo' to the closing brace.  
+     To remove it, perltidy -ncsc.
+
+    -New commands -ola, for outdenting labels, and -okw, for outdenting
+     selected control keywords, were implemented.  See the perltidy man
+     page for details.  Thanks to Peter Masiar for this suggestion.
+
+    -Hanging side comment change: a comment will not be considered to be a
+     hanging side comment if there is no leading whitespace on the line.
+     This should improve the reliability of identifying hanging side comments.
+     Thanks to Peter Masiar for this suggestion.
+
+    -Two new commands for outdenting, -olq (outdent-long-quotes) and -olc
+     (outdent-long-comments), have been added.  The original -oll
+     (outdent-long-lines) remains, and now is an abbreviation for -olq and -olc.
+     The new default is just -olq.  This was necessary to avoid inconsistency with
+     the new static block comment option.
+
+    -Static block comments:  to provide a way to display commented code
+     better, the convention is used that comments with a leading '##' should
+     not be formatted as usual.  Please see '-sbc' (or '--static-block-comment')
+     for documentation.  It can be deactivated with with -nsbc, but
+     should not normally be necessary. Thanks to Peter Masiar for this 
+     suggestion.
+
+    -Two changes were made to help show structure of complex lists:
+     (1) breakpoints are forced after every ',' in a list where any of
+     the list items spans multiple lines, and
+     (2) List items which span multiple lines now get continuation indentation.
+
+     The following example illustrates both of these points.  Many thanks to
+     Wolfgang Weisselberg for this snippet and a discussion of it; this is a
+     significant formatting improvement. Note how it is easier to see the call
+     parameters in the NEW version:
+
+       OLD:
+       assert( __LINE__, ( not defined $check )
+           or ref $check
+           or $check eq "new"
+           or $check eq "old", "Error in parameters",
+           defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+           defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+           defined $old_db ? ( ref $old_db ? ref $old_db : $old_db ) : "undef" );
+
+       NEW: 
+       assert(
+           __LINE__,
+           ( not defined $check )
+             or ref $check
+             or $check eq "new"
+             or $check eq "old",
+           "Error in parameters",
+           defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+           defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+           defined $old_db  ? ( ref $old_db  ? ref $old_db  : $old_db )  : "undef"
+       );
+
+       Another example shows how this helps displaying lists:
+
+       OLD:
+       %{ $self->{COMPONENTS} } = (
+           fname =>
+           { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+           street =>
+           { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+       );
+
+       The structure is clearer with the added indentation:
+       
+       NEW:
+       %{ $self->{COMPONENTS} } = (
+           fname =>
+             { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+           street =>
+             { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+       );
+
+       -The structure of nested logical expressions is now displayed better.
+       Thanks to Wolfgang Weisselberg for helpful discussions.  For example,
+       note how the status of the final 'or' is displayed in the following:
+
+       OLD:
+       return ( !null($op)
+             and null( $op->sibling )
+             and $op->ppaddr eq "pp_null"
+             and class($op) eq "UNOP"
+             and ( ( $op->first->ppaddr =~ /^pp_(and|or)$/
+               and $op->first->first->sibling->ppaddr eq "pp_lineseq" )
+               or ( $op->first->ppaddr eq "pp_lineseq"
+                   and not null $op->first->first->sibling
+                   and $op->first->first->sibling->ppaddr eq "pp_unstack" ) ) );
+
+       NEW:
+       return (
+           !null($op)
+             and null( $op->sibling )
+             and $op->ppaddr eq "pp_null"
+             and class($op) eq "UNOP"
+             and (
+               (
+                   $op->first->ppaddr =~ /^pp_(and|or)$/
+                   and $op->first->first->sibling->ppaddr eq "pp_lineseq"
+               )
+               or ( $op->first->ppaddr eq "pp_lineseq"
+                   and not null $op->first->first->sibling
+                   and $op->first->first->sibling->ppaddr eq "pp_unstack" )
+             )
+       );
+
+      -A break will always be put before a list item containing a comma-arrow.
+      This will improve formatting of mixed lists of this form:
+
+           OLD:
+           $c->create(
+               'text', 225, 20, -text => 'A Simple Plot',
+               -font => $font,
+               -fill => 'brown'
+           );
+
+           NEW:
+           $c->create(
+               'text', 225, 20,
+               -text => 'A Simple Plot',
+               -font => $font,
+               -fill => 'brown'
+           );
+
+     -For convenience, the command -dac (--delete-all-comments) now also
+     deletes pod.  Likewise, -tac (--tee-all-comments) now also sends pod
+     to a '.TEE' file.  Complete control over the treatment of pod and
+     comments is still possible, as described in the updated help message 
+     and man page.
+
+     -The logic which breaks open 'containers' has been rewritten to be completely
+     symmetric in the following sense: if a line break is placed after an opening
+     {, [, or (, then a break will be placed before the corresponding closing
+     token.  Thus, a container either remains closed or is completely cracked
+     open.
+
+     -Improved indentation of parenthesized lists.  For example, 
+
+               OLD:
+               $GPSCompCourse =
+                 int(
+                 atan2( $GPSTempCompLong - $GPSLongitude,
+                 $GPSLatitude - $GPSTempCompLat ) * 180 / 3.14159265 );
+
+               NEW:
+               $GPSCompCourse = int(
+                   atan2(
+                       $GPSTempCompLong - $GPSLongitude,
+                       $GPSLatitude - $GPSTempCompLat
+                     ) * 180 / 3.14159265
+               );
+
+      Further improvements will be made in future releases.
+
+     -Some improvements were made in formatting small lists.
+
+     -Correspondence between Input and Output line numbers reported in a 
+      .LOG file should now be exact.  They were sometimes off due to the size
+      of intermediate buffers.
+
+     -Corrected minor tokenization error in which a ';' in a foreach loop
+      control was tokenized as a statement termination, which forced a 
+      line break:
+
+           OLD:
+           foreach ( $i = 0;
+               $i <= 10;
+               $i += 2
+             )
+           {
+               print "$i ";
+           }
+
+           NEW:
+           foreach ( $i = 0 ; $i <= 10 ; $i += 2 ) {
+               print "$i ";
+           }
+
+     -Corrected a problem with reading config files, in which quote marks were not
+      stripped.  As a result, something like -wba="&& . || " would have the leading
+      quote attached to the && and not work correctly.  A workaround for older
+      versions is to place a space around all tokens within the quotes, like this:
+      -wba=" && . || "
+
+     -Removed any existing space between a label and its ':'
+       OLD    : { }
+       NEW: { }
+      This was necessary because the label and its colon are a single token.
+
+     -Corrected tokenization error for the following (highly non-recommended) 
+      construct:
+       $user = @vars[1] / 100;
+    
+     -Resolved cause of a difference between perltidy under perl v5.6.1 and
+     5.005_03; the problem was different behavior of \G regex position
+     marker(!)
+
+## 2001 10 20
+
+    -Corrected a bug in which a break was not being made after a full-line
+    comment within a short eval/sort/map/grep block.  A flag was not being
+    zeroed.  The syntax error check catches this.  Here is a snippet which
+    illustrates the bug:
+
+           eval {
+               #open Socket to Dispatcher
+               $sock = &OpenSocket;
+           };
+
+    The formatter mistakenly thought that it had found the following 
+    one-line block:
+    
+           eval {#open Socket to Dispatcher$sock = &OpenSocket; };
+
+    The patch fixes this. Many thanks to Henry Story for reporting this bug.
+
+    -Changes were made to help diagnose and resolve problems in a
+    .perltidyrc file: 
+      (1) processing of command parameters has been into two separate
+      batches so that any errors in a .perltidyrc file can be localized.  
+      (2) commands --help, --version, and as many of the --dump-xxx
+      commands are handled immediately, without any command line processing
+      at all.  
+      (3) Perltidy will ignore any commands in the .perltidyrc file which
+      cause immediate exit.  These are:  -h -v -ddf -dln -dop -dsn -dtt
+      -dwls -dwrs -ss.  Thanks to Wolfgang Weisselberg for helpful
+      suggestions regarding these updates.
+
+    -Syntax check has been reinstated as default for MSWin32 systems.  This
+    way Windows 2000 users will get syntax check by default, which seems
+    like a better idea, since the number of Win 95/98 systems will be
+    decreasing over time.  Documentation revised to warn Windows 95/98
+    users about the problem with empty '&1'.  Too bad these systems
+    all report themselves as MSWin32.
+
+## 2001 10 16
+
+    -Fixed tokenization error in which a method call of the form
+
+       Module::->new();
+    
+     got a space before the '::' like this:
+
+       Module ::->new();
+
+     Thanks to David Holden for reporting this.
+    
+    -Added -html control over pod text, using a new abbreviation 'pd'.  See
+    updated perl2web man page. The default is to use the color of a comment,
+    but italicized.  Old .css style sheets will need a new line for
+    .pd to use this.  The old color was the color of a string, and there
+    was no control.  
+    
+    -.css lines are now printed in sorted order.
+
+    -Fixed interpolation problem where html files had '$input_file' as title
+    instead of actual input file name.  Thanks to Simon Perreault for finding
+    this and sending a patch, and also to Tobias Weber.
+
+    -Breaks will now have the ':' placed at the start of a line, 
+    one per line by default because this shows logical structure
+    more clearly. This coding has been completely redone. Some 
+    examples of new ?/: formatting:
+
+          OLD:
+               wantarray ? map( $dir::cwd->lookup($_)->path, @_ ) :
+                 $dir::cwd->lookup( $_[0] )->path;
+
+          NEW:
+               wantarray 
+                 ? map( $dir::cwd->lookup($_)->path, @_ )
+                 : $dir::cwd->lookup( $_[0] )->path;
+
+          OLD:
+                   $a = ( $b > 0 ) ? {
+                       a => 1,
+                       b => 2
+                   } : { a => 6, b => 8 };
+
+          NEW:
+                   $a = ( $b > 0 )
+                     ? {
+                       a => 1,
+                       b => 2
+                     }
+                     : { a => 6, b => 8 };
+
+       OLD: (-gnu):
+       $self->note($self->{skip} ? "Hunk #$self->{hunk} ignored at 1.\n" :
+                   "Hunk #$self->{hunk} failed--$@");
+
+       NEW: (-gnu):
+       $self->note($self->{skip} 
+                   ? "Hunk #$self->{hunk} ignored at 1.\n"
+                   : "Hunk #$self->{hunk} failed--$@");
+
+       OLD:
+           $which_search =
+             $opts{"t"} ? 'title'   :
+             $opts{"s"} ? 'subject' : $opts{"a"} ? 'author' : 'title';
+
+       NEW:
+           $which_search =
+             $opts{"t"} ? 'title'
+             : $opts{"s"} ? 'subject'
+             : $opts{"a"} ? 'author'
+             : 'title';
+    
+    You can use -wba=':' to recover the previous default which placed ':'
+    at the end of a line.  Thanks to Michael Cartmell for helpful
+    discussions and examples.  
+
+    -Tokenizer updated to do syntax checking for matched ?/: pairs.  Also,
+    the tokenizer now outputs a unique serial number for every balanced
+    pair of brace types and ?/: pairs.  This greatly simplifies the
+    formatter.
+
+    -Long lines with repeated 'and', 'or', '&&', '||'  will now have
+    one such item per line.  For example:
+
+       OLD:
+           if ( $opt_d || $opt_m || $opt_p || $opt_t || $opt_x
+               || ( -e $archive && $opt_r ) )
+           {
+               ( $pAr, $pNames ) = readAr($archive);
+           }
+
+       NEW:
+           if ( $opt_d
+               || $opt_m
+               || $opt_p
+               || $opt_t
+               || $opt_x
+               || ( -e $archive && $opt_r ) )
+           {
+               ( $pAr, $pNames ) = readAr($archive);
+           }
+
+      OLD:
+           if ( $vp->{X0} + 4 <= $x && $vp->{X0} + $vp->{W} - 4 >= $x
+               && $vp->{Y0} + 4 <= $y && $vp->{Y0} + $vp->{H} - 4 >= $y ) 
+
+      NEW:
+           if ( $vp->{X0} + 4 <= $x
+               && $vp->{X0} + $vp->{W} - 4 >= $x
+               && $vp->{Y0} + 4 <= $y
+               && $vp->{Y0} + $vp->{H} - 4 >= $y )
+
+    -Long lines with multiple concatenated tokens will have concatenated
+    terms (see below) placed one per line, except for short items.  For
+    example:
+
+      OLD:
+           $report .=
+             "Device type:" . $ib->family . "  ID:" . $ib->serial . "  CRC:"
+             . $ib->crc . ": " . $ib->model() . "\n";
+
+      NEW:
+           $report .= "Device type:"
+             . $ib->family . "  ID:"
+             . $ib->serial . "  CRC:"
+             . $ib->model()
+             . $ib->crc . ": " . "\n";
+
+    NOTE: at present 'short' means 8 characters or less.  There is a
+    tentative flag to change this (-scl), but it is undocumented and
+    is likely to be changed or removed later, so only use it for testing.  
+    In the above example, the tokens "  ID:", "  CRC:", and "\n" are below
+    this limit.  
+
+    -If a line which is short enough to fit on a single line was
+    nevertheless broken in the input file at a 'good' location (see below), 
+    perltidy will try to retain a break.  For example, the following line
+    will be formatted as:
+    
+       open SUM, "<$file"
+         or die "Cannot open $file ($!)";
+    
+    if it was broken in the input file, and like this if not:
+
+       open SUM, "<$file" or die "Cannot open $file ($!)";
+
+    GOOD: 'good' location means before 'and','or','if','unless','&&','||'
+
+    The reason perltidy does not just always break at these points is that if
+    there are multiple, similar statements, this would preclude alignment.  So
+    rather than check for this, perltidy just tries to follow the input style,
+    in the hopes that the author made a good choice. Here is an example where 
+    we might not want to break before each 'if':
+
+       ($Locale, @Locale) = ($English, @English) if (@English > @Locale);
+       ($Locale, @Locale) = ($German,  @German)  if (@German > @Locale);
+       ($Locale, @Locale) = ($French,  @French)  if (@French > @Locale);
+       ($Locale, @Locale) = ($Spanish, @Spanish) if (@Spanish > @Locale);
+
+    -Added wildcard file expansion for systems with shells which lack this.
+    Now 'perltidy *.pl' should work under MSDOS/Windows.  Thanks to Hugh Myers 
+    for suggesting this.  This uses builtin glob() for now; I may change that.
+
+    -Added new flag -sbl which, if specified, overrides the value of -bl
+    for opening sub braces.  This allows formatting of this type:
+
+    perltidy -sbl 
+
+    sub foo
+    {
+       if (!defined($_[0])) {
+           print("Hello, World\n");
+       }
+       else {
+           print($_[0], "\n");
+       }
+    }
+    Requested by Don Alexander.
+
+    -Fixed minor parsing error which prevented a space after a $$ variable
+    (pid) in some cases.  Thanks to Michael Cartmell for noting this.
+    For example, 
+      old: $$< 700 
+      new: $$ < 700
+
+    -Improved line break choices 'and' and 'or' to display logic better.
+    For example:
+
+       OLD:
+           exists $self->{'build_dir'} and push @e,
+             "Unwrapped into directory $self->{'build_dir'}";
+
+       NEW:
+           exists $self->{'build_dir'}
+             and push @e, "Unwrapped into directory $self->{'build_dir'}";
+
+    -Fixed error of multiple use of abbreviatioin '-dsc'.  -dsc remains 
+    abbreviation for delete-side-comments; -dsm is new abbreviation for 
+    delete-semicolons.
+
+    -Corrected and updated 'usage' help routine.  Thanks to Slaven Rezic for 
+    noting an error.
+
+    -The default for Windows is, for now, not to do a 'perl -c' syntax
+    check (but -syn will activate it).  This is because of problems with
+    command.com.  James Freeman sent me a patch which tries to get around
+    the problems, and it works in many cases, but testing revealed several
+    issues that still need to be resolved.  So for now, the default is no
+    syntax check for Windows.
+
+    -I added a -T flag when doing perl -c syntax check.
+    This is because I test it on a large number of scripts from sources
+    unknown, and who knows what might be hidden in initialization blocks?
+    Also, deactivated the syntax check if perltidy is run as root.  As a
+    benign example, running the previous version of perltidy on the
+    following file would cause it to disappear:
+
+           BEGIN{
+                   print "Bye, bye baby!\n";
+                   unlink $0;
+           }
+           
+    The new version will not let that happen.
+
+    -I am contemplating (but have not yet implemented) making '-lp' the
+    default indentation, because it is stable now and may be closer to how
+    perl is commonly formatted.  This could be in the next release.  The
+    reason that '-lp' was not the original default is that the coding for
+    it was complex and not ready for the initial release of perltidy.  If
+    anyone has any strong feelings about this, I'd like to hear.  The
+    current default could always be recovered with the '-nlp' flag.  
+
+## 2001 09 03 
+
+    -html updates:
+        - sub definition names are now specially colored, red by default.  
+          The letter 'm' is used to identify them.
+        - keyword 'sub' now has color of other keywords.
+        - restored html keyword color to __END__ and __DATA__, which was 
+          accidentally removed in the previous version.
+
+    -A new -se (--standard-error-output) flag has been implemented and
+    documented which causes all errors to be written to standard output
+    instead of a .ERR file.
+
+    -A new -w (--warning-output) flag has been implemented and documented
+     which causes perltidy to output certain non-critical messages to the
+     error output file, .ERR.  These include complaints about pod usage,
+     for example.  The default is to not include these.
+
+     NOTE: This replaces an undocumented -w=0 or --warning-level flag
+     which was tentatively introduced in the previous version to avoid some
+     unwanted messages.  The new default is the same as the old -w=0, so
+     that is no longer needed. 
+
+     -Improved syntax checking and corrected tokenization of functions such
+     as rand, srand, sqrt, ...  These can accept either an operator or a term
+     to their right.  This has been corrected.
+    
+    -Corrected tokenization of semicolon: testing of the previous update showed 
+    that the semicolon in the following statement was being mis-tokenized.  That
+    did no harm, other than adding an extra blank space, but has been corrected.
+
+             for (sort {strcoll($a,$b);} keys %investments) {
+                ...
+             }
+
+    -New syntax check: after wasting 5 minutes trying to resolve a syntax
+     error in which I had an extra terminal ';' in a complex for (;;) statement, 
+     I spent a few more minutes adding a check for this in perltidy so it won't
+     happen again.
+
+    -The behavior of --break-before-subs (-bbs) and --break-before-blocks
+    (-bbb) has been modified.  Also, a new control parameter,
+    --long-block-line-count=n (-lbl=n) has been introduced to give more
+    control on -bbb.  This was previously a hardwired value.  The reason
+    for the change is to reduce the number of unwanted blank lines that
+    perltidy introduces, and make it less erratic.  It's annoying to remove
+    an unwanted blank line and have perltidy put it back.  The goal is to
+    be able to sprinkle a few blank lines in that dense script you
+    inherited from Bubba.  I did a lot of experimenting with different
+    schemes for introducing blank lines before and after code blocks, and
+    decided that there is no really good way to do it.  But I think the new
+    scheme is an improvement.  You can always deactivate this with -nbbb.
+    I've been meaning to work on this; thanks to Erik Thaysen for bringing
+    it to my attention.
+
+    -The .LOG file is seldom needed, and I get tired of deleting them, so
+     they will now only be automatically saved if perltidy thinks that it
+     made an error, which is almost never.  You can still force the logfile
+     to be saved with -log or -g.
+
+    -Improved method for computing number of columns in a table.  The old
+    method always tried for an even number.  The new method allows odd
+    numbers when it is obvious that a list is not a hash initialization
+    list.
+
+      old: my (
+                $name,       $xsargs, $parobjs, $optypes,
+                $hasp2child, $pmcode, $hdrcode, $inplacecode,
+                $globalnew,  $callcopy
+             )
+             = @_;
+
+      new: my (
+                $name,   $xsargs,  $parobjs,     $optypes,   $hasp2child,
+                $pmcode, $hdrcode, $inplacecode, $globalnew, $callcopy
+             )
+             = @_;
+
+    -I fiddled with the list threshold adjustment, and some small lists
+    look better now.  Here is the change for one of the lists in test file
+    'sparse.t':
+    old:
+      %units =
+        ("in", "in", "pt", "pt", "pc", "pi", "mm", "mm", "cm", "cm", "\\hsize", "%",
+          "\\vsize", "%", "\\textwidth", "%", "\\textheight", "%");
+
+    new:
+      %units = (
+                 "in",      "in", "pt",          "pt", "pc",           "pi",
+                 "mm",      "mm", "cm",          "cm", "\\hsize",      "%",
+                 "\\vsize", "%",  "\\textwidth", "%",  "\\textheight", "%"
+                 );
+
+    -Improved -lp formatting at '=' sign.  A break was always being added after
+    the '=' sign in a statement such as this, (to be sure there was enough room
+    for the parameters):
+
+    old: my $fee =
+           CalcReserveFee(
+                           $env,          $borrnum,
+                           $biblionumber, $constraint,
+                           $bibitems
+                           );
+    
+    The updated version doesn't do this unless the space is really needed:
+
+    new: my $fee = CalcReserveFee(
+                                  $env,          $borrnum,
+                                  $biblionumber, $constraint,
+                                  $bibitems
+                                  );
+
+    -I updated the tokenizer to allow $#+ and $#-, which seem to be new to
+    Perl 5.6.  Some experimenting with a recent version of Perl indicated
+    that it allows these non-alphanumeric '$#' array maximum index
+    varaibles: $#: $#- $#+ so I updated the parser accordingly.  Only $#:
+    seems to be valid in older versions of Perl.
+
+    -Fixed a rare formatting problem with -lp (and -gnu) which caused
+    excessive indentation.
+
+    -Many additional syntax checks have been added.
+
+    -Revised method for testing here-doc target strings; the following
+    was causing trouble with a regex test because of the '*' characters:
+     print <<"*EOF*";
+     bla bla
+     *EOF*
+    Perl seems to allow almost anything to be a here doc target, so an
+    exact string comparison is now used.
+
+    -Made update to allow underscores in binary numbers, like '0b1100_0000'.
+
+    -Corrected problem with scanning certain module names; a blank space was 
+    being inserted after 'warnings' in the following:
+       use warnings::register;
+    The problem was that warnings (and a couple of other key modules) were 
+    being tokenized as keywords.  They should have just been identifiers.
+
+    -Corrected tokenization of indirect objects after sort, system, and exec,
+    after testing produced an incorrect error message for the following
+    line of code:
+       print sort $sortsubref @list;
+
+    -Corrected minor problem where a line after a format had unwanted
+    extra continuation indentation.  
+
+    -Delete-block-comments (and -dac) now retain any leading hash-bang line
+
+    -Update for -lp (and -gnu) to not align the leading '=' of a list
+    with a previous '=', since this interferes with alignment of parameters.
+
+     old:  my $hireDay = new Date;
+           my $self    = {
+                        firstName => undef,
+                        lastName  => undef,
+                        hireDay   => $hireDay
+                        };
+       
+     new:  my $hireDay = new Date;
+           my $self = {
+                        firstName => undef,
+                        lastName  => undef,
+                        hireDay   => $hireDay
+                        };
+
+    -Modifications made to display tables more compactly when possible,
+     without adding lines. For example,
+     old:
+                   '1', "I", '2', "II", '3', "III", '4', "IV",
+                   '5', "V", '6', "VI", '7', "VII", '8', "VIII",
+                   '9', "IX"
+     new:
+                   '1', "I",   '2', "II",   '3', "III",
+                   '4', "IV",  '5', "V",    '6', "VI",
+                   '7', "VII", '8', "VIII", '9', "IX"
+
+    -Corrected minor bug in which -pt=2 did not keep the right paren tight
+    around a '++' or '--' token, like this:
+
+               for ($i = 0 ; $i < length $key ; $i++ )
+
+    The formatting for this should be, and now is: 
+
+               for ($i = 0 ; $i < length $key ; $i++)
+
+    Thanks to Erik Thaysen for noting this.
+
+    -Discovered a new bug involving here-docs during testing!  See BUGS.html.  
+
+    -Finally fixed parsing of subroutine attributes (A Perl 5.6 feature).
+    However, the attributes and prototypes must still be on the same line
+    as the sub name.
+
+## 2001 07 31 
+
+    -Corrected minor, uncommon bug found during routine testing, in which a
+    blank got inserted between a function name and its opening paren after
+    a file test operator, but only in the case that the function had not
+    been previously seen.  Perl uses the existence (or lack thereof) of 
+    the blank to guess if it is a function call.  That is,
+       if (-l pid_filename()) {
+    became
+       if (-l pid_filename ()) {
+    which is a syntax error if pid_filename has not been seen by perl.
+
+    -If the AutoLoader module is used, perltidy will continue formatting
+    code after seeing an __END__ line.  Use -nlal to deactivate this feature.  
+    Likewise, if the SelfLoader module is used, perltidy will continue 
+    formatting code after seeing a __DATA__ line.  Use -nlsl to
+    deactivate this feature.  Thanks to Slaven Rezic for this suggestion.
+
+    -pod text after __END__ and __DATA__ is now identified by perltidy
+    so that -dp works correctly.  Thanks to Slaven Rezic for this suggestion.
+
+    -The first $VERSION line which might be eval'd by MakeMaker
+    is now passed through unchanged.  Use -npvl to deactivate this feature.
+    Thanks to Manfred Winter for this suggestion.
+
+    -Improved indentation of nested parenthesized expressions.  Tests have
+    given favorable results.  Thanks to Wolfgang Weisselberg for helpful
+    examples.
+
+## 2001 07 23 
+
+    -Fixed a very rare problem in which an unwanted semicolon was inserted
+    due to misidentification of anonymous hash reference curly as a code
+    block curly.  (No instances of this have been reported; I discovered it
+    during testing).  A workaround for older versions of perltidy is to use
+    -nasc.
+
+    -Added -icb (-indent-closing-brace) parameter to indent a brace which
+    terminates a code block to the same level as the previous line.
+    Suggested by Andrew Cutler.  For example, 
+
+           if ($task) {
+               yyy();
+               }    # -icb
+           else {
+               zzz();
+               }
+
+    -Rewrote error message triggered by an unknown bareword in a print or
+    printf filehandle position, and added flag -w=0 to prevent issuing this
+    error message.  Suggested by Byron Jones.
+
+    -Added modification to align a one-line 'if' block with similar
+    following 'elsif' one-line blocks, like this:
+         if    ( $something eq "simple" )  { &handle_simple }
+         elsif ( $something eq "hard" )    { &handle_hard }
+    (Suggested by  Wolfgang Weisselberg).
+
+## 2001 07 02 
+
+    -Eliminated all constants with leading underscores because perl 5.005_03
+    does not support that.  For example, _SPACES changed to XX_SPACES.
+    Thanks to kromJx for this update.
+
+## 2001 07 01 
+
+    -the directory of test files has been moved to a separate distribution
+    file because it is getting large but is of little interest to most users.
+    For the current distribution:
+      perltidy-20010701.tgz        contains the source and docs for perltidy
+      perltidy-20010701-test.tgz   contains the test files
+
+    -fixed bug where temporary file perltidy.TMPI was not being deleted 
+    when input was from stdin.
+
+    -adjusted line break logic to not break after closing brace of an
+    eval block (suggested by Boris Zentner).
+
+    -added flag -gnu (--gnu-style) to give an approximation to the GNU
+    style as sometimes applied to perl.  The programming style in GNU
+    'automake' was used as a guide in setting the parameters; these
+    parameters will probably be adjusted over time.
+
+    -an empty code block now has one space for emphasis:
+      if ( $cmd eq "bg_untested" ) {}    # old
+      if ( $cmd eq "bg_untested" ) { }   # new
+    If this bothers anyone, we could create a parameter.
+
+    -the -bt (--brace-tightness) parameter has been split into two
+    parameters to give more control. -bt now applies only to non-BLOCK
+    braces, while a new parameter -bbt (block-brace-tightness) applies to
+    curly braces which contain code BLOCKS. The default value is -bbt=0.
+
+    -added flag -icp (--indent-closing-paren) which leaves a statement
+    termination of the form );, };, or ]; indented with the same
+    indentation as the previous line.  For example,
+
+       @month_of_year = (          # default, or -nicp
+           'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+           'Nov', 'Dec'
+       );
+
+       @month_of_year = (          # -icp
+           'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+           'Nov', 'Dec'
+           );
+
+    -Vertical alignment updated to synchronize with tokens &&, ||,
+    and, or, if, unless.  Allowable space before forcing
+    resynchronization has been increased.  (Suggested by  Wolfgang
+    Weisselberg).
+
+    -html corrected to use -nohtml-bold-xxxxxxx or -nhbx to negate bold,
+    and likewise -nohtml-italic-xxxxxxx or -nhbi to negate italic.  There
+    was no way to negate these previously.  html documentation updated and
+    corrected.  (Suggested by  Wolfgang Weisselberg).
+
+    -Some modifications have been made which improve the -lp formatting in
+    a few cases.
+
+    -Perltidy now retains or creates a blank line after an =cut to keep
+    podchecker happy (Suggested by Manfred H. Winter).  This appears to be
+    a glitch in podchecker, but it was annoying.
+
+## 2001 06 17  
+
+    -Added -bli flag to give continuation indentation to braces, like this
+
+           if ($bli_flag)
+             {
+               extra_indentation();
+             }
+
+    -Corrected an error with the tab (-t) option which caused the last line
+    of a multi-line quote to receive a leading tab.  This error was in
+    version 2001 06 08  but not 2001 04 06.  If you formatted a script
+    with -t with this version, please check it by running once with the
+    -chk flag and perltidy will scan for this possible error.
+
+    -Corrected an invalid pattern (\R should have been just R), changed
+    $^W =1 to BEGIN {$^W=1} to use warnings in compile phase, and corrected
+    several unnecessary 'my' declarations. Many thanks to Wolfgang Weisselberg,
+    2001-06-12, for catching these errors.
+    
+    -A '-bar' flag has been added to require braces to always be on the
+    right, even for multi-line if and foreach statements.  For example,
+    the default formatting of a long if statement would be:
+
+           if ($bigwasteofspace1 && $bigwasteofspace2
+             || $bigwasteofspace3 && $bigwasteofspace4)
+           {
+               bigwastoftime();
+           }
+
+    With -bar, the formatting is:
+
+           if ($bigwasteofspace1 && $bigwasteofspace2
+             || $bigwasteofspace3 && $bigwasteofspace4) {
+               bigwastoftime();
+           }
+    Suggested by Eli Fidler 2001-06-11.
+
+    -Uploaded perltidy to sourceforge cvs 2001-06-10.
+
+    -An '-lp' flag (--line-up-parentheses) has been added which causes lists
+    to be indented with extra indentation in the manner sometimes
+    associated with emacs or the GNU suggestions.  Thanks to Ian Stuart for
+    this suggestion and for extensive help in testing it. 
+
+    -Subroutine call parameter lists are now formatted as other lists.
+    This should improve formatting of tables being passed via subroutine
+    calls.  This will also cause full indentation ('-i=n, default n= 4) of
+    continued parameter list lines rather than just the number of spaces
+    given with -ci=n, default n=2.
+    
+    -Added support for hanging side comments.  Perltidy identifies a hanging
+    side comment as a comment immediately following a line with a side
+    comment or another hanging side comment.  This should work in most
+    cases.  It can be deactivated with --no-hanging-side-comments (-nhsc).
+    The manual has been updated to discuss this.  Suggested by Brad
+    Eisenberg some time ago, and finally implemented.
+
+## 2001 06 08  
+
+    -fixed problem with parsing command parameters containing quoted
+    strings in .perltidyrc files. (Reported by Roger Espel Llima 2001-06-07).
+
+    -added two command line flags, --want-break-after and 
+    --want-break-before, which allow changing whether perltidy
+    breaks lines before or after any operators.  Please see the revised 
+    man pages for details.
+
+    -added system-wide configuration file capability.
+    If perltidy does not find a .perltidyrc command line file in
+    the current directory, nor in the home directory, it now looks
+    for '/usr/local/etc/perltidyrc' and then for '/etc/perltidyrc'.
+    (Suggested by Roger Espel Llima 2001-05-31).
+
+    -fixed problem in which spaces were trimmed from lines of a multi-line
+    quote. (Reported by Roger Espel Llima 2001-05-30).  This is an 
+    uncommon situation, but serious, because it could conceivably change
+    the proper function of a script.
+
+    -fixed problem in which a semicolon was incorrectly added within 
+    an anonymous hash.  (Reported by A.C. Yardley, 2001-5-23).
+    (You would know if this happened, because perl would give a syntax
+    error for the resulting script).
+
+    -fixed problem in which an incorrect error message was produced
+     after a version number on a 'use' line, like this ( Reported 
+     by Andres Kroonmaa, 2001-5-14):
+
+                 use CGI 2.42 qw(fatalsToBrowser);
+
+     Other than the extraneous error message, this bug was harmless.
+
+## 2001 04 06 
+
+    -fixed serious bug in which the last line of some multi-line quotes or
+     patterns was given continuation indentation spaces.  This may make
+     a pattern incorrect unless it uses the /x modifier.  To find
+     instances of this error in scripts which have been formatted with
+     earlier versions of perltidy, run with the -chk flag, which has
+     been added for this purpose (SLH, 2001-04-05).
+
+     ** So, please check previously formatted scripts by running with -chk
+     at least once **
+
+    -continuation indentation has been reprogrammed to be hierarchical, 
+     which improves deeply nested structures.
+
+    -fixed problem with undefined value in list formatting (reported by Michael
+     Langner 2001-04-05)
+
+    -Switched to graphical display of nesting in .LOG files.  If an
+     old format string was "(1 [0 {2", the new string is "{{(".  This
+     is easier to read and also shows the order of nesting.
+
+    -added outdenting of cuddled paren structures, like  ")->pack(".
+
+    -added line break and outdenting of ')->' so that instead of
+
+           $mw->Label(
+             -text   => "perltidy",
+             -relief => 'ridge')->pack;
+    
+     the current default is:
+
+           $mw->Label(
+             -text   => "perltidy",
+             -relief => 'ridge'
+           )->pack;
+
+     (requested by Michael Langner 2001-03-31; in the future this could 
+     be controlled by a command-line parameter).
+
+    -revised list indentation logic, so that lists following an assignment
+     operator get one full indentation level, rather than just continuation 
+     indentation.  Also corrected some minor glitches in the continuation 
+     indentation logic. 
+
+    -Fixed problem with unwanted continuation indentation after a blank line 
+    (reported by Erik Thaysen 2001-03-28):
+
+    -minor update to avoid stranding a single '(' on one line
+
+## 2001 03 28:
+
+    -corrected serious error tokenizing filehandles, in which a sub call 
+    after a print or printf, like this:
+       print usage() and exit;
+    became this:
+       print usage () and exit;
+    Unfortunately, this converts 'usage' to a filehandle.  To fix this, rerun
+    perltidy; it will look for this situation and issue a warning. 
+
+    -fixed another cuddled-else formatting bug (Reported by Craig Bourne)
+
+    -added several diagnostic --dump routines
+    
+    -added token-level whitespace controls (suggested by Hans Ecke)
+
+## 2001 03 23:
+
+    -added support for special variables of the form ${^WANT_BITS}
+
+    -space added between scalar and left paren in 'for' and 'foreach' loops,
+     (suggestion by Michael Cartmell):
+
+       for $i( 1 .. 20 )   # old
+       for $i ( 1 .. 20 )   # new
+
+    -html now outputs cascading style sheets (thanks to suggestion from
+     Hans Ecke)
+
+    -flags -o and -st now work with -html
+
+    -added missing -html documentation for comments (noted by Alex Izvorski)
+
+    -support for VMS added (thanks to Michael Cartmell for code patches and 
+      testing)
+
+    -v-strings implemented (noted by Hans Ecke and Michael Cartmell; extensive
+      testing by Michael Cartmell)
+
+    -fixed problem where operand may be empty at line 3970 
+     (\b should be just b in lines 3970, 3973) (Thanks to Erik Thaysen, 
+     Keith Marshall for bug reports)
+
+    -fixed -ce bug (cuddled else), where lines like '} else {' were indented
+     (Thanks to Shawn Stepper and Rick Measham for reporting this)
+
+## 2001 03 04:
+
+    -fixed undefined value in line 153 (only worked with -I set)
+    (Thanks to Mike Stok, Phantom of the Opcodes, Ian Ehrenwald, and others)
+
+    -fixed undefined value in line 1069 (filehandle problem with perl versions <
+    5.6) (Thanks to Yuri Leikind, Mike Stok, Michael Holve, Jeff Kolber)
+
+## 2001 03 03:
+
+    -Initial announcement at freshmeat.net; started Change Log
+    (Unfortunately this version was DOA, but it was fixed the next day)
diff --git a/COPYING b/COPYING
new file mode 100644
index 00000000..d159169d
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    
+    Copyright (C)   
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  , 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 00000000..67ecab7b
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,398 @@
+# PERLTIDY INSTALLATION NOTES
+
+# Get a distribution file
+
+- Source Files in .tar.gz and .zip format
+
+    This document tells how to install perltidy from the basic source
+    distribution files in `.tar.gz` or `.zip` format.  These files are
+    identical except for the line endings.  The `.tar.gz` has Unix style
+    line endings, and the `.zip` file has Windows style line endings.  The
+    standard perl MakeMaker method should work for these in most cases. 
+
+- Source files in RPM and .deb format
+
+    The web site also has links to RPM and Debian .deb Linux packages, which may be
+    convenient for some users.
+
+# Quick Test Drive
+
+If you want to do a quick test of perltidy without doing any installation, get
+a `.tar.gz` or a `.zip` source file and see the section below "Method 2: Installation
+as a single binary script".
+
+# Uninstall older versions
+
+In certain circumstances, it is best to remove an older version
+of perltidy before installing the latest version.  These are:
+
+- Uninstall a Version older than 20020225
+
+    You can use perltidy -v to determine the version number.  The first
+    version of perltidy to use Makefile.PL for installation was 20020225, so
+    if your previous installation is older than this, it is best to remove
+    it, because the installation path may now be different.  There were up
+    to 3 files these older installations: the script `perltidy` and
+    possibly two man pages, `perltidy.1` and `perl2web.1`.  If you saved
+    your Makefile, you can probably use `make uninstall`.  Otherwise, you
+    can use a `locate` or `find` command to find and remove these files.
+
+- Uninstall older versions when changing installation method
+
+    If you switch from one installation method to another, the paths to the
+    components of perltidy may change, so it is probably best to remove the older
+    version before installing the new version.  If your older installation method
+    had an uninstall option (such as with RPM's and debian packages), use it.
+    Otherwise, you can locate and remove the older files by hand.  There are two
+    key files: `Tidy.pm` and `perltidy`.  In addition, there may be one or two
+    man pages, something like `Perl::Tidy.3pm` and `perltidy.1p`.  You can use a
+    `locate` and/or `find` command to find and remove these files.  After
+    installation, you can verify that the new version of perltidy is working with
+    the `perltidy -v` command.
+
+# Two Installation Methods - Overview
+
+These are generic instructions.  Some system-specific notes and hints
+are given in later sections.  
+
+Two separate installation methods are possible.  
+
+- Method 1: Standard Installation Method
+
+    The standard method based on MakeMaker should work in a normal perl
+    environment.  This is the recommended installation procedure for
+    systems which support it.
+
+            perl Makefile.PL
+            make
+            make test
+            make install
+
+    The `make` command is probably `nmake` under a Windows system.  You
+    may need to become root (or administrator) before doing the `make
+    install` step.  
+
+- Method 2: Installation as a single binary script
+
+    If you just want to take perltidy for a quick test drive without installing it,
+    or are having trouble installing modules, you can bundle it all in one
+    independent executable script.  This might also be helpful on a system for
+    which the Makefile.PL method does not work, or if you are temporarily a guest
+    on some system, or if you want to try hacking a special version of perltidy
+    without messing up your regular version.  
+
+    You just need to uncompress the source distribution, cd down into it, and enter
+    the command:
+
+            perl pm2pl
+
+    which will combine the pieces of perltidy into a single script named
+    `perltidy` in the current directory.  This script should be 
+    fully functional.  Try it out on a handy perl script, for example
+
+        perl perltidy Makefile.PL
+
+    This should create `Makefile.PL.tdy`.
+
+- After Installation
+
+    After installation by either method, verify that the installation worked
+    and that the correct new version is being by entering:
+
+        perltidy -v
+
+    If the version number disagrees with the version number embedded in the
+    distribution file name, search for and remove the old version.
+    For example, under a Unix system, the command `which perltidy` might 
+    show where it is.  Also, see the above notes on uninstalling older
+    versions.
+
+    On a Unix system running the `bash` shell, if you had a previous
+    installation of perltidy, you may have to use 
+
+        hash -r
+
+    to get the shell to find the new one.
+
+    After `perltidy` is installed, you can find where it will look for
+    configuration files and environment variables on your system with
+    the command:
+
+        perltidy -dpro
+
+- How to Uninstall
+
+    Unfortunately, the standard Perl installation method does not seem able
+    to do an uninstall.
+
+    But try this:
+
+        make uninstall
+
+    On some systems, it will give you a list of files to remove by hand.  If
+    not, you need to find the script `perltidy` and its module file
+    `Tidy.pm`, which will be in a subdirectory named `Perl` in the site
+    library.
+
+    If you installed perltidy with the alternative method, you should just
+    reverse the steps that you used.
+
+## Unix Installation Notes
+
+- Alternative method - Unix
+
+    If the alternative method is used, test the script produced by the
+    `pm2pl` perl script:
+
+        perl ./perltidy somefile.pl
+
+    where `somefile.pl` is any convenient test file, such as `Makefile.PL`
+    itself.  Then,
+
+    1\. If the script is not executable, use 
+
+        chmod +x perltidy
+
+    2\. Verify that the initial line in perltidy works for your system by
+    entering:
+
+        ./perltidy -h
+
+    which should produce the usage text and then exit.  This should usually
+    work, but if it does not, you will need to change the first line in
+    `perltidy` to reflect the location of perl on your system.  On a Unix
+    system, you might find the path to perl with the command 'which perl'.
+
+    3\. A sample `Makefile` for this installation method is `Makefile.npm`.
+    Edit it to have the correct paths.
+
+    You will need to become root unless you change the paths to point to
+    somewhere in your home directory.  Then issue the command
+
+        make -f Makefile.npm install
+
+    This installs perltidy and the man page perltidy.1. 
+
+    5\. Test the installation using
+
+        perltidy -h
+
+    You should see the usage screen.  Then, if you installed the man pages, 
+    try
+
+        man perltidy
+
+    which should bring up the manual page. 
+
+    If you ever want to remove perltidy, you can remove perltidy and its man
+    pages by hand or use
+
+        make uninstall
+
+## Windows Installation Notes
+
+On a Windows 9x/Me system you should CLOSE ANY OPEN APPLICATIONS to
+avoid losing unsaved data in case of trouble.
+
+- Standard Method - Windows
+
+    After you unzip the distribution file, the procedure is probably this:
+
+            perl Makefile.PL
+            nmake
+            nmake test
+            nmake install
+
+    You may need to download a copy of `unzip` to unzip the `.zip` distribution
+    file; you can get this at
+    http://www.info-zip.org/pub/infozip/UnZip.html
+
+    If you have ActiveState
+    Perl, the installation method is outlined at
+    http://aspn.activestate.com//ASPN/Reference/Products/ActivePerl/faq/Windows/ActivePerl-Winfaq9.html#How\_can\_I\_use\_modules\_from\_CPAN\_
+
+    You may need to download a copy of Microsoft's `nmake` program from
+    ftp://ftp.microsoft.com/Softlib/MSLFILES/nmake15.exe
+
+    If you are not familiar with installing modules, or have trouble doing
+    so, and want to start testing perltidy quickly, you may want to use the
+    alternative method instead (next section).
+
+- Alternative Method - Windows
+
+    From the main installation directory, just enter
+
+        perl pm2pl 
+
+    Placing the resulting file `perltidy` and the example batch file
+    `perltidy.bat`, located in the `examples` directory, in your path should
+    work.  (You can determine your path by issuing the msdos command
+    `PATH`).  However, the batch file probably will not support file
+    redirection.  So, for example, to pipe the long help message through
+    'more', you might have to invoke perltidy with perl directly, like this:
+
+        perl \somepath\perltidy -h | more
+
+    The batch file will not work properly with wildcard filenames, but you may
+    use wildcard filenames if you place them in quotes.  For example
+
+        perltidy '*.pl'
+
+## VMS Installation Notes
+
+- Links to VMS Utilities and Documentation
+
+    To install perltidy you will need the following utilities Perl, of
+    course, source with VMS goodies available from
+    http://www.sidhe.org/vmsperl or binary available from the Compaq OpenVMS
+    freeware CD.  To unpack the source either gunzip and vmstar available
+    from the Compaq OpenVMS freeware CD or zip available from
+    http://www.info-zip.org/
+
+    To build perltidy you can use either **MMS**, Compaq's VMS equivalent of
+    make, or **MMK**, an **MMS** clone available from
+    http://www.madgoat.com.
+
+    Information on running perl under VMS can be found at:
+    http://w4.lns.cornell.edu/~pvhp/perl/VMS.html
+
+- Unpack the source:
+
+        $ unzip -a perl-tidy-yyyymmdd.zip  ! or
+
+        $ unzip /text=auto perl-tidy-yyyymmdd.zip ! or
+
+        $ gunzip perl-tidy-yyyymmdd.tgz
+        $ vmstar perl-tidy-yyyymmdd.tar
+
+- Build and install perltidy under VMS:
+
+        $ set default [.perl-tidy-yyymmdd]
+        $ perl perltidy.pl
+        $ mmk
+        $ mmk test
+        $ mmk install
+
+- Using Perltidy under VMS
+
+    Create a symbol. This should be put in a logon script, eg sylogin.com
+
+        $ perltidy == "perl perl_root:[utils]perltidy."
+
+    Default parameters can be placed in a `perltidyrc` file.  Perltidy
+    looks for one in the following places and uses the first found if the
+    logical `PERLTIDY` is a file and the file exists then that is used if the
+    logical `PERLTIDY` is a directory then look for a `.perltidyrc` file in the
+    directory look for a `.perltidyrc` file in the user's home directory
+
+    To see where the search is done and which `.perltidyrc` is used type
+
+        $ perltidy -dpro
+
+    A system `PERLTIDY` logical can be defined pointing to a file with a
+    minimal configuration,  and users can defined their own logical to use a
+    personal `.perltidyrc` file.
+
+        $ define /system perltidy perl_root:[utils]perltidy.rc
+
+- The -x Parameter
+
+    If you have one of the magic incantations at the start of perl scripts,
+    so that they can be invoked as a .com file, then you will need to use
+    the **-x** parameter which causes perltidy to skip all lines until it
+    finds a hash bang line eg `#!perl -w`.  Since it is such a common
+    option this is probably a good thing to put in a `.perltidyrc` file.
+
+- VMS File Extensions
+
+    VMS file extensions will use an underscore character instead of a dot, 
+    when necessary, to create a valid filename.  So 
+
+          perltidy myfile.pl
+
+    will generate the output file `myfile.pl_tdy` instead of
+    `myfile.pl.tdy`, and so on. 
+
+# Troubleshooting / Other Operating Systems
+
+If there seems to be a problem locating a configuration file, you can see
+what is going on in the config file search with:
+
+    perltidy -dpro
+
+If you want to customize where perltidy looks for configuration files,
+look at the routine 'find\_config\_file' in module 'Tidy.pm'.  You should
+be able to at least use the '-pro=filename' method under most systems.  
+
+Remember to place quotes (either single or double) around input
+parameters which contain spaces, such as file names.  For example:
+
+    perltidy "file name with spaces"
+
+Without the quotes, perltidy would look for four files: `file`,
+`name`, `with`, and `spaces`.
+
+If you develop a system-dependent patch that might be of general
+interest, please let us know.
+
+# CONFIGURATION FILE
+
+You do not need a configuration file, but you may eventually want to
+create one to save typing; the tutorial and man page discuss this.
+
+# SYSTEM TEMPORARY FILES
+
+Perltidy needs to create a system temporary file when it invokes
+Pod::Html to format pod text under the -html option.  For Unix systems,
+this will normally be a file in /tmp, and for other systems, it will be
+a file in the current working directory named `perltidy.TMP`.  This file
+will be removed when the run finishes.
+
+# DOCUMENTATION
+
+Documentation is contained in **.pod** format, either in the `docs` directory
+or appended to the scripts.  
+
+These documents can also be found at http://perltidy.sourceforge.net
+
+Reading the brief tutorial should help you use perltidy effectively.  
+The tutorial can be read interactively with **perldoc**, for
+example
+
+    cd docs
+    perldoc tutorial.pod
+
+or else an `html` version can be made with **pod2html**:
+
+    pod2html tutorial.pod >tutorial.html
+
+If you use the Makefile.PL installation method on a Unix system, the
+**perltidy** and **Perl::Tidy** man pages should automatically be installed.
+Otherwise, you can extract the man pages with the **pod2xxxx** utilities, as
+follows:
+
+    cd bin
+    pod2text perltidy >perltidy.txt
+    pod2html perltidy >perltidy.html
+    
+    cd lib/Perl
+    pod2text Tidy.pm >Tidy.txt
+    pod2html Tidy.pm >Tidy.html
+
+After installation, the installation directory of files may be deleted. 
+
+Perltidy is still being developed, so please check sourceforge occasionally
+for updates if you find that it is useful.  New releases are announced
+on freshmeat.net.
+
+# CREDITS
+
+Thanks to the many programmers who have documented problems, made suggestions and sent patches.  
+
+# FEEDBACK / BUG REPORTS
+
+If you see ways to improve these notes, please let us know.
+
+A list of current bugs and issues can be found at the CPAN site [https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy](https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy)
+
+To report a new bug or problem, use the link on this page .  
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 00000000..fe65aaea
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,65 @@
+bin/perltidy
+BUGS.md
+CHANGES.md
+COPYING
+docs/README
+docs/stylekey.pod
+docs/testfile.pl
+docs/tutorial.pod
+examples/bbtidy.pl
+examples/break_long_quotes.pl
+examples/ex_mp.pl
+examples/filter_example.in
+examples/filter_example.pl
+examples/find_naughty.pl
+examples/lextest
+examples/perlcomment.pl
+examples/perllinetype.pl
+examples/perlmask.pl
+examples/perltidy_okw.pl
+examples/perltidyrc_dump.pl
+examples/perlxmltok.pl
+examples/pt.bat
+examples/README
+examples/testfa.t
+examples/testff.t
+INSTALL.md
+lib/Perl/Tidy.pm
+lib/Perl/Tidy.pod
+lib/Perl/Tidy/Debugger.pm
+lib/Perl/Tidy/DevNull.pm
+lib/Perl/Tidy/Diagnostics.pm
+lib/Perl/Tidy/FileWriter.pm
+lib/Perl/Tidy/Formatter.pm
+lib/Perl/Tidy/HtmlWriter.pm
+lib/Perl/Tidy/IndentationItem.pm
+lib/Perl/Tidy/IOScalar.pm
+lib/Perl/Tidy/IOScalarArray.pm
+lib/Perl/Tidy/LineBuffer.pm
+lib/Perl/Tidy/LineSink.pm
+lib/Perl/Tidy/LineSource.pm
+lib/Perl/Tidy/Logger.pm
+lib/Perl/Tidy/Tokenizer.pm
+lib/Perl/Tidy/VerticalAligner.pm
+lib/Perl/Tidy/VerticalAligner/Alignment.pm
+lib/Perl/Tidy/VerticalAligner/Line.pm
+Makefile.PL
+MANIFEST			This list of files
+pm2pl
+README.md
+t/filter_example.t
+t/snippets1.t
+t/snippets10.t
+t/snippets2.t
+t/snippets3.t
+t/snippets4.t
+t/snippets5.t
+t/snippets6.t
+t/snippets7.t
+t/snippets8.t
+t/snippets9.t
+t/test.t
+t/testsa.t
+t/testss.t
+t/testwide.pl.src
+t/testwide.t
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
new file mode 100644
index 00000000..577bd418
--- /dev/null
+++ b/MANIFEST.SKIP
@@ -0,0 +1,130 @@
+############################################################################################
+# Top Section from ExtUtils::MANIFEST:
+#
+# https://metacpan.org/source/ETHER/ExtUtils-Manifest-1.70/lib/ExtUtils/MANIFEST.SKIP
+#
+############################################################################################
+
+# Avoid version control files.
+\bRCS\b
+\bCVS\b
+\bSCCS\b
+,v$
+\B\.svn\b
+\B\.git\b
+\B\.gitignore\b
+\b_darcs\b
+\B\.cvsignore$
+
+# Avoid VMS specific MakeMaker generated files
+\bDescrip.MMS$
+\bDESCRIP.MMS$
+\bdescrip.mms$
+
+# Avoid Makemaker generated and utility files.
+\bMANIFEST\.bak
+\bMakefile$
+\bblib/
+\bMakeMaker-\d
+\bpm_to_blib\.ts$
+\bpm_to_blib$
+\bblibdirs\.ts$         # 6.18 through 6.25 generated this
+\b_eumm/                # 7.05_05 and above
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\b_build/
+\bBuild.bat$
+\bBuild.COM$
+\bBUILD.COM$
+\bbuild.com$
+
+# and Module::Build::Tiny generated files
+\b_build_params$
+
+# Avoid temp and backup files.
+~$
+\.old$
+\#$
+\b\.#
+\.bak$
+\.tmp$
+\.log$
+\.archive$
+\.#
+\.rej$
+\..*\.sw.?$
+
+# Avoid OS-specific files/dirs
+# Mac OSX metadata
+\B\.DS_Store
+# Mac OSX SMB mount metadata files
+\B\._
+
+# Avoid Devel::Cover and Devel::CoverX::Covered files.
+\bcover_db\b
+\bcovered\b
+
+# Avoid prove files
+\B\.prove$
+
+# Avoid MYMETA files
+^MYMETA\.
+
+#########################################################################
+# Project Specific Stuff, with help from:
+# ref: https://gist.github.com/jonasbn/aa6646544f2233de39f34c49532215ad #
+#########################################################################
+
+# avoid MANIFEST backups and this file
+\bMANIFEST\.\w
+
+# Avoid archives of perl distribution
+-[\d\.\_]+\.tar
+-[\d\.\_]+\.tar\.gz
+
+# Github files
+\.travis\.yml
+
+# tidyall files
+\.tidyall
+
+# Appveyor files
+\.appveyor\.yml
+
+# perlcritic config file
+\.perlcriticrc
+
+# any shell scripts
+\.sh$
+
+# Perl::Tidy error, log and tidy files
+\bperltidy\.ERR
+\bperltidy\.LOG
+
+# The temporary combined perltidy.pl made by pm2pl
+\bperltidy-.*\.pl
+\bperltidy\.pl
+
+# git files
+^\.git/
+^\.gitignore/
+
+# temporary file area
+^tmp/
+^tmp1/
+
+# snippet folder
+# These are bundled and distributed in the snippets*.t files
+\/snippets/
+
+# developer stuff
+^dev-bin/
+
+# my notes to self
+\bNOTES\.txt
+
+^local-docs/
+
+^archive/
+
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100755
index 00000000..a3a2abbc
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,17 @@
+use ExtUtils::MakeMaker;
+WriteMakefile(
+    NAME         => "Perl::Tidy",
+    VERSION_FROM => "lib/Perl/Tidy.pm",
+    (
+        $] >= 5.005
+        ? (
+            ABSTRACT => 'indent and reformat perl scripts',
+            LICENSE  => 'gpl_2',
+            AUTHOR   => 'Steve Hancock '
+          )
+        : ()
+    ),
+
+    #EXE_FILES => ['bin/perltidy'],
+    dist => { COMPRESS => 'gzip', SUFFIX => 'gz' },
+);
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..3028be44
--- /dev/null
+++ b/README.md
@@ -0,0 +1,57 @@
+# Welcome to Perltidy!
+
+Perltidy is a tool to indent and reformat perl scripts.  It can also
+write scripts in html format.
+
+Perltidy is free software released under the GNU General Public
+License -- please see the included file "COPYING" for details.
+
+# PREREQUISITES
+
+`perltidy` should work with most standard Perl installations.
+The following modules are not required, but perltidy may use them if
+detected:
+
+    HTML::Entities  will be used to encode HTML entities if detected
+    Pod::Html will be used to format pod text
+
+The total disk space needed after removing the installation directory will
+about 2 Mb.  
+
+# DOWNLOAD
+
+There are two source distribution files:
+
+- A `.tgz` "tarball", with Unix-style <lf> line endings, and 
+- A zip file, `.zip`, with Windows-style <cr><lf> line endings.
+
+In addition, the web site has links to debian and RPM packages.
+
+# INSTALLATION
+
+For most standard installations, the standard Makefile.PL method should work:
+
+    perl Makefile.PL
+    make
+    make test
+    make install
+
+The INSTALL file has additional installation notes, and tells how
+to use perltidy without doing an installation.
+
+# WHAT NEXT
+
+Please see the CHANGES file for notices of any recent updates.
+
+Please see the BUGS file for a list of all known open bugs.
+
+Documentation can be found in the `docs` directory, and it can also be 
+found at http://perltidy.sourceforge.net
+
+Reading the brief tutorial should help you use perltidy effectively.  
+
+# FEEDBACK / BUG REPORTS
+
+A list of current bugs and issues can be found at the CPAN site [https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy](https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy)
+
+To report a new bug or problem, use the link on this page .  
diff --git a/bin/perltidy b/bin/perltidy
new file mode 100755
index 00000000..df0512ed
--- /dev/null
+++ b/bin/perltidy
@@ -0,0 +1,3491 @@
+#!/usr/bin/perl
+package main;
+
+use Perl::Tidy;
+
+my $arg_string = undef;
+
+# give Macs a chance to provide command line parameters
+if ( $^O =~ /Mac/ ) {
+    $arg_string = MacPerl::Ask(
+        'Please enter @ARGV (-h for help)',
+        defined $ARGV[0] ? "\"$ARGV[0]\"" : ""
+    );
+}
+
+Perl::Tidy::perltidy( argv => $arg_string );
+
+__END__
+
+=head1 NAME
+
+perltidy - a perl script indenter and reformatter
+
+=head1 SYNOPSIS
+
+    perltidy [ options ] file1 file2 file3 ...
+            (output goes to file1.tdy, file2.tdy, file3.tdy, ...)
+    perltidy [ options ] file1 -o outfile
+    perltidy [ options ] file1 -st >outfile
+    perltidy [ options ] outfile
+
+=head1 DESCRIPTION
+
+Perltidy reads a perl script and writes an indented, reformatted script.
+
+Many users will find enough information in L<"EXAMPLES"> to get 
+started.  New users may benefit from the short tutorial 
+which can be found at
+http://perltidy.sourceforge.net/tutorial.html
+
+A convenient aid to systematically defining a set of style parameters
+can be found at
+http://perltidy.sourceforge.net/stylekey.html
+
+Perltidy can produce output on either of two modes, depending on the
+existence of an B<-html> flag.  Without this flag, the output is passed
+through a formatter.  The default formatting tries to follow the
+recommendations in perlstyle(1), but it can be controlled in detail with
+numerous input parameters, which are described in L<"FORMATTING
+OPTIONS">.  
+
+When the B<-html> flag is given, the output is passed through an HTML
+formatter which is described in L<"HTML OPTIONS">.  
+
+=head1 EXAMPLES
+
+  perltidy somefile.pl
+
+This will produce a file F containing the script reformatted
+using the default options, which approximate the style suggested in 
+perlstyle(1).  The source file F is unchanged.
+
+  perltidy *.pl
+
+Execute perltidy on all F<.pl> files in the current directory with the
+default options.  The output will be in files with an appended F<.tdy>
+extension.  For any file with an error, there will be a file with extension
+F<.ERR>.
+
+  perltidy -b file1.pl file2.pl
+
+Modify F and F in place, and backup the originals to
+F and F.  If F and/or F
+already exist, they will be overwritten.
+
+  perltidy -b -bext='/' file1.pl file2.pl
+
+Same as the previous example except that the backup files F and F will be deleted if there are no errors.
+
+  perltidy -gnu somefile.pl
+
+Execute perltidy on file F with a style which approximates the
+GNU Coding Standards for C programs.  The output will be F.
+
+  perltidy -i=3 somefile.pl
+
+Execute perltidy on file F, with 3 columns for each level of
+indentation (B<-i=3>) instead of the default 4 columns.  There will not be any
+tabs in the reformatted script, except for any which already exist in comments,
+pod documents, quotes, and here documents.  Output will be F. 
+
+  perltidy -i=3 -et=8 somefile.pl
+
+Same as the previous example, except that leading whitespace will
+be entabbed with one tab character per 8 spaces.
+
+  perltidy -ce -l=72 somefile.pl
+
+Execute perltidy on file F with all defaults except use "cuddled
+elses" (B<-ce>) and a maximum line length of 72 columns (B<-l=72>) instead of
+the default 80 columns.  
+
+  perltidy -g somefile.pl
+
+Execute perltidy on file F and save a log file F
+which shows the nesting of braces, parentheses, and square brackets at
+the start of every line.
+
+  perltidy -html somefile.pl
+
+This will produce a file F containing the script with
+html markup.  The output file will contain an embedded style sheet in
+the  section which may be edited to change the appearance.
+
+  perltidy -html -css=mystyle.css somefile.pl
+
+This will produce a file F containing the script with
+html markup.  This output file will contain a link to a separate style
+sheet file F.  If the file F does not exist,
+it will be created.  If it exists, it will not be overwritten.
+
+  perltidy -html -pre somefile.pl
+
+Write an html snippet with only the PRE section to F.
+This is useful when code snippets are being formatted for inclusion in a
+larger web page.  No style sheet will be written in this case.  
+
+  perltidy -html -ss >mystyle.css
+
+Write a style sheet to F and exit.
+
+  perltidy -html -frm mymodule.pm
+
+Write html with a frame holding a table of contents and the source code.  The
+output files will be F (the frame), F
+(the table of contents), and F (the source code).
+
+=head1 OPTIONS - OVERVIEW
+
+The entire command line is scanned for options, and they are processed
+before any files are processed.  As a result, it does not matter
+whether flags are before or after any filenames.  However, the relative
+order of parameters is important, with later parameters overriding the
+values of earlier parameters.  
+
+For each parameter, there is a long name and a short name.  The short
+names are convenient for keyboard input, while the long names are
+self-documenting and therefore useful in scripts.  It is customary to
+use two leading dashes for long names, but one may be used.
+
+Most parameters which serve as on/off flags can be negated with a
+leading "n" (for the short name) or a leading "no" or "no-" (for the
+long name).  For example, the flag to outdent long quotes is B<-olq>
+or B<--outdent-long-quotes>.  The flag to skip this is B<-nolq>
+or B<--nooutdent-long-quotes> or B<--no-outdent-long-quotes>.
+
+Options may not be bundled together.  In other words, options B<-q> and
+B<-g> may NOT be entered as B<-qg>.
+
+Option names may be terminated early as long as they are uniquely identified.
+For example, instead of B<--dump-token-types>, it would be sufficient to enter
+B<--dump-tok>, or even B<--dump-t>, to uniquely identify this command.
+
+=head2 I/O control
+
+The following parameters concern the files which are read and written.
+
+=over 4
+
+=item B<-h>,    B<--help> 
+
+Show summary of usage and exit.
+
+=item	B<-o>=filename,    B<--outfile>=filename  
+
+Name of the output file (only if a single input file is being
+processed).  If no output file is specified, and output is not
+redirected to the standard output, the output will go to F.
+
+=item	B<-st>,    B<--standard-output>
+
+Perltidy must be able to operate on an arbitrarily large number of files
+in a single run, with each output being directed to a different output
+file.  Obviously this would conflict with outputting to the single
+standard output device, so a special flag, B<-st>, is required to
+request outputting to the standard output.  For example,
+
+  perltidy somefile.pl -st >somefile.new.pl
+
+This option may only be used if there is just a single input file.  
+The default is B<-nst> or B<--nostandard-output>.
+
+=item	B<-se>,    B<--standard-error-output>
+
+If perltidy detects an error when processing file F, its
+default behavior is to write error messages to file F.
+Use B<-se> to cause all error messages to be sent to the standard error
+output stream instead.  This directive may be negated with B<-nse>.
+Thus, you may place B<-se> in a F<.perltidyrc> and override it when
+desired with B<-nse> on the command line.
+
+=item	B<-oext>=ext,    B<--output-file-extension>=ext  
+
+Change the extension of the output file to be F instead of the
+default F (or F in case the -B<-html> option is used).
+See L.
+
+=item	B<-opath>=path,    B<--output-path>=path  
+
+When perltidy creates a filename for an output file, by default it merely
+appends an extension to the path and basename of the input file.  This
+parameter causes the path to be changed to F instead.
+
+The path should end in a valid path separator character, but perltidy will try
+to add one if it is missing.
+
+For example
+ 
+ perltidy somefile.pl -opath=/tmp/
+
+will produce F.  Otherwise, F will
+appear in whatever directory contains F.
+
+If the path contains spaces, it should be placed in quotes.
+
+This parameter will be ignored if output is being directed to standard output,
+or if it is being specified explicitly with the B<-o=s> parameter.
+
+=item	B<-b>,    B<--backup-and-modify-in-place>
+
+Modify the input file or files in-place and save the original with the
+extension F<.bak>.  Any existing F<.bak> file will be deleted.  See next
+item for changing the default backup extension, and for eliminating the
+backup file altogether.  
+
+A B<-b> flag will be ignored if input is from standard input or goes to
+standard output, or if the B<-html> flag is set.  
+
+In particular, if you want to use both the B<-b> flag and the B<-pbp>
+(--perl-best-practices) flag, then you must put a B<-nst> flag after the
+B<-pbp> flag because it contains a B<-st> flag as one of its components,
+which means that output will go to the standard output stream.
+
+=item	B<-bext>=ext,    B<--backup-file-extension>=ext  
+
+This parameter serves two purposes: (1) to change the extension of the backup
+file to be something other than the default F<.bak>, and (2) to indicate
+that no backup file should be saved.
+
+To change the default extension to something other than F<.bak> see
+L.
+
+A backup file of the source is always written, but you can request that it
+be deleted at the end of processing if there were no errors.  This is risky
+unless the source code is being maintained with a source code control
+system.  
+
+To indicate that the backup should be deleted include one forward slash,
+B, in the extension.  If any text remains after the slash is removed
+it will be used to define the backup file extension (which is always
+created and only deleted if there were no errors).
+
+Here are some examples:
+
+  Parameter           Extension          Backup File Treatment
+  <-bext=bak>         F<.bak>            Keep (same as the default behavior)
+  <-bext='/'>         F<.bak>            Delete if no errors
+  <-bext='/backup'>   F<.backup>         Delete if no errors
+  <-bext='original/'> F<.original>       Delete if no errors
+
+=item B<-w>,    B<--warning-output>             
+
+Setting B<-w> causes any non-critical warning
+messages to be reported as errors.  These include messages
+about possible pod problems, possibly bad starting indentation level,
+and cautions about indirect object usage.  The default, B<-nw> or
+B<--nowarning-output>, is not to include these warnings.
+
+=item B<-q>,    B<--quiet>             
+
+Deactivate error messages and syntax checking (for running under
+an editor). 
+
+For example, if you use a vi-style editor, such as vim, you may execute
+perltidy as a filter from within the editor using something like
+
+ :n1,n2!perltidy -q
+
+where C represents the selected text.  Without the B<-q> flag,
+any error message may mess up your screen, so be prepared to use your
+"undo" key.
+
+=item B<-log>,    B<--logfile>           
+
+Save the F<.LOG> file, which has many useful diagnostics.  Perltidy always
+creates a F<.LOG> file, but by default it is deleted unless a program bug is
+suspected.  Setting the B<-log> flag forces the log file to be saved.
+
+=item B<-g=n>, B<--logfile-gap=n>
+
+Set maximum interval between input code lines in the logfile.  This purpose of
+this flag is to assist in debugging nesting errors.  The value of C is
+optional.  If you set the flag B<-g> without the value of C, it will be
+taken to be 1, meaning that every line will be written to the log file.  This
+can be helpful if you are looking for a brace, paren, or bracket nesting error. 
+
+Setting B<-g> also causes the logfile to be saved, so it is not necessary to
+also include B<-log>. 
+
+If no B<-g> flag is given, a value of 50 will be used, meaning that at least
+every 50th line will be recorded in the logfile.  This helps prevent
+excessively long log files.  
+
+Setting a negative value of C is the same as not setting B<-g> at all.
+
+=item B<-npro>  B<--noprofile>    
+
+Ignore any F<.perltidyrc> command file.  Normally, perltidy looks first in
+your current directory for a F<.perltidyrc> file of parameters.  (The format
+is described below).  If it finds one, it applies those options to the
+initial default values, and then it applies any that have been defined
+on the command line.  If no F<.perltidyrc> file is found, it looks for one
+in your home directory.
+
+If you set the B<-npro> flag, perltidy will not look for this file.
+
+=item B<-pro=filename> or  B<--profile=filename>    
+
+To simplify testing and switching .perltidyrc files, this command may be
+used to specify a configuration file which will override the default
+name of .perltidyrc.  There must not be a space on either side of the
+'=' sign.  For example, the line
+
+   perltidy -pro=testcfg
+
+would cause file F to be used instead of the 
+default F<.perltidyrc>.
+
+A pathname begins with three dots, e.g. ".../.perltidyrc", indicates that
+the file should be searched for starting in the current directory and
+working upwards. This makes it easier to have multiple projects each with
+their own .perltidyrc in their root directories.
+
+=item B<-opt>,   B<--show-options>      
+
+Write a list of all options used to the F<.LOG> file.  
+Please see B<--dump-options> for a simpler way to do this.
+
+=item B<-f>,   B<--force-read-binary>      
+
+Force perltidy to process binary files.  To avoid producing excessive
+error messages, perltidy skips files identified by the system as non-text.
+However, valid perl scripts containing binary data may sometimes be identified
+as non-text, and this flag forces perltidy to process them.
+
+=back
+
+=head1 FORMATTING OPTIONS
+
+=head2 Basic Options
+
+=over 4
+
+=item B<--notidy>
+
+This flag disables all formatting and causes the input to be copied unchanged
+to the output except for possible changes in line ending characters and any
+pre- and post-filters.  This can be useful in conjunction with a hierarchical
+set of F<.perltidyrc> files to avoid unwanted code tidying.  See also
+L for a way to avoid tidying specific
+sections of code.
+
+=item B<-i=n>,  B<--indent-columns=n>  
+
+Use n columns per indentation level (default n=4).
+
+=item B<-l=n>, B<--maximum-line-length=n>
+
+The default maximum line length is n=80 characters.  Perltidy will try
+to find line break points to keep lines below this length. However, long
+quotes and side comments may cause lines to exceed this length. 
+Setting B<-l=0> is equivalent to setting B<-l=(a large number)>. 
+
+=item B<-vmll>, B<--variable-maximum-line-length>
+
+A problem arises using a fixed maximum line length with very deeply nested code
+and data structures because eventually the amount of leading whitespace used
+for indicating indentation takes up most or all of the available line width,
+leaving little or no space for the actual code or data.  One solution is to use
+a vary long line length.  Another solution is to use the B<-vmll> flag, which
+basically tells perltidy to ignore leading whitespace when measuring the line
+length.  
+
+To be precise, when the B<-vmll> parameter is set, the maximum line length of a
+line of code will be M+L*I, where
+
+      M is the value of --maximum-line-length=M (-l=M), default 80,
+      I is the value of --indent-columns=I (-i=I), default 4,
+      L is the indentation level of the line of code
+
+When this flag is set, the choice of breakpoints for a block of code should be
+essentially independent of its nesting depth.  However, the absolute line
+lengths, including leading whitespace, can still be arbitrarily large.  This
+problem can be avoided by including the next parameter.  
+
+The default is not to do this (B<-nvmll>).
+
+=item B<-wc=n>, B<--whitespace-cycle=n>
+
+This flag also addresses problems with very deeply nested code and data
+structures.  When the nesting depth exceeds the value B the leading
+whitespace will be reduced and start at a depth of 1 again.  The result is that
+blocks of code will shift back to the left rather than moving arbitrarily far
+to the right.  This occurs cyclically to any depth.  
+
+For example if one level of indentation equals 4 spaces (B<-i=4>, the default),
+and one uses B<-wc=15>, then if the leading whitespace on a line exceeds about
+4*15=60 spaces it will be reduced back to 4*1=4 spaces and continue increasing
+from there.  If the whitespace never exceeds this limit the formatting remains
+unchanged.
+
+The combination of B<-vmll> and B<-wc=n> provides a solution to the problem of
+displaying arbitrarily deep data structures and code in a finite window,
+although B<-wc=n> may of course be used without B<-vmll>.
+
+The default is not to use this, which can also be indicated using B<-wc=0>.
+
+=item tabs
+
+Using tab characters will almost certainly lead to future portability
+and maintenance problems, so the default and recommendation is not to
+use them.  For those who prefer tabs, however, there are two different
+options.  
+
+Except for possibly introducing tab indentation characters, as outlined
+below, perltidy does not introduce any tab characters into your file,
+and it removes any tabs from the code (unless requested not to do so
+with B<-fws>).  If you have any tabs in your comments, quotes, or
+here-documents, they will remain.
+
+=over 4
+
+=item B<-et=n>,   B<--entab-leading-whitespace>
+
+This flag causes each B initial space characters to be replaced by
+one tab character.  Note that the integer B is completely independent
+of the integer specified for indentation parameter, B<-i=n>.
+
+=item B<-t>,   B<--tabs>
+
+This flag causes one leading tab character to be inserted for each level
+of indentation.  Certain other features are incompatible with this
+option, and if these options are also given, then a warning message will
+be issued and this flag will be unset.  One example is the B<-lp>
+option.
+
+=item B<-dt=n>,   B<--default-tabsize=n>
+
+If the first line of code passed to perltidy contains leading tabs but no
+tab scheme is specified for the output stream then perltidy must guess how many
+spaces correspond to each leading tab.  This number of spaces B
+corresponding to each leading tab of the input stream may be specified with
+B<-dt=n>.  The default is B.  
+
+This flag has no effect if a tab scheme is specified for the output stream,
+because then the input stream is assumed to use the same tab scheme and
+indentation spaces as for the output stream (any other assumption would lead to
+unstable editing).
+
+=back
+
+=item B<-syn>,   B<--check-syntax>      
+
+This flag is now ignored for safety, but the following documentation
+has been retained for reference.
+
+This flag causes perltidy to run C to check syntax of input
+and output.  (To change the flags passed to perl, see the next
+item, B<-pscf>).  The results are written to the F<.LOG> file, which
+will be saved if an error is detected in the output script.  The output
+script is not checked if the input script has a syntax error.  Perltidy
+does its own checking, but this option employs perl to get a "second
+opinion".
+
+If perl reports errors in the input file, they will not be reported in
+the error output unless the B<--warning-output> flag is given. 
+
+The default is B to do this type of syntax checking (although
+perltidy will still do as much self-checking as possible).  The reason
+is that it causes all code in BEGIN blocks to be executed, for all
+modules being used, and this opens the door to security issues and
+infinite loops when running perltidy.
+
+=item B<-pscf=s>, B<-perl-syntax-check-flags=s>
+
+When perl is invoked to check syntax, the normal flags are C<-c -T>.  In
+addition, if the B<-x> flag is given to perltidy, then perl will also be
+passed a B<-x> flag.  It should not normally be necessary to change
+these flags, but it can be done with the B<-pscf=s> flag.  For example,
+if the taint flag, C<-T>, is not wanted, the flag could be set to be just
+B<-pscf=-c>.  
+
+Perltidy will pass your string to perl with the exception that it will
+add a B<-c> and B<-x> if appropriate.  The F<.LOG> file will show
+exactly what flags were passed to perl.
+
+=item B<-xs>,   B<--extended-syntax>      
+
+A problem with formatting Perl code is that some modules can introduce new
+syntax.  This flag allows perltidy to handle certain common extensions
+to the standard syntax without complaint.  
+
+For example, without this flag a structure such as the following would generate
+a syntax error and the braces would not be balanced:
+
+    method deposit( Num $amount) {
+        $self->balance( $self->balance + $amount );
+    }
+
+This flag is enabled by default but it can be deactivated with B<-nxs>.
+Probably the only reason to deactivate this flag is to generate more diagnostic
+messages when debugging a script.
+
+
+=item B<-io>,   B<--indent-only>       
+
+This flag is used to deactivate all whitespace and line break changes
+within non-blank lines of code.
+When it is in effect, the only change to the script will be
+to the indentation and to the number of blank lines.
+And any flags controlling whitespace and newlines will be ignored.  You
+might want to use this if you are perfectly happy with your whitespace
+and line breaks, and merely want perltidy to handle the indentation.
+(This also speeds up perltidy by well over a factor of two, so it might be
+useful when perltidy is merely being used to help find a brace error in
+a large script).
+
+Setting this flag is equivalent to setting B<--freeze-newlines> and
+B<--freeze-whitespace>.  
+
+If you also want to keep your existing blank lines exactly
+as they are, you can add B<--freeze-blank-lines>. 
+
+With this option perltidy is still free to modify the indenting (and
+outdenting) of code and comments as it normally would.  If you also want to
+prevent long comment lines from being outdented, you can add either B<-noll> or
+B<-l=0>.
+
+Setting this flag will prevent perltidy from doing any special operations on
+closing side comments.  You may still delete all side comments however when
+this flag is in effect.
+
+
+=item B<-enc=s>,  B<--character-encoding=s>
+
+where B=B or B.  This flag tells perltidy the character encoding
+of both the input and output character streams.  The value B causes the
+stream to be read and written as UTF-8.  The value B causes the stream to
+be processed without special encoding assumptions.  At present there is no
+automatic detection of character encoding (even if there is a C<'use utf8'>
+statement in your code) so this flag must be set for streams encoded in UTF-8.
+Incorrectly setting this parameter can cause data corruption, so please
+carefully check the output.
+
+The default is B.  
+
+The abbreviations B<-utf8> or B<-UTF8> are equivalent to B<-enc=utf8>.
+So to process a file named B which is encoded in UTF-8 you can use:
+
+   perltidy -utf8 file.pl
+
+=item B<-ole=s>,  B<--output-line-ending=s>
+
+where s=C, C, C, or C.  This flag tells perltidy
+to output line endings for a specific system.  Normally,
+perltidy writes files with the line separator character of the host
+system.  The C and C flags have an identical result.
+
+=item B<-ple>,  B<--preserve-line-endings>
+
+This flag tells perltidy to write its output files with the same line
+endings as the input file, if possible.  It should work for
+B, B, and B line endings.  It will only work if perltidy
+input comes from a filename (rather than stdin, for example).  If
+perltidy has trouble determining the input file line ending, it will
+revert to the default behavior of using the line ending of the host system.
+
+=item B<-it=n>,   B<--iterations=n>
+
+This flag causes perltidy to do B complete iterations.  The reason for this
+flag is that code beautification is an iterative process and in some
+cases the output from perltidy can be different if it is applied a second time.
+For most purposes the default of B should be satisfactory.  However B
+can be useful when a major style change is being made, or when code is being
+beautified on check-in to a source code control system.  It has been found to
+be extremely rare for the output to change after 2 iterations.  If a value
+B is greater than 2 is input then a convergence test will be used to stop
+the iterations as soon as possible, almost always after 2 iterations.  See
+the next item for a simplified iteration control.
+
+This flag has no effect when perltidy is used to generate html.
+
+=item B<-conv>,   B<--converge>
+
+This flag is equivalent to B<-it=4> and is included to simplify iteration
+control.  For all practical purposes one either does or does not want to be
+sure that the output is converged, and there is no penalty to using a large
+iteration limit since perltidy will check for convergence and stop iterating as
+soon as possible.  The default is B<-nconv> (no convergence check).  Using
+B<-conv> will approximately double run time since normally one extra iteration
+is required to verify convergence.
+
+=back
+
+=head2 Code Indentation Control
+
+=over 4
+
+=item B<-ci=n>, B<--continuation-indentation=n>
+
+Continuation indentation is extra indentation spaces applied when
+a long line is broken.  The default is n=2, illustrated here:
+
+ my $level =   # -ci=2      
+   ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level;
+
+The same example, with n=0, is a little harder to read:
+
+ my $level =   # -ci=0    
+ ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level;
+
+The value given to B<-ci> is also used by some commands when a small
+space is required.  Examples are commands for outdenting labels,
+B<-ola>, and control keywords, B<-okw>.  
+
+When default values are not used, it is suggested that the value B
+given with B<-ci=n> be no more than about one-half of the number of
+spaces assigned to a full indentation level on the B<-i=n> command.
+
+=item B<-sil=n> B<--starting-indentation-level=n>   
+
+By default, perltidy examines the input file and tries to determine the
+starting indentation level.  While it is often zero, it may not be
+zero for a code snippet being sent from an editing session.  
+
+To guess the starting indentation level perltidy simply assumes that
+indentation scheme used to create the code snippet is the same as is being used
+for the current perltidy process.  This is the only sensible guess that can be
+made.  It should be correct if this is true, but otherwise it probably won't.
+For example, if the input script was written with -i=2 and the current peltidy
+flags have -i=4, the wrong initial indentation will be guessed for a code
+snippet which has non-zero initial indentation. Likewise, if an entabbing
+scheme is used in the input script and not in the current process then the
+guessed indentation will be wrong.
+
+If the default method does not work correctly, or you want to change the
+starting level, use B<-sil=n>, to force the starting level to be n.
+
+=item List indentation using B<-lp>, B<--line-up-parentheses>
+
+By default, perltidy indents lists with 4 spaces, or whatever value
+is specified with B<-i=n>.  Here is a small list formatted in this way:
+
+    # perltidy (default)
+    @month_of_year = (
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+
+Use the B<-lp> flag to add extra indentation to cause the data to begin
+past the opening parentheses of a sub call or list, or opening square
+bracket of an anonymous array, or opening curly brace of an anonymous
+hash.  With this option, the above list would become:
+
+    # perltidy -lp
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+
+If the available line length (see B<-l=n> ) does not permit this much 
+space, perltidy will use less.   For alternate placement of the
+closing paren, see the next section.
+
+This option has no effect on code BLOCKS, such as if/then/else blocks,
+which always use whatever is specified with B<-i=n>.  Also, the
+existence of line breaks and/or block comments between the opening and
+closing parens may cause perltidy to temporarily revert to its default
+method.
+
+Note: The B<-lp> option may not be used together with the B<-t> tabs option.
+It may, however, be used with the B<-et=n> tab method.
+
+In addition, any parameter which significantly restricts the ability of
+perltidy to choose newlines will conflict with B<-lp> and will cause
+B<-lp> to be deactivated.  These include B<-io>, B<-fnl>, B<-nanl>, and
+B<-ndnl>.  The reason is that the B<-lp> indentation style can require
+the careful coordination of an arbitrary number of break points in
+hierarchical lists, and these flags may prevent that.
+
+=item B<-cti=n>, B<--closing-token-indentation>
+
+The B<-cti=n> flag controls the indentation of a line beginning with 
+a C<)>, C<]>, or a non-block C<}>.  Such a line receives:
+
+ -cti = 0 no extra indentation (default)
+ -cti = 1 extra indentation such that the closing token
+        aligns with its opening token.
+ -cti = 2 one extra indentation level if the line looks like:
+        );  or  ];  or  };
+ -cti = 3 one extra indentation level always
+
+The flags B<-cti=1> and B<-cti=2> work well with the B<-lp> flag (previous
+section).
+    
+    # perltidy -lp -cti=1
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                     );
+
+    # perltidy -lp -cti=2
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                       );
+
+These flags are merely hints to the formatter and they may not always be
+followed.  In particular, if -lp is not being used, the indentation for
+B is constrained to be no more than one indentation level.
+
+If desired, this control can be applied independently to each of the
+closing container token types.  In fact, B<-cti=n> is merely an
+abbreviation for B<-cpi=n -csbi=n -cbi=n>, where:  
+B<-cpi> or B<--closing-paren-indentation> controls B<)>'s,
+B<-csbi> or B<--closing-square-bracket-indentation> controls B<]>'s, 
+B<-cbi> or B<--closing-brace-indentation> controls non-block B<}>'s. 
+
+=item B<-icp>, B<--indent-closing-paren>
+
+The B<-icp> flag is equivalent to
+B<-cti=2>, described in the previous section.  The B<-nicp> flag is
+equivalent B<-cti=0>.  They are included for backwards compatibility.
+
+=item B<-icb>, B<--indent-closing-brace>
+
+The B<-icb> option gives one extra level of indentation to a brace which
+terminates a code block .  For example,
+
+        if ($task) {
+            yyy();
+            }    # -icb
+        else {
+            zzz();
+            }
+
+The default is not to do this, indicated by B<-nicb>.
+
+=item B<-olq>, B<--outdent-long-quotes>
+
+When B<-olq> is set, lines which is a quoted string longer than the
+value B will have their indentation removed to make
+them more readable.  This is the default.  To prevent such out-denting,
+use B<-nolq> or B<--nooutdent-long-lines>.
+
+=item B<-oll>, B<--outdent-long-lines>
+
+This command is equivalent to B<--outdent-long-quotes> and
+B<--outdent-long-comments>, and it is included for compatibility with previous
+versions of perltidy.  The negation of this also works, B<-noll> or
+B<--nooutdent-long-lines>, and is equivalent to setting B<-nolq> and B<-nolc>.
+
+=item Outdenting Labels: B<-ola>,  B<--outdent-labels>
+
+This command will cause labels to be outdented by 2 spaces (or whatever B<-ci>
+has been set to), if possible.  This is the default.  For example:
+
+        my $i;
+      LOOP: while ( $i =  ) {
+            chomp($i);
+            next unless $i;
+            fixit($i);
+        }
+
+Use B<-nola> to not outdent labels. 
+
+=item Outdenting Keywords
+
+=over 4
+
+=item B<-okw>,  B<--outdent-keywords>
+
+The command B<-okw> will cause certain leading control keywords to
+be outdented by 2 spaces (or whatever B<-ci> has been set to), if
+possible.  By default, these keywords are C, C, C,
+C, and C.  The intention is to make these control keywords
+easier to see.  To change this list of keywords being outdented, see
+the next section.
+
+For example, using C on the previous example gives:
+
+        my $i;
+      LOOP: while ( $i =  ) {
+            chomp($i);
+          next unless $i;
+            fixit($i);
+        }
+
+The default is not to do this.  
+
+=item Specifying Outdented Keywords: B<-okwl=string>,  B<--outdent-keyword-list=string>
+
+This command can be used to change the keywords which are outdented with
+the B<-okw> command.  The parameter B is a required list of perl
+keywords, which should be placed in quotes if there are more than one.
+By itself, it does not cause any outdenting to occur, so the B<-okw>
+command is still required.
+
+For example, the commands C<-okwl="next last redo goto" -okw> will cause
+those four keywords to be outdented.  It is probably simplest to place
+any B<-okwl> command in a F<.perltidyrc> file.
+
+=back
+
+=back
+
+=head2 Whitespace Control
+
+Whitespace refers to the blank space between variables, operators,
+and other code tokens.
+
+=over 4
+
+=item B<-fws>,  B<--freeze-whitespace>
+
+This flag causes your original whitespace to remain unchanged, and
+causes the rest of the whitespace commands in this section, the
+Code Indentation section, and
+the Comment Control section to be ignored.
+
+=item Tightness of curly braces, parentheses, and square brackets.
+
+Here the term "tightness" will mean the closeness with which
+pairs of enclosing tokens, such as parentheses, contain the quantities
+within.  A numerical value of 0, 1, or 2 defines the tightness, with
+0 being least tight and 2 being most tight.  Spaces within containers
+are always symmetric, so if there is a space after a C<(> then there
+will be a space before the corresponding C<)>.
+
+The B<-pt=n> or B<--paren-tightness=n> parameter controls the space within
+parens.  The example below shows the effect of the three possible
+values, 0, 1, and 2:
+
+ if ( ( my $len_tab = length( $tabstr ) ) > 0 ) {  # -pt=0
+ if ( ( my $len_tab = length($tabstr) ) > 0 ) {    # -pt=1 (default)
+ if ((my $len_tab = length($tabstr)) > 0) {        # -pt=2
+
+When n is 0, there is always a space to the right of a '(' and to the left
+of a ')'.  For n=2 there is never a space.  For n=1, the default, there
+is a space unless the quantity within the parens is a single token, such
+as an identifier or quoted string.  
+
+Likewise, the parameter B<-sbt=n> or B<--square-bracket-tightness=n>
+controls the space within square brackets, as illustrated below.
+
+ $width = $col[ $j + $k ] - $col[ $j ];  # -sbt=0
+ $width = $col[ $j + $k ] - $col[$j];    # -sbt=1 (default)
+ $width = $col[$j + $k] - $col[$j];      # -sbt=2 
+
+Curly braces which do not contain code blocks are controlled by
+the parameter B<-bt=n> or B<--brace-tightness=n>. 
+
+ $obj->{ $parsed_sql->{ 'table' }[0] };    # -bt=0
+ $obj->{ $parsed_sql->{'table'}[0] };      # -bt=1 (default)
+ $obj->{$parsed_sql->{'table'}[0]};        # -bt=2
+
+And finally, curly braces which contain blocks of code are controlled by the
+parameter B<-bbt=n> or B<--block-brace-tightness=n> as illustrated in the
+example below.   
+
+ %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default)
+ %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.';   # -bbt=1
+ %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.';     # -bbt=2
+
+To simplify input in the case that all of the tightness flags have the same
+value , the parameter <-act=n> or B<--all-containers-tightness=n> is an
+abbreviation for the combination <-pt=n -sbt=n -bt=n -bbt=n>.
+
+
+=item B<-tso>,   B<--tight-secret-operators>
+
+The flag B<-tso> causes certain perl token sequences (secret operators)
+which might be considered to be a single operator to be formatted "tightly"
+(without spaces).  The operators currently modified by this flag are: 
+
+     0+  +0  ()x!! ~~<>  ,=>   =( )=  
+
+For example the sequence B<0 +>,  which converts a string to a number,
+would be formatted without a space: B<0+> when the B<-tso> flag is set.  This
+flag is off by default.
+
+=item B<-sts>,   B<--space-terminal-semicolon>
+
+Some programmers prefer a space before all terminal semicolons.  The
+default is for no such space, and is indicated with B<-nsts> or
+B<--nospace-terminal-semicolon>.
+
+	$i = 1 ;     #  -sts
+	$i = 1;      #  -nsts   (default)
+
+=item B<-sfs>,   B<--space-for-semicolon>
+
+Semicolons within B loops may sometimes be hard to see,
+particularly when commas are also present.  This option places spaces on
+both sides of these special semicolons, and is the default.  Use
+B<-nsfs> or B<--nospace-for-semicolon> to deactivate it.
+
+ for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) {  # -sfs (default)
+ for ( @a = @$ap, $u = shift @a; @a; $u = $v ) {    # -nsfs
+
+=item B<-asc>,  B<--add-semicolons>
+
+Setting B<-asc> allows perltidy to add any missing optional semicolon at the end 
+of a line which is followed by a closing curly brace on the next line.  This
+is the default, and may be deactivated with B<-nasc> or B<--noadd-semicolons>.
+
+=item B<-dsm>,  B<--delete-semicolons>
+
+Setting B<-dsm> allows perltidy to delete extra semicolons which are
+simply empty statements.  This is the default, and may be deactivated
+with B<-ndsm> or B<--nodelete-semicolons>.  (Such semicolons are not
+deleted, however, if they would promote a side comment to a block
+comment).
+
+=item B<-aws>,  B<--add-whitespace>
+
+Setting this option allows perltidy to add certain whitespace improve
+code readability.  This is the default. If you do not want any
+whitespace added, but are willing to have some whitespace deleted, use
+B<-naws>.  (Use B<-fws> to leave whitespace completely unchanged).
+
+=item B<-dws>,  B<--delete-old-whitespace>
+
+Setting this option allows perltidy to remove some old whitespace
+between characters, if necessary.  This is the default.  If you
+do not want any old whitespace removed, use B<-ndws> or
+B<--nodelete-old-whitespace>.
+
+=item Detailed whitespace controls around tokens
+
+For those who want more detailed control over the whitespace around
+tokens, there are four parameters which can directly modify the default
+whitespace rules built into perltidy for any token.  They are:
+
+B<-wls=s> or B<--want-left-space=s>,
+
+B<-nwls=s> or B<--nowant-left-space=s>,
+
+B<-wrs=s> or B<--want-right-space=s>,
+
+B<-nwrs=s> or B<--nowant-right-space=s>.
+
+These parameters are each followed by a quoted string, B, containing a
+list of token types.  No more than one of each of these parameters
+should be specified, because repeating a command-line parameter
+always overwrites the previous one before perltidy ever sees it.
+
+To illustrate how these are used, suppose it is desired that there be no
+space on either side of the token types B<= + - / *>.  The following two
+parameters would specify this desire:
+
+  -nwls="= + - / *"    -nwrs="= + - / *"
+
+(Note that the token types are in quotes, and that they are separated by
+spaces).  With these modified whitespace rules, the following line of math:
+
+  $root = -$b + sqrt( $b * $b - 4. * $a * $c ) / ( 2. * $a );
+
+becomes this:
+
+  $root=-$b+sqrt( $b*$b-4.*$a*$c )/( 2.*$a );
+
+These parameters should be considered to be hints to perltidy rather
+than fixed rules, because perltidy must try to resolve conflicts that
+arise between them and all of the other rules that it uses.  One
+conflict that can arise is if, between two tokens, the left token wants
+a space and the right one doesn't.  In this case, the token not wanting
+a space takes priority.  
+
+It is necessary to have a list of all token types in order to create
+this type of input.  Such a list can be obtained by the command
+B<--dump-token-types>.  Also try the B<-D> flag on a short snippet of code
+and look at the .DEBUG file to see the tokenization. 
+
+B Be sure to put these tokens in quotes to avoid having them
+misinterpreted by your command shell.
+
+=item Space between specific keywords and opening paren
+
+When an opening paren follows a Perl keyword, no space is introduced after the
+keyword, unless it is (by default) one of these:
+
+   my local our and or eq ne if else elsif until unless 
+   while for foreach return switch case given when
+
+These defaults can be modified with two commands:
+
+B<-sak=s>  or B<--space-after-keyword=s>  adds keywords.
+
+B<-nsak=s>  or B<--nospace-after-keyword=s>  removes keywords.
+
+where B is a list of keywords (in quotes if necessary).  For example, 
+
+  my ( $a, $b, $c ) = @_;    # default
+  my( $a, $b, $c ) = @_;     # -nsak="my local our"
+
+The abbreviation B<-nsak='*'> is equivalent to including all of the
+keywords in the above list.
+
+When both B<-nsak=s> and B<-sak=s> commands are included, the B<-nsak=s>
+command is executed first.  For example, to have space after only the
+keywords (my, local, our) you could use B<-nsak="*" -sak="my local our">.
+
+To put a space after all keywords, see the next item.
+
+=item Space between all keywords and opening parens
+
+When an opening paren follows a function or keyword, no space is introduced
+after the keyword except for the keywords noted in the previous item.  To
+always put a space between a function or keyword and its opening paren,
+use the command:
+
+B<-skp>  or B<--space-keyword-paren>
+
+You will probably also want to use the flag B<-sfp> (next item) too.
+
+=item Space between all function names and opening parens
+
+When an opening paren follows a function the default is not to introduce
+a space.  To cause a space to be introduced use:
+
+B<-sfp>  or B<--space-function-paren>
+
+  myfunc( $a, $b, $c );    # default 
+  myfunc ( $a, $b, $c );   # -sfp
+
+You will probably also want to use the flag B<-skp> (previous item) too.
+
+=item Trimming whitespace around C quotes
+
+B<-tqw> or B<--trim-qw> provide the default behavior of trimming
+spaces around multi-line C quotes and indenting them appropriately.
+
+B<-ntqw> or B<--notrim-qw> cause leading and trailing whitespace around
+multi-line C quotes to be left unchanged.  This option will not
+normally be necessary, but was added for testing purposes, because in
+some versions of perl, trimming C quotes changes the syntax tree.
+
+=item B<-sbq=n>  or B<--space-backslash-quote=n>
+
+Lines like
+
+       $str1=\"string1";
+       $str2=\'string2';
+
+can confuse syntax highlighters unless a space is included between the backslash and the single or double quotation mark.
+
+This can be controlled with the value of B as follows:
+
+    -sbq=0 means no space between the backslash and quote
+    -sbq=1 means follow the example of the source code
+    -sbq=2 means always put a space between the backslash and quote
+
+The default is B<-sbq=1>, meaning that a space will be used 0if there is one in the source code.
+
+=item Trimming trailing whitespace from lines of POD
+
+B<-trp> or B<--trim-pod> will remove trailing whitespace from lines of POD.
+The default is not to do this.
+
+=back
+
+=head2 Comment Controls
+
+Perltidy has a number of ways to control the appearance of both block comments
+and side comments.  The term B here refers to a full-line
+comment, whereas B will refer to a comment which appears on a
+line to the right of some code.
+
+=over 4
+
+=item B<-ibc>,  B<--indent-block-comments>
+
+Block comments normally look best when they are indented to the same
+level as the code which follows them.  This is the default behavior, but
+you may use B<-nibc> to keep block comments left-justified.  Here is an
+example:
+
+             # this comment is indented      (-ibc, default)
+	     if ($task) { yyy(); }
+
+The alternative is B<-nibc>:
+
+ # this comment is not indented              (-nibc)
+	     if ($task) { yyy(); }
+
+See also the next item, B<-isbc>, as well as B<-sbc>, for other ways to
+have some indented and some outdented block comments.
+
+=item B<-isbc>,  B<--indent-spaced-block-comments>
+
+If there is no leading space on the line, then the comment will not be
+indented, and otherwise it may be.
+
+If both B<-ibc> and B<-isbc> are set, then B<-isbc> takes priority.
+
+=item B<-olc>, B<--outdent-long-comments>
+
+When B<-olc> is set, lines which are full-line (block) comments longer
+than the value B will have their indentation
+removed.  This is the default; use B<-nolc> to prevent outdenting.
+
+=item B<-msc=n>,  B<--minimum-space-to-comment=n>
+
+Side comments look best when lined up several spaces to the right of
+code.  Perltidy will try to keep comments at least n spaces to the
+right.  The default is n=4 spaces.
+
+=item B<-fpsc=n>,  B<--fixed-position-side-comment=n>
+
+This parameter tells perltidy to line up side comments in column number B
+whenever possible.  The default, n=0, will not do this.
+
+=item B<-iscl>,  B<--ignore-side-comment-lengths>
+
+This parameter causes perltidy to ignore the length of side comments when
+setting line breaks.  The default, B<-niscl>, is to include the length of 
+side comments when breaking lines to stay within the length prescribed
+by the B<-l=n> maximum line length parameter.  For example, the following
+long single line would remain intact with -l=80 and -iscl:
+
+     perltidy -l=80 -iscl
+        $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well
+
+whereas without the -iscl flag the line will be broken:
+
+     perltidy -l=80
+        $vmsfile =~ s/;[\d\-]*$//
+          ;    # Clip off version number; we can use a newer version as well
+   
+
+=item B<-hsc>, B<--hanging-side-comments>
+
+By default, perltidy tries to identify and align "hanging side
+comments", which are something like this:
+
+        my $IGNORE = 0;    # This is a side comment
+                           # This is a hanging side comment
+                           # And so is this
+
+A comment is considered to be a hanging side comment if (1) it immediately
+follows a line with a side comment, or another hanging side comment, and
+(2) there is some leading whitespace on the line.
+To deactivate this feature, use B<-nhsc> or B<--nohanging-side-comments>.  
+If block comments are preceded by a blank line, or have no leading
+whitespace, they will not be mistaken as hanging side comments.
+
+=item Closing Side Comments
+
+A closing side comment is a special comment which perltidy can
+automatically create and place after the closing brace of a code block.
+They can be useful for code maintenance and debugging.  The command
+B<-csc> (or B<--closing-side-comments>) adds or updates closing side
+comments.  For example, here is a small code snippet
+
+        sub message {
+            if ( !defined( $_[0] ) ) {
+                print("Hello, World\n");
+            }
+            else {
+                print( $_[0], "\n" );
+            }
+        }
+
+And here is the result of processing with C:
+
+        sub message {
+            if ( !defined( $_[0] ) ) {
+                print("Hello, World\n");
+            }
+            else {
+                print( $_[0], "\n" );
+            }
+        } ## end sub message
+
+A closing side comment was added for C in this case, but not
+for the C and C blocks, because they were below the 6 line
+cutoff limit for adding closing side comments.  This limit may be
+changed with the B<-csci> command, described below.
+
+The command B<-dcsc> (or B<--delete-closing-side-comments>) reverses this 
+process and removes these comments.
+
+Several commands are available to modify the behavior of these two basic
+commands, B<-csc> and B<-dcsc>:
+
+=over 4
+
+=item B<-csci=n>, or B<--closing-side-comment-interval=n> 
+
+where C is the minimum number of lines that a block must have in
+order for a closing side comment to be added.  The default value is
+C.  To illustrate:
+
+        # perltidy -csci=2 -csc
+        sub message {
+            if ( !defined( $_[0] ) ) {
+                print("Hello, World\n");
+            } ## end if ( !defined( $_[0] ))
+            else {
+                print( $_[0], "\n" );
+            } ## end else [ if ( !defined( $_[0] ))
+        } ## end sub message
+
+Now the C and C blocks are commented.  However, now this has
+become very cluttered.
+
+=item B<-cscp=string>, or B<--closing-side-comment-prefix=string> 
+
+where string is the prefix used before the name of the block type.  The
+default prefix, shown above, is C<## end>.  This string will be added to
+closing side comments, and it will also be used to recognize them in
+order to update, delete, and format them.  Any comment identified as a
+closing side comment will be placed just a single space to the right of
+its closing brace.
+
+=item B<-cscl=string>, or B<--closing-side-comment-list> 
+
+where C is a list of block types to be tagged with closing side
+comments.  By default, all code block types preceded by a keyword or
+label (such as C, C, and so on) will be tagged.  The B<-cscl>
+command changes the default list to be any selected block types; see
+L.
+For example, the following command
+requests that only C's, labels, C, and C blocks be
+affected by any B<-csc> or B<-dcsc> operation:
+
+   -cscl="sub : BEGIN END"
+
+=item B<-csct=n>, or B<--closing-side-comment-maximum-text=n> 
+
+The text appended to certain block types, such as an C block, is
+whatever lies between the keyword introducing the block, such as C,
+and the opening brace.  Since this might be too much text for a side
+comment, there needs to be a limit, and that is the purpose of this
+parameter.  The default value is C, meaning that no additional
+tokens will be appended to this text after its length reaches 20
+characters.  Omitted text is indicated with C<...>.  (Tokens, including
+sub names, are never truncated, however, so actual lengths may exceed
+this).  To illustrate, in the above example, the appended text of the
+first block is C< ( !defined( $_[0] )...>.  The existing limit of
+C caused this text to be truncated, as indicated by the C<...>.  See
+the next flag for additional control of the abbreviated text.
+
+=item B<-cscb>, or B<--closing-side-comments-balanced> 
+
+As discussed in the previous item, when the
+closing-side-comment-maximum-text limit is exceeded the comment text must
+be truncated.  Older versions of perltidy terminated with three dots, and this
+can still be achieved with -ncscb:
+
+  perltidy -csc -ncscb
+  } ## end foreach my $foo (sort { $b cmp $a ...
+
+However this causes a problem with editors which cannot recognize
+comments or are not configured to do so because they cannot "bounce" around in
+the text correctly.  The B<-cscb> flag has been added to
+help them by appending appropriate balancing structure:
+
+  perltidy -csc -cscb
+  } ## end foreach my $foo (sort { $b cmp $a ... })
+
+The default is B<-cscb>.
+
+=item B<-csce=n>, or B<--closing-side-comment-else-flag=n> 
+
+The default, B, places the text of the opening C statement after any
+terminal C.
+
+If B is used, then each C is also given the text of the opening
+C statement.  Also, an C will include the text of a preceding
+C statement.  Note that this may result some long closing
+side comments.
+
+If B is used, the results will be the same as B whenever the
+resulting line length is less than the maximum allowed.
+
+=item B<-cscb>, or B<--closing-side-comments-balanced> 
+
+When using closing-side-comments, and the closing-side-comment-maximum-text
+limit is exceeded, then the comment text must be abbreviated.  
+It is terminated with three dots if the B<-cscb> flag is negated:
+
+  perltidy -csc -ncscb
+  } ## end foreach my $foo (sort { $b cmp $a ...
+
+This causes a problem with older editors which do not recognize comments
+because they cannot "bounce" around in the text correctly.  The B<-cscb>
+flag tries to help them by appending appropriate terminal balancing structures:
+
+  perltidy -csc -cscb
+  } ## end foreach my $foo (sort { $b cmp $a ... })
+
+The default is B<-cscb>.  
+
+
+=item B<-cscw>, or B<--closing-side-comment-warnings> 
+
+This parameter is intended to help make the initial transition to the use of
+closing side comments.  
+It causes two
+things to happen if a closing side comment replaces an existing, different
+closing side comment:  first, an error message will be issued, and second, the
+original side comment will be placed alone on a new specially marked comment
+line for later attention. 
+
+The intent is to avoid clobbering existing hand-written side comments
+which happen to match the pattern of closing side comments. This flag
+should only be needed on the first run with B<-csc>.
+
+=back
+
+B 
+
+=over 4
+
+=item *
+
+Closing side comments are only placed on lines terminated with a closing
+brace.  Certain closing styles, such as the use of cuddled elses
+(B<-ce>), preclude the generation of some closing side comments.
+
+=item *
+
+Please note that adding or deleting of closing side comments takes
+place only through the commands B<-csc> or B<-dcsc>.  The other commands,
+if used, merely modify the behavior of these two commands.  
+
+=item *
+
+It is recommended that the B<-cscw> flag be used along with B<-csc> on
+the first use of perltidy on a given file.  This will prevent loss of
+any existing side comment data which happens to have the csc prefix.
+
+=item *
+
+Once you use B<-csc>, you should continue to use it so that any
+closing side comments remain correct as code changes.  Otherwise, these
+comments will become incorrect as the code is updated.
+
+=item *
+
+If you edit the closing side comments generated by perltidy, you must also
+change the prefix to be different from the closing side comment prefix.
+Otherwise, your edits will be lost when you rerun perltidy with B<-csc>.   For
+example, you could simply change C<## end> to be C<## End>, since the test is
+case sensitive.  You may also want to use the B<-ssc> flag to keep these
+modified closing side comments spaced the same as actual closing side comments.
+
+=item *
+
+Temporarily generating closing side comments is a useful technique for
+exploring and/or debugging a perl script, especially one written by someone
+else.  You can always remove them with B<-dcsc>.
+
+=back
+
+=item Static Block Comments
+
+Static block comments are block comments with a special leading pattern,
+C<##> by default, which will be treated slightly differently from other
+block comments.  They effectively behave as if they had glue along their
+left and top edges, because they stick to the left edge and previous line
+when there is no blank spaces in those places.  This option is
+particularly useful for controlling how commented code is displayed.
+
+=over 4
+
+=item B<-sbc>, B<--static-block-comments>
+
+When B<-sbc> is used, a block comment with a special leading pattern, C<##> by
+default, will be treated specially. 
+
+Comments so identified  are treated as follows: 
+
+=over 4
+
+=item *
+
+If there is no leading space on the line, then the comment will not
+be indented, and otherwise it may be,
+
+=item *
+
+no new blank line will be
+inserted before such a comment, and 
+
+=item *
+
+such a comment will never become
+a hanging side comment.  
+
+=back
+
+For example, assuming C<@month_of_year> is
+left-adjusted:
+
+    @month_of_year = (    # -sbc (default)
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+    ##  'Dec', 'Nov'
+        'Nov', 'Dec');
+
+Without this convention, the above code would become
+
+    @month_of_year = (   # -nsbc
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+  
+        ##  'Dec', 'Nov'
+        'Nov', 'Dec'
+    );
+
+which is not as clear.
+The default is to use B<-sbc>.  This may be deactivated with B<-nsbc>.
+
+=item B<-sbcp=string>, B<--static-block-comment-prefix=string>
+
+This parameter defines the prefix used to identify static block comments
+when the B<-sbc> parameter is set.  The default prefix is C<##>,
+corresponding to C<-sbcp=##>.  The prefix is actually part of a perl 
+pattern used to match lines and it must either begin with C<#> or C<^#>.  
+In the first case a prefix ^\s* will be added to match any leading
+whitespace, while in the second case the pattern will match only
+comments with no leading whitespace.  For example, to
+identify all comments as static block comments, one would use C<-sbcp=#>.
+To identify all left-adjusted comments as static block comments, use C<-sbcp='^#'>.
+
+Please note that B<-sbcp> merely defines the pattern used to identify static
+block comments; it will not be used unless the switch B<-sbc> is set.  Also,
+please be aware that since this string is used in a perl regular expression
+which identifies these comments, it must enable a valid regular expression to
+be formed.
+
+A pattern which can be useful is:
+
+    -sbcp=^#{2,}[^\s#] 
+
+This pattern requires a static block comment to have at least one character
+which is neither a # nor a space.  It allows a line containing only '#'
+characters to be rejected as a static block comment.  Such lines are often used
+at the start and end of header information in subroutines and should not be
+separated from the intervening comments, which typically begin with just a
+single '#'.
+
+=item B<-osbc>, B<--outdent-static-block-comments>
+
+The command B<-osbc> will cause static block comments to be outdented by 2
+spaces (or whatever B<-ci=n> has been set to), if possible.
+
+=back
+
+=item Static Side Comments
+
+Static side comments are side comments with a special leading pattern.
+This option can be useful for controlling how commented code is displayed
+when it is a side comment.
+
+=over 4
+
+=item B<-ssc>, B<--static-side-comments>
+
+When B<-ssc> is used, a side comment with a static leading pattern, which is
+C<##> by default, will be spaced only a single space from previous
+character, and it will not be vertically aligned with other side comments.
+
+The default is B<-nssc>.
+
+=item B<-sscp=string>, B<--static-side-comment-prefix=string>
+
+This parameter defines the prefix used to identify static side comments
+when the B<-ssc> parameter is set.  The default prefix is C<##>,
+corresponding to C<-sscp=##>.  
+
+Please note that B<-sscp> merely defines the pattern used to identify
+static side comments; it will not be used unless the switch B<-ssc> is
+set.  Also, note that this string is used in a perl regular expression
+which identifies these comments, so it must enable a valid regular
+expression to be formed.
+
+=back
+
+
+=back
+
+=head2 Skipping Selected Sections of Code
+
+Selected lines of code may be passed verbatim to the output without any
+formatting.  This feature is enabled by default but can be disabled with
+the B<--noformat-skipping> or B<-nfs> flag.  It should be used sparingly to
+avoid littering code with markers, but it might be helpful for working
+around occasional problems.  For example it might be useful for keeping
+the indentation of old commented code unchanged, keeping indentation of
+long blocks of aligned comments unchanged, keeping certain list
+formatting unchanged, or working around a glitch in perltidy.
+
+=over 4
+
+=item B<-fs>,  B<--format-skipping>
+
+This flag, which is enabled by default, causes any code between
+special beginning and ending comment markers to be passed to the
+output without formatting.  The default beginning marker is #<<<
+and the default ending marker is #>>> but they
+may be changed (see next items below).  Additional text may appear on
+these special comment lines provided that it is separated from the
+marker by at least one space.  For example
+
+ #<<<  do not let perltidy touch this
+    my @list = (1,
+                1, 1,
+                1, 2, 1,
+                1, 3, 3, 1,
+                1, 4, 6, 4, 1,);
+ #>>>
+
+The comment markers may be placed at any location that a block comment may
+appear.  If they do not appear to be working, use the -log flag and examine the
+F<.LOG> file.  Use B<-nfs> to disable this feature.
+
+=item B<-fsb=string>,  B<--format-skipping-begin=string>
+
+The B<-fsb=string> parameter may be used to change the beginning marker for
+format skipping.  The default is equivalent to -fsb='#<<<'.  The string that
+you enter must begin with a # and should be in quotes as necessary to get past
+the command shell of your system.  It is actually the leading text of a pattern
+that is constructed by appending a '\s', so you must also include backslashes
+for characters to be taken literally rather than as patterns.  
+
+Some examples show how example strings become patterns:
+
+ -fsb='#\{\{\{' becomes /^#\{\{\{\s/  which matches  #{{{ but not #{{{{
+ -fsb='#\*\*'   becomes /^#\*\*\s/    which matches  #** but not #***
+ -fsb='#\*{2,}' becomes /^#\*{2,}\s/  which matches  #** and #***** 
+
+=item B<-fse=string>,  B<--format-skipping-end=string>
+
+The B<-fsb=string> is the corresponding parameter used to change the
+ending marker for format skipping.  The default is equivalent to
+-fse='#<<<'.  
+
+=back
+
+=head2 Line Break Control
+
+The parameters in this section control breaks after
+non-blank lines of code.  Blank lines are controlled
+separately by parameters in the section L.
+
+=over 4
+
+=item B<-fnl>,  B<--freeze-newlines>
+
+If you do not want any changes to the line breaks within
+lines of code in your script, set
+B<-fnl>, and they will remain fixed, and the rest of the commands in
+this section and sections 
+L,
+L. 
+You may want to use B<-noll> with this.
+
+Note: If you also want to keep your blank lines exactly
+as they are, you can use the B<-fbl> flag which is described
+in the section L.
+
+=item B<-ce>,   B<--cuddled-else>
+
+Enable the "cuddled else" style, in which C and C are
+follow immediately after the curly brace closing the previous block.
+The default is not to use cuddled elses, and is indicated with the flag
+B<-nce> or B<--nocuddled-else>.  Here is a comparison of the
+alternatives:
+
+  # -ce
+  if ($task) {
+      yyy();
+  } else {    
+      zzz();
+  }
+
+  # -nce (default)
+  if ($task) {
+	yyy();
+  }
+  else {    
+	zzz();
+  }
+
+In this example the keyword B is placed on the same line which begins with
+the preceding closing block brace and is followed by its own opening block brace
+on the same line.  Other keywords and function names which are formatted with
+this "cuddled" style are B, B, B, B.
+
+Other block types can be formatted by specifying their names on a 
+separate parameter B<-cbl>, described in a later section.  
+
+Cuddling between a pair of code blocks requires that the closing brace of the
+first block start a new line.  If this block is entirely on one line in the
+input file, it is necessary to decide if it should be broken to allow cuddling.
+This decision is controlled by the flag B<-cbo=n> discussed below.  The default
+and recommended value of B<-cbo=1> bases this decision on the first block in
+the chain.  If it spans multiple lines then cuddling is made and continues
+along the chain, regardless of the sizes of subsequent blocks. Otherwise, short
+lines remain intact.
+
+So for example, the B<-ce> flag would not have any effect if the above snippet
+is rewritten as
+
+  if ($task) { yyy() }
+  else {    zzz() }
+
+If the first block spans multiple lines, then cuddling can be done and will
+continue for the subsequent blocks in the chain, as illustrated in the previous
+snippet.
+
+If there are blank lines between cuddled blocks they will be eliminated.  If
+there are comments after the closing brace where cuddling would occur then
+cuddling will be prevented.  If this occurs, cuddling will restart later in the
+chain if possible.  
+
+=item B<-cb>,   B<--cuddled-blocks>
+
+This flag is equivalent to B<-ce>. 
+
+
+=item B<-cbl>,    B<--cuddled-block-list>     
+
+The built-in default cuddled block types are B.
+
+Additional block types to which the B<-cuddled-blocks> style applies can be defined by
+this parameter.  This parameter is a character string, giving a list of
+block types separated by commas or spaces.  For example, to cuddle code blocks
+of type sort, map and grep, in addition to the default types, the string could
+be set to
+  
+  -cbl="sort map grep"
+
+or equivalently
+
+  -cbl=sort,map,grep 
+
+Note however that these particular block types are typically short so there might not be much
+opportunity for the cuddled format style.
+
+Using commas avoids the need to protect spaces with quotes.
+
+As a diagnostic check, the flag B<--dump-cuddled-block-list> or B<-dcbl> can be
+used to view the hash of values that are generated by this flag. 
+
+Finally, note that the B<-cbl> flag by itself merely specifies which blocks are formatted
+with the cuddled format. It has no effect unless this formatting style is activated with
+B<-ce>.
+
+=item B<-cblx>,    B<--cuddled-block-list-exclusive>     
+
+When cuddled else formatting is selected with B<-ce>, setting this flag causes
+perltidy to ignore its built-in defaults and rely exclusively on the block types
+specified on the B<-cbl> flag described in the previous section.  For example,
+to avoid using cuddled B and B, which among in the defaults, the
+following set of parameters could be used:
+
+  perltidy -ce -cbl='else elsif continue' -cblx
+
+
+=item B<-cbo=n>,   B<--cuddled-break-option=n>
+
+Cuddled formatting is only possible between a pair of code blocks if the
+closing brace of the first block starts a new line. If a block is encountered
+which is entirely on a single line, and cuddled formatting is selected, it is
+necessary to make a decision as to whether or not to "break" the block, meaning
+to cause it to span multiple lines.  This parameter controls that decision. The
+options are:
+
+   cbo=0  Never force a short block to break.
+   cbo=1  If the first of a pair of blocks is broken in the input file, 
+          then break the second [DEFAULT].
+   cbo=2  Break open all blocks for maximal cuddled formatting.
+
+The default and recommended value is B.  With this value, if the starting
+block of a chain spans multiple lines, then a cascade of breaks will occur for
+remaining blocks causing the entire chain to be cuddled.
+
+The option B can produce erratic cuddling if there are numerous one-line
+blocks.
+
+The option B produces maximal cuddling but will not allow any short blocks.
+
+
+=item B<-bl>,    B<--opening-brace-on-new-line>     
+
+Use the flag B<-bl> to place the opening brace on a new line:
+
+  if ( $input_file eq '-' )    # -bl 
+  {                          
+      important_function();
+  }
+
+This flag applies to all structural blocks, including named sub's (unless
+the B<-sbl> flag is set -- see next item).
+
+The default style, B<-nbl>, places an opening brace on the same line as
+the keyword introducing it.  For example,
+
+  if ( $input_file eq '-' ) {   # -nbl (default)
+
+=item B<-sbl>,    B<--opening-sub-brace-on-new-line>     
+
+The flag B<-sbl> can be used to override the value of B<-bl> for
+the opening braces of named sub's.  For example, 
+
+ perltidy -sbl
+
+produces this result:
+
+ sub message
+ {
+    if (!defined($_[0])) {
+        print("Hello, World\n");
+    }
+    else {
+        print($_[0], "\n");
+    }
+ }
+
+This flag is negated with B<-nsbl>.  If B<-sbl> is not specified,
+the value of B<-bl> is used.
+
+=item B<-asbl>,    B<--opening-anonymous-sub-brace-on-new-line>     
+
+The flag B<-asbl> is like the B<-sbl> flag except that it applies
+to anonymous sub's instead of named subs. For example
+
+ perltidy -asbl
+
+produces this result:
+
+ $a = sub
+ {
+     if ( !defined( $_[0] ) ) {
+         print("Hello, World\n");
+     }
+     else {
+         print( $_[0], "\n" );
+     }
+ };
+
+This flag is negated with B<-nasbl>, and the default is B<-nasbl>.
+
+=item B<-bli>,    B<--brace-left-and-indent>     
+
+The flag B<-bli> is the same as B<-bl> but in addition it causes one 
+unit of continuation indentation ( see B<-ci> ) to be placed before 
+an opening and closing block braces.
+
+For example,
+
+        if ( $input_file eq '-' )    # -bli
+          {
+            important_function();
+          }
+
+By default, this extra indentation occurs for blocks of type:
+B, B, B, B, B, B, B, 
+B, B, and also with a preceding label.  The next item
+shows how to change this.
+
+=item B<-blil=s>,    B<--brace-left-and-indent-list=s>     
+
+Use this parameter to change the types of block braces for which the
+B<-bli> flag applies; see L.  For example,
+B<-blil='if elsif else'> would apply it to only C blocks.
+
+=item B<-bar>,    B<--opening-brace-always-on-right>     
+
+The default style, B<-nbl> places the opening code block brace on a new
+line if it does not fit on the same line as the opening keyword, like
+this:
+
+        if ( $bigwasteofspace1 && $bigwasteofspace2
+          || $bigwasteofspace3 && $bigwasteofspace4 )
+        {
+            big_waste_of_time();
+        }
+
+To force the opening brace to always be on the right, use the B<-bar>
+flag.  In this case, the above example becomes
+
+        if ( $bigwasteofspace1 && $bigwasteofspace2
+          || $bigwasteofspace3 && $bigwasteofspace4 ) {
+            big_waste_of_time();
+        }
+
+A conflict occurs if both B<-bl> and B<-bar> are specified.
+
+=item B<-otr>,  B<--opening-token-right> and related flags
+
+The B<-otr> flag is a hint that perltidy should not place a break between a
+comma and an opening token.  For example:
+
+    # default formatting
+    push @{ $self->{$module}{$key} },
+      {
+        accno       => $ref->{accno},
+        description => $ref->{description}
+      };
+
+    # perltidy -otr
+    push @{ $self->{$module}{$key} }, {
+        accno       => $ref->{accno},
+        description => $ref->{description}
+      };
+
+The flag B<-otr> is actually an abbreviation for three other flags
+which can be used to control parens, hash braces, and square brackets
+separately if desired:
+
+  -opr  or --opening-paren-right
+  -ohbr or --opening-hash-brace-right
+  -osbr or --opening-square-bracket-right
+
+=item B<-wn>,  B<--weld-nested-containers> 
+
+The B<-wn> flag causes closely nested pairs of opening and closing container
+symbols (curly braces, brackets, or parens) to be "welded" together, meaning
+that they are treated as if combined into a single unit, with the indentation
+of the innermost code reduced to be as if there were just a single container
+symbol.
+
+For example:
+
+	# default formatting
+        do {
+            {
+                next if $x == $y;    
+            }
+        } until $x++ > $z;
+
+	# perltidy -wn
+        do { {
+            next if $x == $y;
+        } } until $x++ > $z;
+
+When this flag is set perltidy makes a preliminary pass through the file and
+identifies all nested pairs of containers.  To qualify as a nested pair, the
+closing container symbols must be immediately adjacent. The opening symbols
+must either be adjacent, or, if the outer opening symbol is an opening
+paren, they may be separated by any single non-container symbol or something
+that looks like a function evaluation.  
+
+Any container symbol may serve as both the inner container of one pair and as
+the outer container of an adjacent pair. Consequently, any number of adjacent
+opening or closing symbols may join together in weld.  For example, here are
+three levels of wrapped function calls:
+
+	# default formatting
+        my (@date_time) = Localtime(
+            Date_to_Time(
+                Add_Delta_DHMS(
+                    $year, $month,  $day, $hour, $minute, $second,
+                    '0',   $offset, '0',  '0'
+                )
+            )
+        );
+
+        # perltidy -wn
+        my (@date_time) = Localtime( Date_to_Time( Add_Delta_DHMS(
+            $year, $month,  $day, $hour, $minute, $second,
+            '0',   $offset, '0',  '0'
+        ) ) );
+
+Notice how the indentation of the inner lines are reduced by two levels in this
+case.  This example also shows the typical result of this formatting, namely it
+is a sandwich consisting of an initial opening layer, a central section of any
+complexity forming the "meat" of the sandwich, and a final closing layer.  This
+predictable structure helps keep the compacted structure readable.
+
+The inner sandwich layer is required to be at least one line thick.  If this
+cannot be achieved, welding does not occur.  This constraint can cause
+formatting to take a couple of iterations to stabilize when it is first applied
+to a script. The B<-conv> flag can be used to insure that the final format is
+achieved in a single run.
+
+Here is an example illustrating a welded container within a welded containers:
+
+	# default formatting
+        $x->badd(
+            bmul(
+                $class->new(
+                    abs(
+                        $sx * int( $xr->numify() ) & $sy * int( $yr->numify() )
+                    )
+                ),
+                $m
+            )
+        );
+
+	# perltidy -wn
+        $x->badd( bmul(
+            $class->new( abs(
+                $sx * int( $xr->numify() ) & $sy * int( $yr->numify() )
+            ) ),
+            $m
+        ) );
+
+This format option is quite general but there are some limitations.  
+
+One limitiation is that any line length limit still applies and can cause long
+welded sections to be broken into multiple lines.  
+
+Another limitation is that an opening symbol which delimits quoted text cannot
+be included in a welded pair.  This is because quote delimiters are treated
+specially in perltidy.  
+
+Finally, the stacking of containers defined by this flag have priority over
+any other container stacking flags.  This is because any welding is done first.
+
+
+=item B of non-block curly braces, parentheses, and square brackets.
+
+These parameters control what shall be called vertical tightness.  Here are the
+main points:
+
+=over 4
+
+=item *
+
+Opening tokens (except for block braces) are controlled by B<-vt=n>, or
+B<--vertical-tightness=n>, where
+
+ -vt=0 always break a line after opening token (default). 
+ -vt=1 do not break unless this would produce more than one 
+         step in indentation in a line.
+ -vt=2 never break a line after opening token
+
+=item *
+
+You must also use the B<-lp> flag when you use the B<-vt> flag; the
+reason is explained below.
+
+=item *
+
+Closing tokens (except for block braces) are controlled by B<-vtc=n>, or
+B<--vertical-tightness-closing=n>, where
+
+ -vtc=0 always break a line before a closing token (default), 
+ -vtc=1 do not break before a closing token which is followed 
+        by a semicolon or another closing token, and is not in 
+        a list environment.
+ -vtc=2 never break before a closing token.
+
+The rules for B<-vtc=1> are designed to maintain a reasonable balance
+between tightness and readability in complex lists.
+
+=item *
+
+Different controls may be applied to different token types,
+and it is also possible to control block braces; see below.
+
+=item *
+
+Finally, please note that these vertical tightness flags are merely
+hints to the formatter, and it cannot always follow them.  Things which
+make it difficult or impossible include comments, blank lines, blocks of
+code within a list, and possibly the lack of the B<-lp> parameter.
+Also, these flags may be ignored for very small lists (2 or 3 lines in
+length).
+
+=back
+
+Here are some examples: 
+
+    # perltidy -lp -vt=0 -vtc=0
+    %romanNumerals = (
+                       one   => 'I',
+                       two   => 'II',
+                       three => 'III',
+                       four  => 'IV',
+    );
+
+    # perltidy -lp -vt=1 -vtc=0
+    %romanNumerals = ( one   => 'I',
+                       two   => 'II',
+                       three => 'III',
+                       four  => 'IV',
+    );
+
+    # perltidy -lp -vt=1 -vtc=1
+    %romanNumerals = ( one   => 'I',
+                       two   => 'II',
+                       three => 'III',
+                       four  => 'IV', );
+
+The difference between B<-vt=1> and B<-vt=2> is shown here:
+
+    # perltidy -lp -vt=1 
+    $init->add(
+                mysprintf( "(void)find_threadsv(%s);",
+                           cstring( $threadsv_names[ $op->targ ] )
+                )
+    );
+
+    # perltidy -lp -vt=2 
+    $init->add( mysprintf( "(void)find_threadsv(%s);",
+                           cstring( $threadsv_names[ $op->targ ] )
+                )
+    );
+
+With B<-vt=1>, the line ending in C does not combine with the next
+line because the next line is not balanced.  This can help with
+readability, but B<-vt=2> can be used to ignore this rule.
+
+The tightest, and least readable, code is produced with both C<-vt=2> and
+C<-vtc=2>:
+
+    # perltidy -lp -vt=2 -vtc=2
+    $init->add( mysprintf( "(void)find_threadsv(%s);",
+                           cstring( $threadsv_names[ $op->targ ] ) ) );
+
+Notice how the code in all of these examples collapses vertically as
+B<-vt> increases, but the indentation remains unchanged.  This is
+because perltidy implements the B<-vt> parameter by first formatting as
+if B<-vt=0>, and then simply overwriting one output line on top of the
+next, if possible, to achieve the desired vertical tightness.  The
+B<-lp> indentation style has been designed to allow this vertical
+collapse to occur, which is why it is required for the B<-vt> parameter.
+
+The B<-vt=n> and B<-vtc=n> parameters apply to each type of container
+token.  If desired, vertical tightness controls can be applied
+independently to each of the closing container token types.
+
+The parameters for controlling parentheses are B<-pvt=n> or
+B<--paren-vertical-tightness=n>, and B<-pcvt=n> or
+B<--paren-vertical-tightness-closing=n>.
+
+Likewise, the parameters for square brackets are B<-sbvt=n> or
+B<--square-bracket-vertical-tightness=n>, and B<-sbcvt=n> or
+B<--square-bracket-vertical-tightness-closing=n>.
+
+Finally, the parameters for controlling non-code block braces are
+B<-bvt=n> or B<--brace-vertical-tightness=n>, and B<-bcvt=n> or
+B<--brace-vertical-tightness-closing=n>.
+
+In fact, the parameter B<-vt=n> is actually just an abbreviation for
+B<-pvt=n -bvt=n sbvt=n>, and likewise B<-vtc=n> is an abbreviation
+for B<-pvtc=n -bvtc=n sbvtc=n>.
+
+=item B<-bbvt=n> or B<--block-brace-vertical-tightness=n>
+
+The B<-bbvt=n> flag is just like the B<-vt=n> flag but applies
+to opening code block braces.
+
+ -bbvt=0 break after opening block brace (default). 
+ -bbvt=1 do not break unless this would produce more than one 
+         step in indentation in a line.
+ -bbvt=2 do not break after opening block brace.
+
+It is necessary to also use either B<-bl> or B<-bli> for this to work,
+because, as with other vertical tightness controls, it is implemented by
+simply overwriting a line ending with an opening block brace with the
+subsequent line.  For example:
+
+    # perltidy -bli -bbvt=0
+    if ( open( FILE, "< $File" ) )
+      {
+        while ( $File =  )
+          {
+            $In .= $File;
+            $count++;
+          }
+        close(FILE);
+      }
+
+    # perltidy -bli -bbvt=1
+    if ( open( FILE, "< $File" ) )
+      { while ( $File =  )
+          { $In .= $File;
+            $count++;
+          }
+        close(FILE);
+      }
+
+By default this applies to blocks associated with keywords B,
+B, B, B, B, B, B, B,
+B, and also with a preceding label.  This can be changed with
+the parameter B<-bbvtl=string>, or
+B<--block-brace-vertical-tightness-list=string>, where B is a
+space-separated list of block types.  For more information on the
+possible values of this string, see L
+
+For example, if we want to just apply this style to C,
+C, and C blocks, we could use 
+C.
+
+There is no vertical tightness control for closing block braces; with
+one exception they will be placed on separate lines.
+The exception is that a cascade of closing block braces may
+be stacked on a single line.  See B<-scbb>.
+
+=item B<-sot>,  B<--stack-opening-tokens> and related flags
+
+The B<-sot> flag tells perltidy to "stack" opening tokens
+when possible to avoid lines with isolated opening tokens.
+
+For example:
+
+    # default
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+    # -sot
+    $opt_c = Text::CSV_XS->new( {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+For detailed control of individual closing tokens the following
+controls can be used:
+
+  -sop  or --stack-opening-paren
+  -sohb or --stack-opening-hash-brace
+  -sosb or --stack-opening-square-bracket
+  -sobb or --stack-opening-block-brace
+
+The flag B<-sot> is an abbreviation for B<-sop -sohb -sosb>.
+
+The flag B<-sobb> is a abbreviation for B<-bbvt=2 -bbvtl='*'>.  This
+will case a cascade of opening block braces to appear on a single line,
+although this an uncommon occurrence except in test scripts. 
+
+=item B<-sct>,  B<--stack-closing-tokens> and related flags
+
+The B<-sct> flag tells perltidy to "stack" closing tokens
+when possible to avoid lines with isolated closing tokens.
+
+For example:
+
+    # default
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+    # -sct
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        } );
+
+The B<-sct> flag is somewhat similar to the B<-vtc> flags, and in some
+cases it can give a similar result.  The difference is that the B<-vtc>
+flags try to avoid lines with leading opening tokens by "hiding" them at
+the end of a previous line, whereas the B<-sct> flag merely tries to
+reduce the number of lines with isolated closing tokens by stacking them
+but does not try to hide them.  For example:
+
+    # -vtc=2
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1, } );
+
+For detailed control of the stacking of individual closing tokens the
+following controls can be used:
+
+  -scp  or --stack-closing-paren
+  -schb or --stack-closing-hash-brace
+  -scsb or --stack-closing-square-bracket
+  -scbb or --stack-closing-block-brace
+
+The flag B<-sct> is an abbreviation for stacking the non-block closing
+tokens, B<-scp -schb -scsb>. 
+
+Stacking of closing block braces, B<-scbb>, causes a cascade of isolated
+closing block braces to be combined into a single line as in the following
+example:
+
+    # -scbb:
+    for $w1 (@w1) {
+        for $w2 (@w2) {
+            for $w3 (@w3) {
+                for $w4 (@w4) {
+                    push( @lines, "$w1 $w2 $w3 $w4\n" );
+                } } } }
+
+To simplify input even further for the case in which both opening and closing
+non-block containers are stacked, the flag B<-sac> or B<--stack-all-containers>
+is an abbreviation for B<-sot -sot>.
+
+=item B<-dnl>,  B<--delete-old-newlines>
+
+By default, perltidy first deletes all old line break locations, and then it
+looks for good break points to match the desired line length.  Use B<-ndnl>
+or  B<--nodelete-old-newlines> to force perltidy to retain all old line break
+points.  
+
+=item B<-anl>,  B<--add-newlines>
+
+By default, perltidy will add line breaks when necessary to create
+continuations of long lines and to improve the script appearance.  Use
+B<-nanl> or B<--noadd-newlines> to prevent any new line breaks.  
+
+This flag does not prevent perltidy from eliminating existing line
+breaks; see B<--freeze-newlines> to completely prevent changes to line
+break points.
+
+=item Controlling whether perltidy breaks before or after operators
+
+Four command line parameters provide some control over whether
+a line break should be before or after specific token types.
+Two parameters give detailed control:
+
+B<-wba=s> or B<--want-break-after=s>, and
+
+B<-wbb=s> or B<--want-break-before=s>.
+
+These parameters are each followed by a quoted string, B, containing
+a list of token types (separated only by spaces).  No more than one of each
+of these parameters should be specified, because repeating a
+command-line parameter always overwrites the previous one before
+perltidy ever sees it.
+
+By default, perltidy breaks B these token types:
+  % + - * / x != == >= <= =~ !~ < >  | & 
+  = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+
+And perltidy breaks B these token types by default:
+  . << >> -> && || //
+
+To illustrate, to cause a break after a concatenation operator, C<'.'>,
+rather than before it, the command line would be
+
+  -wba="."
+
+As another example, the following command would cause a break before 
+math operators C<'+'>, C<'-'>, C<'/'>, and C<'*'>:
+
+  -wbb="+ - / *"
+
+These commands should work well for most of the token types that perltidy uses
+(use B<--dump-token-types> for a list).  Also try the B<-D> flag on a short
+snippet of code and look at the .DEBUG file to see the tokenization.  However,
+for a few token types there may be conflicts with hardwired logic which cause
+unexpected results.  One example is curly braces, which should be controlled
+with the parameter B provided for that purpose.
+
+B Be sure to put these tokens in quotes to avoid having them
+misinterpreted by your command shell.
+
+Two additional parameters are available which, though they provide no further
+capability, can simplify input are:
+
+B<-baao> or B<--break-after-all-operators>,
+
+B<-bbao> or B<--break-before-all-operators>.
+
+The -baao sets the default to be to break after all of the following operators:
+
+    % + - * / x != == >= <= =~ !~ < > | & 
+    = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+    . : ? && || and or err xor
+
+and the B<-bbao> flag sets the default to break before all of these operators.
+These can be used to define an initial break preference which can be fine-tuned
+with the B<-wba> and B<-wbb> flags.  For example, to break before all operators
+except an B<=> one could use --bbao -wba='=' rather than listing every
+single perl operator except B<=> on a -wbb flag.
+
+=back
+
+=head2 Controlling List Formatting
+
+Perltidy attempts to place comma-separated arrays of values in tables
+which look good.  Its default algorithms usually work well, and they
+have been improving with each release, but several parameters are
+available to control list formatting.
+
+=over 4
+
+=item B<-boc>,  B<--break-at-old-comma-breakpoints>
+
+This flag tells perltidy to try to break at all old commas.  This is not
+the default.  Normally, perltidy makes a best guess at list formatting,
+and seldom uses old comma breakpoints.  Usually this works well,
+but consider:
+
+    my @list = (1,
+                1, 1,
+                1, 2, 1,
+                1, 3, 3, 1,
+                1, 4, 6, 4, 1,);
+
+The default formatting will flatten this down to one line:
+
+    # perltidy (default)
+    my @list = ( 1, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 4, 6, 4, 1, );
+
+which hides the structure. Using B<-boc>, plus additional flags
+to retain the original style, yields
+
+    # perltidy -boc -lp -pt=2 -vt=1 -vtc=1
+    my @list = (1,
+                1, 1,
+                1, 2, 1,
+                1, 3, 3, 1,
+                1, 4, 6, 4, 1,);
+
+A disadvantage of this flag is that all tables in the file
+must already be nicely formatted.  For another possibility see
+the -fs flag in L.
+
+=item B<-mft=n>,  B<--maximum-fields-per-table=n>
+
+If the computed number of fields for any table exceeds B, then it
+will be reduced to B.  The default value for B is a large number,
+40.  While this value should probably be left unchanged as a general
+rule, it might be used on a small section of code to force a list to
+have a particular number of fields per line, and then either the B<-boc>
+flag could be used to retain this formatting, or a single comment could
+be introduced somewhere to freeze the formatting in future applications
+of perltidy.
+
+    # perltidy -mft=2
+    @month_of_year = (    
+        'Jan', 'Feb',
+        'Mar', 'Apr',
+        'May', 'Jun',
+        'Jul', 'Aug',
+        'Sep', 'Oct',
+        'Nov', 'Dec'
+    );
+
+=item B<-cab=n>,  B<--comma-arrow-breakpoints=n>
+
+A comma which follows a comma arrow, '=>', is given special
+consideration.  In a long list, it is common to break at all such
+commas.  This parameter can be used to control how perltidy breaks at
+these commas.  (However, it will have no effect if old comma breaks are
+being forced because B<-boc> is used).  The possible values of B are:
+
+ n=0 break at all commas after =>  
+ n=1 stable: break at all commas after => if container is open,
+     EXCEPT FOR one-line containers
+ n=2 break at all commas after =>, BUT try to form the maximum
+     maximum one-line container lengths
+ n=3 do not treat commas after => specially at all 
+ n=4 break everything: like n=0 but ALSO break a short container with
+     a => not followed by a comma when -vt=0 is used
+ n=5 stable: like n=1 but ALSO break at open one-line containers when
+     -vt=0 is used (default)
+
+For example, given the following single line, perltidy by default will
+not add any line breaks because it would break the existing one-line
+container:
+
+    bless { B => $B, Root => $Root } => $package;
+
+Using B<-cab=0> will force a break after each comma-arrow item:
+
+    # perltidy -cab=0:
+    bless {
+        B    => $B,
+        Root => $Root
+    } => $package;
+
+If perltidy is subsequently run with this container broken, then by
+default it will break after each '=>' because the container is now
+broken.  To reform a one-line container, the parameter B<-cab=2> could
+be used.
+
+The flag B<-cab=3> can be used to prevent these commas from being
+treated specially.  In this case, an item such as "01" => 31 is
+treated as a single item in a table.  The number of fields in this table
+will be determined by the same rules that are used for any other table.
+Here is an example.
+    
+    # perltidy -cab=3
+    my %last_day = (
+        "01" => 31, "02" => 29, "03" => 31, "04" => 30,
+        "05" => 31, "06" => 30, "07" => 31, "08" => 31,
+        "09" => 30, "10" => 31, "11" => 30, "12" => 31
+    );
+
+=back
+
+=head2 Retaining or Ignoring Existing Line Breaks
+
+Several additional parameters are available for controlling the extent
+to which line breaks in the input script influence the output script.
+In most cases, the default parameter values are set so that, if a choice
+is possible, the output style follows the input style.  For example, if
+a short logical container is broken in the input script, then the
+default behavior is for it to remain broken in the output script.
+
+Most of the parameters in this section would only be required for a
+one-time conversion of a script from short container lengths to longer
+container lengths.  The opposite effect, of converting long container
+lengths to shorter lengths, can be obtained by temporarily using a short
+maximum line length.
+
+=over 4
+
+=item B<-bol>,  B<--break-at-old-logical-breakpoints>
+
+By default, if a logical expression is broken at a C<&&>, C<||>, C,
+or C, then the container will remain broken.  Also, breaks
+at internal keywords C and C will normally be retained.
+To prevent this, and thus form longer lines, use B<-nbol>.
+
+=item B<-bok>,  B<--break-at-old-keyword-breakpoints>
+
+By default, perltidy will retain a breakpoint before keywords which may
+return lists, such as C and .  This allows chains of these
+operators to be displayed one per line.  Use B<-nbok> to prevent
+retaining these breakpoints.
+
+=item B<-bot>,  B<--break-at-old-ternary-breakpoints>
+
+By default, if a conditional (ternary) operator is broken at a C<:>,
+then it will remain broken.  To prevent this, and thereby
+form longer lines, use B<-nbot>.
+
+=item B<-boa>,  B<--break-at-old-attribute-breakpoints>
+
+By default, if an attribute list is broken at a C<:> in the source file, then
+it will remain broken.  For example, given the following code, the line breaks
+at the ':'s will be retained:
+       
+                    my @field
+                      : field
+                      : Default(1)
+                      : Get('Name' => 'foo') : Set('Name');
+
+If the attributes are on a single line in the source code then they will remain
+on a single line if possible.
+
+To prevent this, and thereby always form longer lines, use B<-nboa>.  
+
+=item B<-iob>,  B<--ignore-old-breakpoints>
+
+Use this flag to tell perltidy to ignore existing line breaks to the
+maximum extent possible.  This will tend to produce the longest possible
+containers, regardless of type, which do not exceed the line length
+limit.
+
+=item B<-kis>,  B<--keep-interior-semicolons>
+
+Use the B<-kis> flag to prevent breaking at a semicolon if
+there was no break there in the input file.  Normally
+perltidy places a newline after each semicolon which
+terminates a statement unless several statements are
+contained within a one-line brace block.  To illustrate,
+consider the following input lines:
+
+    dbmclose(%verb_delim); undef %verb_delim;
+    dbmclose(%expanded); undef %expanded;
+
+The default is to break after each statement, giving
+
+    dbmclose(%verb_delim);
+    undef %verb_delim;
+    dbmclose(%expanded);
+    undef %expanded;
+
+With B the multiple statements are retained:
+
+    dbmclose(%verb_delim); undef %verb_delim;
+    dbmclose(%expanded);   undef %expanded;
+
+The statements are still subject to the specified value
+of B and will be broken if this 
+maximum is exceeded.
+
+=back
+
+=head2 Blank Line Control
+
+Blank lines can improve the readability of a script if they are carefully
+placed.  Perltidy has several commands for controlling the insertion,
+retention, and removal of blank lines.  
+
+=over 4
+
+=item B<-fbl>,  B<--freeze-blank-lines>
+
+Set B<-fbl> if you want to the blank lines in your script to
+remain exactly as they are.  The rest of the parameters in
+this section may then be ignored.  (Note: setting the B<-fbl> flag
+is equivalent to setting B<-mbl=0> and B<-kbl=2>).
+
+=item B<-bbc>,  B<--blanks-before-comments>
+
+A blank line will be introduced before a full-line comment.  This is the
+default.  Use B<-nbbc> or  B<--noblanks-before-comments> to prevent
+such blank lines from being introduced.
+
+=item B<-blbs=n>,  B<--blank-lines-before-subs=n>
+
+The parameter B<-blbs=n> requests that least B blank lines precede a sub
+definition which does not follow a comment and which is more than one-line
+long.  The default is <-blbs=1>.  B and B blocks are included.
+
+The requested number of blanks statement will be inserted regardless of the
+value of B<--maximum-consecutive-blank-lines=n> (B<-mbl=n>) with the exception
+that if B<-mbl=0> then no blanks will be output.
+
+This parameter interacts with the value B of the parameter B<--maximum-consecutive-blank-lines=k> (B<-mbl=k>) as follows:
+
+1. If B<-mbl=0> then no blanks will be output.  This allows all blanks to be suppressed with a single parameter.  Otherwise,
+
+2. If the number of old blank lines in the script is less than B then
+additional blanks will be inserted to make the total B regardless of the
+value of B<-mbl=k>.  
+
+3. If the number of old blank lines in the script equals or exceeds B then
+this parameter has no effect, however the total will not exceed
+value specified on the B<-mbl=k> flag.
+
+
+=item B<-blbp=n>,  B<--blank-lines-before-packages=n>
+
+The parameter B<-blbp=n> requests that least B blank lines precede a package
+which does not follow a comment.  The default is B<-blbp=1>.  
+
+This parameter interacts with the value B of the parameter
+B<--maximum-consecutive-blank-lines=k> (B<-mbl=k>) in the same way as described
+for the previous item B<-blbs=n>.
+
+
+=item B<-bbs>,  B<--blanks-before-subs>
+
+For compatibility with previous versions, B<-bbs> or B<--blanks-before-subs>
+is equivalent to F<-blbp=1> and F<-blbs=1>.  
+
+Likewise, B<-nbbs> or B<--noblanks-before-subs> 
+is equivalent to F<-blbp=0> and F<-blbs=0>.  
+
+=item B<-bbb>,  B<--blanks-before-blocks>
+
+A blank line will be introduced before blocks of coding delimited by
+B, B, B, B, and B, B, in the following
+circumstances:
+
+=over 4
+
+=item *
+
+The block is not preceded by a comment.
+
+=item *
+
+The block is not a one-line block.
+
+=item *
+
+The number of consecutive non-blank lines at the current indentation depth is at least B<-lbl>
+(see next section).
+
+=back
+
+This is the default.  The intention of this option is to introduce
+some space within dense coding.
+This is negated with B<-nbbb> or  B<--noblanks-before-blocks>.
+
+=item B<-lbl=n> B<--long-block-line-count=n>
+
+This controls how often perltidy is allowed to add blank lines before 
+certain block types (see previous section).  The default is 8.  Entering
+a value of B<0> is equivalent to entering a very large number.
+
+=item B<-blao=i> or B<--blank-lines-after-opening-block=i>
+
+This control places a minimum of B blank lines B a line which B
+with an opening block brace of a specified type.  By default, this only applies
+to the block of a named B, but this can be changed (see B<-blaol> below).
+The default is not to do this (B).
+
+Please see the note below on using the B<-blao> and B<-blbc> options.
+
+=item B<-blbc=i> or B<--blank-lines-before-closing-block=i>
+
+This control places a minimum of B blank lines B a line which
+B with a closing block brace of a specified type.  By default, this
+only applies to the block of a named B, but this can be changed (see
+B<-blbcl> below).  The default is not to do this (B).
+
+=item B<-blaol=s> or B<--blank-lines-after-opening-block-list=s>
+
+The parameter B is a list of block type keywords to which the flag B<-blao>
+should apply.  The section L<"Specifying Block Types"> explains how to list
+block types.
+
+=item B<-blbcl=s> or B<--blank-lines-before-closing-block-list=s>
+
+This parameter is a list of block type keywords to which the flag B<-blbc>
+should apply.  The section L<"Specifying Block Types"> explains how to list
+block types.
+
+=item Note on using the B<-blao> and B<-blbc> options.
+
+These blank line controls introduce a certain minimum number of blank lines in
+the text, but the final number of blank lines may be greater, depending on
+values of the other blank line controls and the number of old blank lines.  A
+consequence is that introducing blank lines with these and other controls
+cannot be exactly undone, so some experimentation with these controls is
+recommended before using them.
+
+For example, suppose that for some reason we decide to introduce one blank
+space at the beginning and ending of all blocks.  We could do
+this using
+
+  perltidy -blao=2 -blbc=2 -blaol='*' -blbcl='*' filename
+
+Now suppose the script continues to be developed, but at some later date we
+decide we don't want these spaces after all. we might expect that running with
+the flags B<-blao=0> and B<-blbc=0> will undo them.  However, by default
+perltidy retains single blank lines, so the blank lines remain.  
+
+We can easily fix this by telling perltidy to ignore old blank lines by
+including the added parameter B<-kbl=0> and rerunning. Then the unwanted blank
+lines will be gone.  However, this will cause all old blank lines to be
+ignored, perhaps even some that were added by hand to improve formatting. So
+please be cautious when using these parameters.
+
+=item B<-mbl=n> B<--maximum-consecutive-blank-lines=n>   
+
+This parameter specifies the maximum number of consecutive blank lines which
+will be output within code sections of a script.  The default is n=1.  If the
+input file has more than n consecutive blank lines, the number will be reduced
+to n except as noted above for the B<-blbp> and B<-blbs> parameters.  If B
+then no blank lines will be output (unless all old blank lines are retained
+with the B<-kbl=2> flag of the next section).
+
+This flag obviously does not apply to pod sections,
+here-documents, and quotes.  
+
+=item B<-kbl=n>,  B<--keep-old-blank-lines=n>
+
+The B<-kbl=n> flag gives you control over how your existing blank lines are
+treated.  
+
+The possible values of B are:
+
+ n=0 ignore all old blank lines
+ n=1 stable: keep old blanks, but limited by the value of the B<-mbl=n> flag
+ n=2 keep all old blank lines, regardless of the value of the B<-mbl=n> flag
+
+The default is B.  
+
+=item B<-sob>,  B<--swallow-optional-blank-lines>
+
+This is equivalent to B and is included for compatibility with
+previous versions.
+
+=item B<-nsob>,  B<--noswallow-optional-blank-lines>
+
+This is equivalent to B and is included for compatibility with
+previous versions.
+
+=back
+
+=head2 Styles
+
+A style refers to a convenient collection of existing parameters.
+
+=over 4
+
+=item B<-gnu>, B<--gnu-style>
+
+B<-gnu> gives an approximation to the GNU Coding Standards (which do
+not apply to perl) as they are sometimes implemented.  At present, this
+style overrides the default style with the following parameters:
+
+    -lp -bl -noll -pt=2 -bt=2 -sbt=2 -icp
+
+=item B<-pbp>, B<--perl-best-practices>
+
+B<-pbp> is an abbreviation for the parameters in the book B
+by Damian Conway:
+
+    -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+    -wbb="% + - * / x != == >= <= =~ !~ < > | & = 
+          **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x="
+
+Please note that this parameter set includes -st and -se flags, which make
+perltidy act as a filter on one file only.  These can be overridden by placing
+B<-nst> and/or B<-nse> after the -pbp parameter. 
+
+Also note that the value of continuation indentation, -ci=4, is equal to the
+value of the full indentation, -i=4.  In some complex statements perltidy will
+produce nicer results with -ci=2. This can be implemented by including -ci=2
+after the -pbp parameter.  For example, 
+
+    # perltidy -pbp
+    $self->{_text} = (
+         !$section        ? ''
+        : $type eq 'item' ? "the $section entry"
+        :                   "the section on $section"
+        )
+        . (
+        $page
+        ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+        : ' elsewhere in this document'
+        );
+
+    # perltidy -pbp -ci=2
+    $self->{_text} = (
+         !$section        ? ''
+        : $type eq 'item' ? "the $section entry"
+        :                   "the section on $section"
+      )
+      . (
+        $page
+        ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+        : ' elsewhere in this document'
+      );
+
+=back
+
+=head2 Controlling Vertical Alignment
+
+Vertical alignment refers to lining up certain symbols in list of consecutive
+similar lines to improve readability.  For example, the "fat commas" are
+aligned in the following statement:
+
+        $data = $pkg->new(
+            PeerAddr => join( ".", @port[ 0 .. 3 ] ),   
+            PeerPort => $port[4] * 256 + $port[5],
+            Proto    => 'tcp'
+        );
+
+The only explicit control on vertical alignment is to turn it off using
+B<-novalign>, a flag mainly intended for debugging.  However, vertical
+alignment can be forced to stop and restart by selectively introducing blank
+lines.  For example, a blank has been inserted in the following code
+to keep somewhat similar things aligned.
+
+    %option_range = (
+        'format'             => [ 'tidy', 'html', 'user' ],
+        'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
+        'character-encoding' => [ 'none', 'utf8' ],
+
+        'block-brace-tightness'    => [ 0, 2 ],
+        'brace-tightness'          => [ 0, 2 ],
+        'paren-tightness'          => [ 0, 2 ],
+        'square-bracket-tightness' => [ 0, 2 ],
+    );
+
+
+=head2 Other Controls
+
+=over 4
+
+=item Deleting selected text 
+
+Perltidy can selectively delete comments and/or pod documentation.  The
+command B<-dac> or  B<--delete-all-comments> will delete all comments
+B all pod documentation, leaving just code and any leading system
+control lines.
+
+The command B<-dp> or B<--delete-pod> will remove all pod documentation
+(but not comments).
+
+Two commands which remove comments (but not pod) are: B<-dbc> or
+B<--delete-block-comments> and B<-dsc> or  B<--delete-side-comments>.
+(Hanging side comments will be deleted with block comments here.)
+
+The negatives of these commands also work, and are the defaults.  When
+block comments are deleted, any leading 'hash-bang' will be retained.
+Also, if the B<-x> flag is used, any system commands before a leading
+hash-bang will be retained (even if they are in the form of comments).
+
+=item Writing selected text to a file
+
+When perltidy writes a formatted text file, it has the ability to also
+send selected text to a file with a F<.TEE> extension.  This text can
+include comments and pod documentation.  
+
+The command B<-tac> or  B<--tee-all-comments> will write all comments
+B all pod documentation.
+
+The command B<-tp> or B<--tee-pod> will write all pod documentation (but
+not comments).
+
+The commands which write comments (but not pod) are: B<-tbc> or
+B<--tee-block-comments> and B<-tsc> or  B<--tee-side-comments>.
+(Hanging side comments will be written with block comments here.)
+
+The negatives of these commands also work, and are the defaults.  
+
+=item Using a F<.perltidyrc> command file
+
+If you use perltidy frequently, you probably won't be happy until you
+create a F<.perltidyrc> file to avoid typing commonly-used parameters.
+Perltidy will first look in your current directory for a command file
+named F<.perltidyrc>.  If it does not find one, it will continue looking
+for one in other standard locations.  
+
+These other locations are system-dependent, and may be displayed with
+the command C.  Under Unix systems, it will first look
+for an environment variable B.  Then it will look for a
+F<.perltidyrc> file in the home directory, and then for a system-wide
+file F, and then it will look for
+F.  Note that these last two system-wide files do not
+have a leading dot.  Further system-dependent information will be found
+in the INSTALL file distributed with perltidy.
+
+Under Windows, perltidy will also search for a configuration file named perltidy.ini since Windows does not allow files with a leading period (.).
+Use C to see the possible locations for your system.
+An example might be F.
+
+Another option is the use of the PERLTIDY environment variable.
+The method for setting environment variables depends upon the version of
+Windows that you are using.  Instructions for Windows 95 and later versions can
+be found here:
+
+http://www.netmanage.com/000/20021101_005_tcm21-6336.pdf
+
+Under Windows NT / 2000 / XP the PERLTIDY environment variable can be placed in
+either the user section or the system section.  The later makes the
+configuration file common to all users on the machine.  Be sure to enter the
+full path of the configuration file in the value of the environment variable.
+Ex.  PERLTIDY=C:\Documents and Settings\perltidy.ini
+
+The configuration file is free format, and simply a list of parameters, just as
+they would be entered on a command line.  Any number of lines may be used, with
+any number of parameters per line, although it may be easiest to read with one
+parameter per line.  Comment text begins with a #, and there must
+also be a space before the # for side comments.  It is a good idea to
+put complex parameters in either single or double quotes.
+
+Here is an example of a F<.perltidyrc> file:
+
+  # This is a simple of a .perltidyrc configuration file
+  # This implements a highly spaced style
+  -se    # errors to standard error output
+  -w     # show all warnings
+  -bl	 # braces on new lines
+  -pt=0  # parens not tight at all
+  -bt=0  # braces not tight
+  -sbt=0 # square brackets not tight
+
+The parameters in the F<.perltidyrc> file are installed first, so any
+parameters given on the command line will have priority over them.  
+
+To avoid confusion, perltidy ignores any command in the .perltidyrc
+file which would cause some kind of dump and an exit.  These are:
+
+ -h -v -ddf -dln -dop -dsn -dtt -dwls -dwrs -ss
+
+There are several options may be helpful in debugging a F<.perltidyrc>
+file:  
+
+=over 4
+
+=item *
+
+A very helpful command is B<--dump-profile> or B<-dpro>.  It writes a
+list of all configuration filenames tested to standard output, and 
+if a file is found, it dumps the content to standard output before
+exiting.  So, to find out where perltidy looks for its configuration
+files, and which one if any it selects, just enter 
+
+  perltidy -dpro
+
+=item *
+
+It may be simplest to develop and test configuration files with
+alternative names, and invoke them with B<-pro=filename> on the command
+line.  Then rename the desired file to F<.perltidyrc> when finished.
+
+=item *
+
+The parameters in the F<.perltidyrc> file can be switched off with 
+the B<-npro> option.
+
+=item *
+
+The commands B<--dump-options>, B<--dump-defaults>, B<--dump-long-names>,
+and B<--dump-short-names>, all described below, may all be helpful.
+
+=back
+
+=item Creating a new abbreviation
+
+A special notation is available for use in a F<.perltidyrc> file
+for creating an abbreviation for a group
+of options.  This can be used to create a
+shorthand for one or more styles which are frequently, but not always,
+used.  The notation is to group the options within curly braces which
+are preceded by the name of the alias (without leading dashes), like this:
+
+	newword {
+	-opt1
+	-opt2
+	}
+
+where B is the abbreviation, and B, etc, are existing parameters
+I.  The main syntax requirement is that the new
+abbreviation along with its opening curly brace must begin on a new line.
+Space before and after the curly braces is optional.
+For a
+specific example, the following line
+
+	airy {-bl -pt=0 -bt=0 -sbt=0}
+
+could be placed in a F<.perltidyrc> file, and then invoked at will with
+
+	perltidy -airy somefile.pl
+
+(Either C<-airy> or C<--airy> may be used).
+
+=item Skipping leading non-perl commands with B<-x> or B<--look-for-hash-bang>
+
+If your script has leading lines of system commands or other text which
+are not valid perl code, and which are separated from the start of the
+perl code by a "hash-bang" line, ( a line of the form C<#!...perl> ),
+you must use the B<-x> flag to tell perltidy not to parse and format any
+lines before the "hash-bang" line.  This option also invokes perl with a
+-x flag when checking the syntax.  This option was originally added to
+allow perltidy to parse interactive VMS scripts, but it should be used
+for any script which is normally invoked with C.
+
+=item  Making a file unreadable
+
+The goal of perltidy is to improve the readability of files, but there
+are two commands which have the opposite effect, B<--mangle> and
+B<--extrude>.  They are actually
+merely aliases for combinations of other parameters.  Both of these
+strip all possible whitespace, but leave comments and pod documents,
+so that they are essentially reversible.  The
+difference between these is that B<--mangle> puts the fewest possible
+line breaks in a script while B<--extrude> puts the maximum possible.
+Note that these options do not provided any meaningful obfuscation, because
+perltidy can be used to reformat the files.  They were originally
+developed to help test the tokenization logic of perltidy, but they
+have other uses.
+One use for B<--mangle> is the following:
+
+  perltidy --mangle myfile.pl -st | perltidy -o myfile.pl.new
+
+This will form the maximum possible number of one-line blocks (see next
+section), and can sometimes help clean up a badly formatted script.
+
+A similar technique can be used with B<--extrude> instead of B<--mangle>
+to make the minimum number of one-line blocks.
+
+Another use for B<--mangle> is to combine it with B<-dac> to reduce
+the file size of a perl script.
+
+=item  One-line blocks 
+
+There are a few points to note regarding one-line blocks.  A one-line
+block is something like this,
+
+	if ($x > 0) { $y = 1 / $x }  
+
+where the contents within the curly braces is short enough to fit
+on a single line.
+
+With few exceptions, perltidy retains existing one-line blocks, if it
+is possible within the line-length constraint, but it does not attempt
+to form new ones.  In other words, perltidy will try to follow the
+one-line block style of the input file.
+
+If an existing one-line block is longer than the maximum line length,
+however, it will be broken into multiple lines.  When this happens, perltidy
+checks for and adds any optional terminating semicolon (unless the B<-nasc>
+option is used) if the block is a code block.  
+
+The main exception is that perltidy will attempt to form new one-line
+blocks following the keywords C, C, and C, because
+these code blocks are often small and most clearly displayed in a single
+line.
+
+One-line block rules can conflict with the cuddled-else option.  When
+the cuddled-else option is used, perltidy retains existing one-line
+blocks, even if they do not obey cuddled-else formatting.
+
+Occasionally, when one-line blocks get broken because they exceed the
+available line length, the formatting will violate the requested brace style.
+If this happens, reformatting the script a second time should correct
+the problem.
+
+=item  Debugging 
+
+The following flags are available for debugging:
+
+B<--dump-cuddled-block-list> or B<-dcbl> will dump to standard output the
+internal hash of cuddled block types created by a B<-cuddled-block-list> input
+string.
+
+B<--dump-defaults> or B<-ddf> will write the default option set to standard output and quit
+
+B<--dump-profile> or B<-dpro>  will write the name of the current 
+configuration file and its contents to standard output and quit.
+
+B<--dump-options> or B<-dop>  will write current option set to standard
+output and quit.  
+
+B<--dump-long-names> or B<-dln>  will write all command line long names (passed 
+to Get_options) to standard output and quit.
+
+B<--dump-short-names>  or B<-dsn> will write all command line short names 
+to standard output and quit.
+
+B<--dump-token-types> or B<-dtt>  will write a list of all token types 
+to standard output and quit.
+
+B<--dump-want-left-space> or B<-dwls>  will write the hash %want_left_space
+to standard output and quit.  See the section on controlling whitespace
+around tokens.
+
+B<--dump-want-right-space> or B<-dwrs>  will write the hash %want_right_space
+to standard output and quit.  See the section on controlling whitespace
+around tokens.
+
+B<--no-memoize> or B<-nmem>  will turn of memoizing.
+Memoization can reduce run time when running perltidy repeatedly in a 
+single process.  It is on by default but can be deactivated for
+testing with B<-nmem>.
+
+B<--no-timestamp> or B<-nts> will eliminate any time stamps in output files to prevent
+differences in dates from causing test installation scripts to fail. There are just
+a couple of places where timestamps normally occur. One is in the headers of
+html files, and another is when the B<-cscw> option is selected. The default is
+to allow timestamps (B<--timestamp> or B<-ts>).
+
+B<--file-size-order> or B<-fso> will cause files to be processed in order of
+increasing size, when multiple files are being processed.  This is useful
+during program development, when large numbers of files with varying sizes are
+processed, because it can reduce virtual memory usage. 
+
+B<-DEBUG>  will write a file with extension F<.DEBUG> for each input file 
+showing the tokenization of all lines of code.
+
+=item Working with MakeMaker, AutoLoader and SelfLoader
+
+The first $VERSION line of a file which might be eval'd by MakeMaker
+is passed through unchanged except for indentation.  
+Use B<--nopass-version-line>, or B<-npvl>, to deactivate this feature.
+
+If the AutoLoader module is used, perltidy will continue formatting
+code after seeing an __END__ line.
+Use B<--nolook-for-autoloader>, or B<-nlal>, to deactivate this feature.
+
+Likewise, if the SelfLoader module is used, perltidy will continue formatting
+code after seeing a __DATA__ line.
+Use B<--nolook-for-selfloader>, or B<-nlsl>, to deactivate this feature.
+
+=item Working around problems with older version of Perl 
+
+Perltidy contains a number of rules which help avoid known subtleties
+and problems with older versions of perl, and these rules always
+take priority over whatever formatting flags have been set.  For example,
+perltidy will usually avoid starting a new line with a bareword, because
+this might cause problems if C is active.
+
+There is no way to override these rules.
+
+=back
+
+=head1 HTML OPTIONS
+
+=over 4
+
+=item  The B<-html> master switch
+
+The flag B<-html> causes perltidy to write an html file with extension
+F<.html>.  So, for example, the following command
+
+	perltidy -html somefile.pl
+
+will produce a syntax-colored html file named F
+which may be viewed with a browser.
+
+B: In this case, perltidy does not do any formatting to the
+input file, and it does not write a formatted file with extension
+F<.tdy>.  This means that two perltidy runs are required to create a
+fully reformatted, html copy of a script.  
+
+=item  The B<-pre> flag for code snippets
+
+When the B<-pre> flag is given, only the pre-formatted section, within
+the 
 and 
tags, will be output. This simplifies inclusion +of the output in other files. The default is to output a complete +web page. + +=item The B<-nnn> flag for line numbering + +When the B<-nnn> flag is given, the output lines will be numbered. + +=item The B<-toc>, or B<--html-table-of-contents> flag + +By default, a table of contents to packages and subroutines will be +written at the start of html output. Use B<-ntoc> to prevent this. +This might be useful, for example, for a pod document which contains a +number of unrelated code snippets. This flag only influences the code +table of contents; it has no effect on any table of contents produced by +pod2html (see next item). + +=item The B<-pod>, or B<--pod2html> flag + +There are two options for formatting pod documentation. The default is +to pass the pod through the Pod::Html module (which forms the basis of +the pod2html utility). Any code sections are formatted by perltidy, and +the results then merged. Note: perltidy creates a temporary file when +Pod::Html is used; see L<"FILES">. Also, Pod::Html creates temporary +files for its cache. + +NOTE: Perltidy counts the number of C<=cut> lines, and either moves the +pod text to the top of the html file if there is one C<=cut>, or leaves +the pod text in its original order (interleaved with code) otherwise. + +Most of the flags accepted by pod2html may be included in the perltidy +command line, and they will be passed to pod2html. In some cases, +the flags have a prefix C to emphasize that they are for the +pod2html, and this prefix will be removed before they are passed to +pod2html. The flags which have the additional C prefix are: + + --[no]podheader --[no]podindex --[no]podrecurse --[no]podquiet + --[no]podverbose --podflush + +The flags which are unchanged from their use in pod2html are: + + --backlink=s --cachedir=s --htmlroot=s --libpods=s --title=s + --podpath=s --podroot=s + +where 's' is an appropriate character string. Not all of these flags are +available in older versions of Pod::Html. See your Pod::Html documentation for +more information. + +The alternative, indicated with B<-npod>, is not to use Pod::Html, but +rather to format pod text in italics (or whatever the stylesheet +indicates), without special html markup. This is useful, for example, +if pod is being used as an alternative way to write comments. + +=item The B<-frm>, or B<--frames> flag + +By default, a single html output file is produced. This can be changed +with the B<-frm> option, which creates a frame holding a table of +contents in the left panel and the source code in the right side. This +simplifies code browsing. Assume, for example, that the input file is +F. Then, for default file extension choices, these three +files will be created: + + MyModule.pm.html - the frame + MyModule.pm.toc.html - the table of contents + MyModule.pm.src.html - the formatted source code + +Obviously this file naming scheme requires that output be directed to a real +file (as opposed to, say, standard output). If this is not the +case, or if the file extension is unknown, the B<-frm> option will be +ignored. + +=item The B<-text=s>, or B<--html-toc-extension> flag + +Use this flag to specify the extra file extension of the table of contents file +when html frames are used. The default is "toc". +See L. + +=item The B<-sext=s>, or B<--html-src-extension> flag + +Use this flag to specify the extra file extension of the content file when html +frames are used. The default is "src". +See L. + +=item The B<-hent>, or B<--html-entities> flag + +This flag controls the use of Html::Entities for html formatting. By +default, the module Html::Entities is used to encode special symbols. +This may not be the right thing for some browser/language +combinations. Use --nohtml-entities or -nhent to prevent this. + +=item Style Sheets + +Style sheets make it very convenient to control and adjust the +appearance of html pages. The default behavior is to write a page of +html with an embedded style sheet. + +An alternative to an embedded style sheet is to create a page with a +link to an external style sheet. This is indicated with the +B<-css=filename>, where the external style sheet is F. The +external style sheet F will be created if and only if it does +not exist. This option is useful for controlling multiple pages from a +single style sheet. + +To cause perltidy to write a style sheet to standard output and exit, +use the B<-ss>, or B<--stylesheet>, flag. This is useful if the style +sheet could not be written for some reason, such as if the B<-pre> flag +was used. Thus, for example, + + perltidy -html -ss >mystyle.css + +will write a style sheet with the default properties to file +F. + +The use of style sheets is encouraged, but a web page without a style +sheets can be created with the flag B<-nss>. Use this option if you +must to be sure that older browsers (roughly speaking, versions prior to +4.0 of Netscape Navigator and Internet Explorer) can display the +syntax-coloring of the html files. + +=item Controlling HTML properties + +Note: It is usually more convenient to accept the default properties +and then edit the stylesheet which is produced. However, this section +shows how to control the properties with flags to perltidy. + +Syntax colors may be changed from their default values by flags of the either +the long form, B<-html-color-xxxxxx=n>, or more conveniently the short form, +B<-hcx=n>, where B is one of the following words, and B is the +corresponding abbreviation: + + Token Type xxxxxx x + ---------- -------- -- + comment comment c + number numeric n + identifier identifier i + bareword, function bareword w + keyword keyword k + quite, pattern quote q + here doc text here-doc-text h + here doc target here-doc-target hh + punctuation punctuation pu + parentheses paren p + structural braces structure s + semicolon semicolon sc + colon colon co + comma comma cm + label label j + sub definition name subroutine m + pod text pod-text pd + +A default set of colors has been defined, but they may be changed by providing +values to any of the following parameters, where B is either a 6 digit +hex RGB color value or an ascii name for a color, such as 'red'. + +To illustrate, the following command will produce an html +file F with "aqua" keywords: + + perltidy -html -hck=00ffff somefile.pl + +and this should be equivalent for most browsers: + + perltidy -html -hck=aqua somefile.pl + +Perltidy merely writes any non-hex names that it sees in the html file. +The following 16 color names are defined in the HTML 3.2 standard: + + black => 000000, + silver => c0c0c0, + gray => 808080, + white => ffffff, + maroon => 800000, + red => ff0000, + purple => 800080, + fuchsia => ff00ff, + green => 008000, + lime => 00ff00, + olive => 808000, + yellow => ffff00 + navy => 000080, + blue => 0000ff, + teal => 008080, + aqua => 00ffff, + +Many more names are supported in specific browsers, but it is safest +to use the hex codes for other colors. Helpful color tables can be +located with an internet search for "HTML color tables". + +Besides color, two other character attributes may be set: bold, and italics. +To set a token type to use bold, use the flag +B<--html-bold-xxxxxx> or B<-hbx>, where B or B are the long +or short names from the above table. Conversely, to set a token type to +NOT use bold, use B<--nohtml-bold-xxxxxx> or B<-nhbx>. + +Likewise, to set a token type to use an italic font, use the flag +B<--html-italic-xxxxxx> or B<-hix>, where again B or B are the +long or short names from the above table. And to set a token type to +NOT use italics, use B<--nohtml-italic-xxxxxx> or B<-nhix>. + +For example, to use bold braces and lime color, non-bold, italics keywords the +following command would be used: + + perltidy -html -hbs -hck=00FF00 -nhbk -hik somefile.pl + +The background color can be specified with B<--html-color-background=n>, +or B<-hcbg=n> for short, where n is a 6 character hex RGB value. The +default color of text is the value given to B, which is +black as a default. + +Here are some notes and hints: + +1. If you find a preferred set of these parameters, you may want +to create a F<.perltidyrc> file containing them. See the perltidy man +page for an explanation. + +2. Rather than specifying values for these parameters, it is probably +easier to accept the defaults and then edit a style sheet. The style +sheet contains comments which should make this easy. + +3. The syntax-colored html files can be very large, so it may be best to +split large files into smaller pieces to improve download times. + +=back + +=head1 SOME COMMON INPUT CONVENTIONS + +=head2 Specifying Block Types + +Several parameters which refer to code block types may be customized by also +specifying an associated list of block types. The type of a block is the name +of the keyword which introduces that block, such as B, B, or B. +An exception is a labeled block, which has no keyword, and should be specified +with just a colon. To specify all blocks use B<'*'>. + +The keyword B indicates a named sub. For anonymous subs, use the special +keyword B. + +For example, the following parameter specifies C, labels, C, and +C blocks: + + -cscl="sub : BEGIN END" + +(the meaning of the -cscl parameter is described above.) Note that +quotes are required around the list of block types because of the +spaces. For another example, the following list specifies all block types +for vertical tightness: + + -bbvtl='*' + +=head2 Specifying File Extensions + +Several parameters allow default file extensions to be overridden. For +example, a backup file extension may be specified with B<-bext=ext>, +where B is some new extension. In order to provides the user some +flexibility, the following convention is used in all cases to decide if +a leading '.' should be used. If the extension C begins with +C, C, or C<0-9>, then it will be appended to the filename with +an intermediate '.' (or perhaps an '_' on VMS systems). Otherwise, it +will be appended directly. + +For example, suppose the file is F. For C<-bext=old>, a '.' is +added to give F. For C<-bext=.old>, no additional '.' is +added, so again the backup file is F. For C<-bext=~>, then no +dot is added, and the backup file will be F . + +=head1 SWITCHES WHICH MAY BE NEGATED + +The following list shows all short parameter names which allow a prefix +'n' to produce the negated form: + + D anl asc aws b bbb bbc bbs bl bli boc bok bol bot ce + csc dac dbc dcsc ddf dln dnl dop dp dpro dsc dsm dsn dtt dwls + dwrs dws f fll frm fs hsc html ibc icb icp iob isbc lal log + lp lsl ohbr okw ola oll opr opt osbr otr ple pod pvl q + sbc sbl schb scp scsb sct se sfp sfs skp sob sohb sop sosb sot + ssc st sts syn t tac tbc toc tp tqw tsc w x bar kis + +Equivalently, the prefix 'no' or 'no-' on the corresponding long names may be +used. + +=head1 LIMITATIONS + +=over 4 + +=item Parsing Limitations + +Perltidy should work properly on most perl scripts. It does a lot of +self-checking, but still, it is possible that an error could be +introduced and go undetected. Therefore, it is essential to make +careful backups and to test reformatted scripts. + +The main current limitation is that perltidy does not scan modules +included with 'use' statements. This makes it necessary to guess the +context of any bare words introduced by such modules. Perltidy has good +guessing algorithms, but they are not infallible. When it must guess, +it leaves a message in the log file. + +If you encounter a bug, please report it. + +=item What perltidy does not parse and format + +Perltidy indents but does not reformat comments and C quotes. +Perltidy does not in any way modify the contents of here documents or +quoted text, even if they contain source code. (You could, however, +reformat them separately). Perltidy does not format 'format' sections +in any way. And, of course, it does not modify pod documents. + +=back + +=head1 FILES + +=over 4 + +=item Temporary files + +Under the -html option with the default --pod2html flag, a temporary file is +required to pass text to Pod::Html. Unix systems will try to use the POSIX +tmpnam() function. Otherwise the file F will be temporarily +created in the current working directory. + +=item Special files when standard input is used + +When standard input is used, the log file, if saved, is F, +and any errors are written to F unless the B<-se> flag is +set. These are saved in the current working directory. + +=item Files overwritten + +The following file extensions are used by perltidy, and files with these +extensions may be overwritten or deleted: F<.ERR>, F<.LOG>, F<.TEE>, +and/or F<.tdy>, F<.html>, and F<.bak>, depending on the run type and +settings. + +=item Files extensions limitations + +Perltidy does not operate on files for which the run could produce a file with +a duplicated file extension. These extensions include F<.LOG>, F<.ERR>, +F<.TEE>, and perhaps F<.tdy> and F<.bak>, depending on the run type. The +purpose of this rule is to prevent generating confusing filenames such as +F. + +=back + +=head1 SEE ALSO + +perlstyle(1), Perl::Tidy(3) + +=head1 VERSION + +This man page documents perltidy version 20180220.01 + +=head1 BUG REPORTS + +A list of current bugs and issues can be found at the CPAN site + + https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy + +To report a new bug or problem, use the link on this page. + +=head1 COPYRIGHT + +Copyright (c) 2000-2018 by Steve Hancock + +=head1 LICENSE + +This package is free software; you can redistribute it and/or modify it +under the terms of the "GNU General Public License". + +Please refer to the file "COPYING" for details. + +=head1 DISCLAIMER + +This package is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +See the "GNU General Public License" for more details. diff --git a/dev-bin/build.pl b/dev-bin/build.pl new file mode 100755 index 00000000..bb05e67f --- /dev/null +++ b/dev-bin/build.pl @@ -0,0 +1,839 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Perl::Tidy; +use File::Copy; +use File::Temp qw(tempfile); +$| = 1; + +# a script to help make a new version of perltidy + +# First cd to the git root directory, so that all paths are then given from the +# git root +my $git_home = qx[git rev-parse --show-toplevel]; +chomp $git_home; +chdir $git_home; + +# Here are the main packages I used to setup a Ubuntu 16.04 system +# for Perl development: +# +# sudo apt-get install openssh-server +# sudo apt-get install markdown +# sudo apt-get install ispell +# +# # Perl Modules: +# sudo cpan -i App::cpanminus +# cpanm Perl::Critic +# cpanm Tidy::All +# cpanm Perl::::MinimumVersion (has perlver) +# sudo cpan App::perlbrew +# perlbrew init +# sudo apt-get install git +# git config --global user.name "Steve Hancock" +# git config --global user.email perltidy@users.sourceforge.net + +# TODO: +# add a perlver step +# add a browse the tar file step + +my $logfile = "dev-bin/build.log"; +my $changelog = "local-docs/ChangeLog.pod"; +my $fh_log; + +# These are the main steps, in approximate order, for making a new version +# Note: Since perl critic is in the .tidyallrc, a separate 'PC' step is not +# needed +my $rsteps = [qw( CHK V TIDY T CL POD DIST)]; + +my $rstatus = {}; +foreach my $step ( @{$rsteps} ) { $rstatus->{$step} = 'TBD' } + +my $rcode = { + 'A' => \&autopilot, + 'CHK' => sub { + openurl("local-docs/Release-Checklist.md") + unless $rstatus->{CHK} eq 'OK'; + $rstatus->{CHK} = 'OK'; + }, + 'V' => \&update_version_number, + 'PC' => \&run_perl_critic, + 'TIDY' => \&run_tidyall, + 'T' => \&make_tests, + 'POD' => \&make_docs, + 'DIST' => \&make_dist, + 'CL' => sub {openurl($changelog)}, + 'LOG' => sub { openurl($logfile) }, + 'DIR' => sub { openurl("local-docs") }, +}; + +open( $fh_log, ">", $logfile ) or die "cannot open log file $logfile: $!\n"; +main(); +$fh_log->close(); + +sub main { + while (1) { + print <{'CHK'} +v - check/update Version Number status: $rstatus->{'V'} +tidy - run tidyall (tidy & critic) status: $rstatus->{'TIDY'} +pc - run PerlCritic (critic only) status: $rstatus->{'PC'} +t - make Tests status: $rstatus->{'T'} +cl - review/edit ChangeLog.pod status: $rstatus->{'CL'} +pod - check and process POD docs status: $rstatus->{'POD'} +dist - make a Distribution tar.gz status: $rstatus->{'DIST'} +dir - browse doc files +log - view Log file + +q,x - eXit + +EOM + my $ans = queryu(":"); + if ( defined( $rcode->{$ans} ) ) { + $rcode->{$ans}->(); + } + elsif ( $ans eq 'Q' || $ans eq 'X' ) { + return; + } + } + return; +} + +sub autopilot { + foreach my $step ( @{$rsteps} ) { + if ( $rstatus->{$step} ne 'OK' ) { + $rcode->{$step}->(); + if ( $rstatus->{$step} ne 'OK' ) { + hitcr("Step '$step' FAILED; stopping Autopilot."); + return; + } + return if (!ifyes("Step '$step' Done; Continue [Y/N]")); + } + } + return; +} + +sub run_tidyall { + my $fout = "tmp/tidyall.out"; + $rstatus->{'TIDY'} = 'TBD'; + + # running with any .perltidyrc file + my $cmd = "tidyall -a >$fout"; + system_echo($cmd); + + my $fh; + if ( !open( $fh, '<', $fout ) ) { + hitcr("Strange: cannot open '$fout': $!."); + return; + } + my @lines = <$fh>; + foreach my $line (@lines) { $fh_log->print($line) } + + # FIXME: haven't tried to look for errors yet + my @errors; + + #my @errors = grep { !/source OK\s*$/ } @lines; + + $fh->close(); + if ( !@errors ) { + $rstatus->{'TIDY'} = 'OK'; + hitcr("Source OK."); + return; + } + openurl("$fout"); + return; +} + +sub run_perl_critic { + my $pcoutput = "tmp/perlcritic.out"; + $rstatus->{'PC'} = 'TBD'; + + # running with parameters in .perlcritic + my $cmd = "perlcritic lib/Perl/* >tmp/perlcritic.out"; + system_echo($cmd); + my $fh; + if ( !open( $fh, '<', $pcoutput ) ) { + hitcr("Strange: cannot open '$pcoutput': $!."); + return; + } + my @lines = <$fh>; + my @errors = grep { !/source OK\s*$/ } @lines; + foreach my $line (@lines) { $fh_log->print($line) } + $fh->close(); + + if ( !@errors ) { + $rstatus->{'PC'} = 'OK'; + hitcr("Source OK."); + return; + } + openurl("$pcoutput"); + return; +} + +sub make_tests { + + my $result; + $result = sys_command("perl Makefile.PL"); + $result = sys_command("make"); + unless ( -e "Makefile" ) { query("Makefile missing..hit {'T'} = $result =~ 'Result: PASS' ? 'OK' : 'TBD'; + hitcr(); + return $rstatus->{'T'}; +} + +sub make_docs { + +# Need to figure out if make fails. For now I'm looking for 'Stop' as in +# this error: +# make: *** No rule to make target 'tutorial.pod', needed by 'tutorial.html'. Stop. + my @errors; + foreach my $file ( + qw( + local-docs/ChangeLog.pod + local-docs/README.pod + local-docs/INSTALL.pod + lib/Perl/Tidy.pod + bin/perltidy + ) + ) + { + my $errfile = "tmp/podchecker.err"; + my $result = sys_command("podchecker $file 2>$errfile"); + + #if ( $result) { + my $fh; + open( $fh, '<', $errfile ) || die "cannot open $errfile: $!\n"; + my $saw_error; + foreach my $line (<$fh>) { + $fh_log->print($line); + if ( $line =~ /error/i ) { + $saw_error = 1; + } + } + $fh->close(); + push @errors, $file if ($saw_error); + + } + if (@errors) { + local $" = ') ('; + print "These file(s) had errors: (@errors)\n"; + hitcr("See the log file"); + $rstatus->{'POD'} = 'TBD'; + return; + } + + # finish up + my $result = sys_command("(cd local-docs; make)"); + print $result; + $rstatus->{'POD'} = $result =~ /Stop\./i ? 'TBD' : 'OK'; + hitcr(); + return; +} + +sub make_dist { + my $result; + + if ( $rstatus->{'T'} !~ /^OK$/ ) { + make_tests(); + } + if ( $rstatus->{'T'} !~ /^OK$/ ) { + hitcr("Problem with tests .. no .tar.gz."); + } + $result = sys_command("make dist"); + print $result; + + my ( $tar_gz_file, $created_VERSION ); + if ( $result =~ /Created (Perl-Tidy-(.*)\.tar\.gz)$/ ) { + $tar_gz_file = $1; + $created_VERSION = $2; + } + else { + hitcr("can't find the .tar.gz"); + return; + } + if ( !-e $tar_gz_file ) { + hitcr("Strange, can't find '$tar_gz_file'"); + return; + } + + $rstatus->{'DIST'} = 'OK'; + + # Make a zip for new releases + my $default = $created_VERSION =~ /\./ ? "N" : "Y"; + if ( ifyes( "OK. Make a .zip too? [Y/N], =$default", $default ) ) { + make_zip($tar_gz_file); + } + return; +} + +sub make_zip { + + my ($tar_gz_file) = @_; + my $dir_name = $tar_gz_file; + $dir_name =~ s/\.tar*$//; + + my $command; + + # clean out any old build in /tmp + my $result = sys_command("rm -rf /tmp/$dir_name"); + + # move the file + $result = sys_command("mv $tar_gz_file /tmp"); + + # untar it + $command = "(cd /tmp; tar xvfz $tar_gz_file;)"; + $result = sys_command("$command"); + + # zip it up + my $zip_name = $dir_name . ".zip"; + $command = "(cd /tmp; zip -r -y -m -T -9 $zip_name $dir_name ;)"; + $result = sys_command($command); + + # move it + $result = sys_command("mv /tmp/$zip_name ."); + return; +} + +sub update_version_number { + + my $reported_VERSION = $Perl::Tidy::VERSION; + my $lib_path = "lib/Perl/"; + my $bin_path = "bin/"; + my @sources = ( $lib_path . "Tidy.pm", $lib_path . "Tidy.pod", + $bin_path . "perltidy", ); + push @sources, "local-docs/ChangeLog.pod"; + + my $Tidy_pm_file = $lib_path . "Tidy.pm"; + + # I have removed this one; it was useful in development + # CS Check that Selected files have the current VERSION + + RETRY: + print <{'V'} = $ok ? 'OK' : 'TBD'; + } + return; + } + elsif ( $ans eq 'RV' ) { + my $new_VERSION = get_new_release_version($reported_VERSION); + next if ( $new_VERSION == $reported_VERSION ); + if ( ifyes("New version will be: '$new_VERSION'. OK? [Y/N]") ) { + my $ok = update_all_sources( $new_VERSION, @sources ); + $rstatus->{'V'} = $ok ? 'OK' : 'TBD'; + } + return; + } + elsif ( $ans eq 'CA' ) { + my $new_VERSION = $reported_VERSION; + my $ok = update_all_sources( $new_VERSION, @sources ); + $rstatus->{'V'} = $ok ? 'OK' : 'TBD'; + return; + } + + # I have left this as a hidden menu item for testing + # but it is not on the menu because it would be confusing + elsif ( $ans eq 'CS' ) { + my $new_VERSION = $reported_VERSION; + my @check = grep { ifyes("Check $_? [Y/N]") } @sources; + update_all_sources( $new_VERSION, @check ); + return; + } + elsif ( $ans eq 'Q' || $ans eq 'X' ) { + return; + } + goto RETRY if ( ifyes("?? I didn't get that, try again? [Y/N]") ); + return; +} + +sub get_new_development_version { + my ($reported_VERSION) = @_; + my $new_VERSION = $reported_VERSION; + my @parts = split /\./, $reported_VERSION; + if ( @parts == 1 ) { + + # first development after release + $parts[1] = "01"; + } + elsif ( @parts == 2 ) { + + # bumping development version + my $dv = $parts[1]; + if ( $dv !~ /^\d\d$/ ) { + query("development version: '$dv'. Hit "); + return; + } + if ( $dv == 99 ) { + query( +"development version: '$dv' is maxed out. Do a release. Hit " + ); + return; + } + $parts[1]++; + } + else { + query("Sorry: cannot interpret starting VERSION number\n"); + return; + } + + $new_VERSION = join '.', @parts; + return $new_VERSION; +} + +sub get_new_release_version { + my ($reported_VERSION) = @_; + my $new_VERSION = $reported_VERSION; + my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; + $year += 1900; + $month += 1; + $month = sprintf "%02d", $month; + $day = sprintf "%02d", $day; + $new_VERSION = "$year$month$day"; + + if ( !ifyes("Suggest VERSION $new_VERSION, OK [Y/N]") ) { + $new_VERSION = query("Enter release VERSION:"); + } + return $new_VERSION; +} + +sub query { + my ($msg) = @_; + print $msg; + my $ans = ; + chomp $ans; + return $ans; +} + +sub queryu { + return uc query(@_); +} + +sub hitcr { + my ($msg) = @_; + if ($msg) { $msg .= " Hit to continue"; } + else { $msg = "Hit to continue" } + query($msg); +} + +sub ifyes { + + # Updated to have default, which should be "Y" or "N" + my ( $msg, $default ) = @_; + my $count = 0; + ASK: + my $ans = query($msg); + if ( defined($default) ) { + $ans = $default unless ($ans); + } + if ( $ans =~ /^Y/i ) { return 1 } + elsif ( $ans =~ /^N/i ) { return 0 } + else { + $count++; + if ( $count > 6 ) { die "error count exceeded in ifyes\n" } + print STDERR "Please answer 'Y' or 'N'\n"; + goto ASK; + } +} + +sub update_all_sources { + my ( $new_VERSION, @sources ) = @_; + + # preliminary checks + if ( !$new_VERSION ) { + return; + } + foreach my $source_file (@sources) { + + if ( !-f $source_file ) { + print <$runme" ) ) { + print RUN <', $tmpfile ) ) { + query("cannot open $tmpfile: $!\n"); + return; + } + if ( !$ftmp ) { query("Could not get a temporary file"); return } + my $fh; + if ( !open( $fh, '<', $source_file ) ) { + query("cannot open $source_file: $!\n"); + return; + } + my $in_pod; + my $is_pod_file = $source_file !~ /\.pm/; + while ( my $line = <$fh> ) { + + # finish writing after the change + if ($old_VERSION_line) { + $ftmp->print($line); + next; + } + + # looking for VERSION in pod + if ($is_pod_file) { + $in_pod = $in_pod ? $line !~ /^=cut/ : $line =~ /^=/; + if ($in_pod) { + + # perltidy and Tidy.pod have lines like this + if ( $line =~ /(This man page documents.*version\s*)(.*)/ ) { + + $old_VERSION_line = $line; + chomp $old_VERSION_line; + $new_VERSION_line = $1 . $new_VERSION; + $line = $new_VERSION_line . "\n"; + } + + # ChangeLog.pod has a line like this: + # =head2 2018 xx xx + elsif ( $line =~ /=head2 \d\d\d\d/ ) { + $old_VERSION_line = $line; + chomp $old_VERSION_line; + my $spaced_new_VERSION = $new_VERSION; + if ( $spaced_new_VERSION =~ /(\d\d\d\d)(\d\d)(\d\d.*)/ ) { + $spaced_new_VERSION = "$1 $2 $3"; + } + $new_VERSION_line = "=head2 $spaced_new_VERSION"; + $line = $new_VERSION_line . "\n"; + } + } + } + + # looking for version in module + else { + + # Looking for something simple like this, with or without quotes, + # with semicolon and no sidecomments: + # $VERSION = "20180202.245" ; + # our $VERSION = 20104202 ; + if ( $line =~ + /^((our)?\s*\$VERSION\s*=\s*\'?) ([^'#]+) (\'?) \s* ;/x ) + { + $old_VERSION_line = $line; + chomp $old_VERSION_line; + $new_VERSION_line = $1 . $new_VERSION . $4 . ";"; + $line = $new_VERSION_line . "\n"; + } + } + $ftmp->print($line); + } + + $ftmp->close(); + + # Report results + if ( !$old_VERSION_line ) { + query("could not find old VERSION in file!"); + unlink $tmpfile; + return; + } + + print <>> $cmd\n"; + + $fh_log->print(">>> $cmd"); + my $result = qx{$cmd}; + if ($result) { + $fh_log->print($result); + + #print LOGFILE $result } + } + + return $result; +} +__END__ + +OLD SCRIPT FOLLOWS, FOR REFERENCE +#!/usr/bin/perl -w +use strict; +# +# This script creates the perltidy distribution files +# TBD: +# - add this stuff to CVS +my $result; +my $VERSION; +my $DEBIAN; # undefined, no longer doing debian + +# ----------------------------------------------------------------------- +# go through the CHECKLIST +# ----------------------------------------------------------------------- +system ("less CHECKLIST"); +print STDOUT "Continue? [Y/N]\n"; +my $ans=; +exit -1 unless ($ans =~ /^[Yy]/); +open LOGFILE, "> makedist.out"; + +# ----------------------------------------------------------------------- +# Build the distribution files +# ----------------------------------------------------------------------- +# +# copy the 'perltidy' script over, removing the built-in path +$result = sys_command("./nodist/fix_perltidy <../src/perltidy >bin/perltidy"); + +# update the doc files +$result = sys_command("(cd ../docs; make)"); + +# use MakeMaker to build the distribution +$result = sys_command("perl Makefile.PL"); +$result = sys_command("make"); +$result = sys_command("make test"); +unless ( -e "Makefile" ) { die "Makefile missing..\n" } +$result = sys_command("make dist"); + +# get the VERSION from the output +if ( $result =~ /Perl-Tidy-(\d+)/ ) { + $VERSION=$1; + print "version is : $VERSION\n"; +} +my $dir_name = "Perl-Tidy-$VERSION"; +my $tar_gz_file = "$dir_name" . ".tar.gz"; +my $tgz_file = "$dir_name" . ".tgz"; +my $command; + +# clean out any old build in /tmp +$result = sys_command("rm -rf /tmp/$dir_name"); + +# move the file +$result = sys_command("mv $tar_gz_file /tmp"); + +# untar it +$command = "(cd /tmp; tar xvfz $tar_gz_file;)"; +$result = sys_command("$command"); + +# ----------------------------------------------------------------------- +# set file permissions +# ----------------------------------------------------------------------- +# fix permissions to be: +# 0644 - text files +# 0755 - directories and executables + +# first walk through the manifest and set all to 0644 +open MANIFEST, "< MANIFEST" or die "cannot open MANIFEST: $!\n"; +while (my $line=) { + $line=~s/^\s+//; + + # remove excess text from lines + my $file=split /\s+/, $line; + sys_command("chmod 0644 /tmp/$dir_name/$file"); +} + +# then go back and set binaries to 0755 +sys_command("chmod 0755 /tmp/$dir_name/pm2pl"); +sys_command("chmod 0755 /tmp/$dir_name/bin/*"); +if ($DEBIAN) { + sys_command("chmod 0755 /tmp/$dir_name/debian/rules"); +} + +# ----------------------------------------------------------------------- +# rebuild the tar file, which will get correct permissions +# ----------------------------------------------------------------------- +$command = " (cd /tmp; rm $tar_gz_file; tar cvf - $dir_name | gzip -9 > $tar_gz_file ;)"; +$result = sys_command("$command"); + +# ----------------------------------------------------------------------- +# make the .zip file +# ----------------------------------------------------------------------- +# +##?# OLD: change line endings for windows +##?# $command .= " flipall $dir_name -m; cd $dir_name;"; +##?# my @dirs = qw( bin lib docs t examples); +##?# TESTING foreach (@dirs) { $command .= " flipall $_ -m;" } +##? +##?# this works +##?# perl -Mopen=OUT,:crlf -pi.bak -e0 filename +##?# $command .= " cd .. ; zip -r -y -m -T -9 $zip_name $dir_name ; "; + +# zip it up +my $zip_name = $dir_name . ".zip"; +$command = "(cd /tmp; zip -r -y -m -T -9 $zip_name $dir_name ;)"; +$result = sys_command($command); + +###$command = " (cd /tmp ; zip -r -y -m -T -9 $zip_name $dir_name ; "; +##3#$command .= ")"; +##print STDERR "$command", "\n"; +##exit 1; + +# ----------------------------------------------------------------------- +# Make the debian package +# TODO: go through and fix error messages +# ----------------------------------------------------------------------- +if ($DEBIAN) { + my $debian_package = 'perltidy'; + my $debian_dir_name = "$debian_package-$VERSION"; + $command = "(cd /tmp ; mkdir deb ; cd deb ; tar xvfz ../$tar_gz_file;)"; + $result = sys_command($command); + $command = "(cd /tmp/deb; mv $dir_name $debian_dir_name)"; + $result = sys_command($command); + $command = + "(cd /tmp/deb/$debian_dir_name ; dpkg-buildpackage -uc -d -rfakeroot ;)"; + $result = sys_command($command); + my $deb_name = $debian_package . '_' . $VERSION . '-1_all.deb'; + $result = sys_command("mv /tmp/deb/$deb_name /tmp"); + $result = sys_command("rm -rf /tmp/deb"); + $result = sys_command("mv /tmp/$deb_name ../archive"); +} + +# ----------------------------------------------------------------------- +# move the files over to the archive area +# ----------------------------------------------------------------------- +$result = sys_command("mv /tmp/$tar_gz_file ../archive"); +$result = sys_command("mv /tmp/$zip_name ../archive"); diff --git a/docs/README b/docs/README new file mode 100644 index 00000000..ef3b7623 --- /dev/null +++ b/docs/README @@ -0,0 +1,11 @@ +All of the documentation for perltidy can be found at +http://perltidy.sourceforge.net + +The man page is in pod format appended to the script bin/perltidy. + +The man page for use of the module Perl::Tidy.pm is in Perl::Tidy.pod + +tutorial.pod - is a short tutorial +testfile.pl - the test file referenced by tutorial.pod + +stylekey.pod - is a document to help find parameters for a given style diff --git a/docs/stylekey.pod b/docs/stylekey.pod new file mode 100644 index 00000000..ca9a7e00 --- /dev/null +++ b/docs/stylekey.pod @@ -0,0 +1,781 @@ +=head1 Perltidy Style Key + +When perltidy was first developed, the main parameter choices were the number +of indentation spaces and if the user liked cuddled else's. As the number of +users has grown so has the number of parameters. Now there are so many that it +can be difficult for a new user to find a good initial set. This document is +one attempt to help with this problem, and some other suggestions are given at +the end. + +Use this document to methodically find a starting set of perltidy parameters to +approximate your style. We will be working on just one aspect of formatting at +a time. Just read each question and select the best answer. Enter your +parameters in a file named F<.perltidyrc> (examples are listed at the end). +Then move it to one of the places where perltidy will find it. You can run +perltidy with the parameter B<-dpro> to see where these places are for your +system. + +=head2 Before You Start + +Before you begin, experiment using just C on some +of your files. From the results (which you will find in files with a +F<.tdy> extension), you will get a sense of what formatting changes, if +any, you'd like to make. If the default formatting is acceptable, you +do not need a F<.perltidyrc> file. + +=head2 Use as Filter? + +Do you almost always want to run perltidy as a standard filter on just +one input file? If yes, use B<-st> and B<-se>. + +=head2 Line Length Setting + +Perltidy will set line breaks to prevent lines from exceeding the +maximum line length. + +Do you want the maximum line length to be 80 columns? If no, use +B<-l=n>, where B is the number of columns you prefer. + +=head2 Indentation in Code Blocks + +In the block below, the variable C<$anchor> is one indentation level deep +and is indented by 4 spaces as shown here: + + if ( $flag eq "a" ) { + $anchor = $header; + } + +If you want to change this to be a different number B of spaces +per indentation level, use B<-i=n>. + +=head2 Continuation Indentation + +Look at the statement beginning with C<$anchor>: + + if ( $flag eq "a" ) { + $anchor = + substr( $header, 0, 6 ) + . substr( $char_list, $place_1, 1 ) + . substr( $char_list, $place_2, 1 ); + } + +The statement is too long for the line length (80 characters by default), so it +has been broken into 4 lines. The second and later lines have some extra +"continuation indentation" to help make the start of the statement easy to +find. The default number of extra spaces is 2. If you prefer a number n +different from 2, you may specify this with B<-ci=n>. It is probably best if +it does not exceed the value of the primary indentation. + +=head2 Tabs + +The default, and recommendation, is to represent leading whitespace +with actual space characters. However, if you prefer to entab +leading whitespace with one tab character for each B spaces, +use B<-et=n>. Typically, B would be 8. + +=head2 Opening Block Brace Right or Left? + +Opening and closing curly braces, parentheses, and square brackets are divided +into two separate categories and controlled separately in most cases. The two +categories are (1) code block curly braces, which contain perl code, and (2) +everything else. Basically, a code block brace is one which could contain +semicolon-terminated lines of perl code. We will first work on the scheme for +code block curly braces. + +Decide which of the following opening brace styles you prefer for most blocks +of code (with the possible exception of a B which will +be covered later): + +If you like opening braces on the right, like this, go to +L. + + if ( $flag eq "h" ) { + $headers = 0; + } + +If you like opening braces on the left, like this, go to +L. + + if ( $flag eq "h" ) + { + $headers = 0; + } + +=head2 Opening Braces Right + +In a multi-line B test expression, the default is to place +the opening brace on the left, like this: + + if ( $bigwasteofspace1 && $bigwasteofspace2 + || $bigwasteofspace3 && $bigwasteofspace4 ) + { + big_waste_of_time(); + } + +This helps to visually separate the block contents from the test +expression. + +An alternative is to keep the brace on the right even for +multiple-line test expressions, like this: + + if ( $bigwasteofspace1 && $bigwasteofspace2 + || $bigwasteofspace3 && $bigwasteofspace4 ) { + big_waste_of_time(); + } + +If you prefer this alternative, use B<-bar>. + +=head2 Cuddled Else? + +Do you prefer this B style + + if ( $flag eq "h" ) { + $headers = 0; + } elsif ( $flag eq "f" ) { + $sectiontype = 3; + } else { + print "invalid option: " . substr( $arg, $i, 1 ) . "\n"; + dohelp(); + } + +instead of this default style? + + if ( $flag eq "h" ) { + $headers = 0; + } + elsif ( $flag eq "f" ) { + $sectiontype = 3; + } + else { + print "invalid option: " . substr( $arg, $i, 1 ) . "\n"; + dohelp(); + } + +If yes, you should use B<-ce>. +Now skip ahead to L. + +=head2 Opening Braces Left + +Use B<-bl> if you prefer this style: + + if ( $flag eq "h" ) + { + $headers = 0; + } + +Use B<-bli> if you prefer this indented-brace style: + + if ( $flag eq "h" ) + { + $headers = 0; + } + +The number of spaces of extra indentation will be the value specified +for continuation indentation with the B<-ci=n> parameter (2 by default). + +=head2 Opening Sub Braces + +By default, the opening brace of a sub block will be treated +the same as other code blocks. If this is okay, skip ahead +to L. + +If you prefer an opening sub brace to be on a new line, +like this: + + sub message + { + # -sbl + } + +use B<-sbl>. If you prefer the sub brace on the right like this + + sub message { + + # -nsbl + } + +use B<-nsbl>. + +If you wish to give this opening sub brace some indentation you can do +that with the parameters B<-bli> and B<-blil> which are described in the +manual. + +=head2 Block Brace Vertical Tightness + +If you chose to put opening block braces of all types to the right, skip +ahead to L. + +If you chose to put braces of any type on the left, the default is to leave the +opening brace on a line by itself, like this (shown for B<-bli>, but also true +for B<-bl>): + + if ( $flag eq "h" ) + { + $headers = 0; + } + +But you may also use this more compressed style if you wish: + + if ( $flag eq "h" ) + { $headers = 0; + } + +If you do not prefer this more compressed form, go to +L. + +Otherwise use parameter B<-bbvt=n>, where n=1 or n=2. To decide, +look at this snippet: + + # -bli -bbvt=1 + sub _directives + { + { + 'ENDIF' => \&_endif, + 'IF' => \&_if, + }; + } + + # -bli -bbvt=2 + sub _directives + { { + 'ENDIF' => \&_endif, + 'IF' => \&_if, + }; + } + +The difference is that B<-bbvt=1> breaks after an opening brace if +the next line is unbalanced, whereas B<-bbvt=2> never breaks. + +If you were expecting the 'ENDIF' word to move up vertically here, note that +the second opening brace in the above example is not a code block brace (it is +a hash brace), so the B<-bbvt> does not apply to it (another parameter will). + +=head2 Closing Block Brace Indentation + +The default is to place closing braces at the same indentation as the +opening keyword or brace of that code block, as shown here: + + if ($task) { + yyy(); + } # default + +If you chose the B<-bli> style, however, the default closing braces will be +indented one continuation indentation like the opening brace: + + if ($task) + { + yyy(); + } # -bli + +If you prefer to give closing block braces one full level of +indentation, independently of how the opening brace is treated, +for example like this: + + if ($task) { + yyy(); + } # -icb + +use B<-icb>. + +This completes the definition of the placement of code block braces. + +=head2 Indentation Style for Other Containers + +You have a choice of two basic indentation schemes for non-block containers. +The default is to use a fixed number of spaces per indentation level (the same +number of spaces used for code blocks, which is 4 by default). Here is an +example of the default: + + $dbh = DBI->connect( + undef, undef, undef, + { + PrintError => 0, + RaiseError => 1 + } + ); + +In this default indentation scheme, a simple formula is used to find the +indentation of every line. Notice how the first 'undef' is indented 4 +spaces (one level) to the right, and how 'PrintError' is indented 4 more +speces (one more level) to the right. + +The alternate is to let the location of the opening paren (or square +bracket, or curly brace) define the indentation, like this: + + $dbh = DBI->connect( + undef, undef, undef, + { + PrintError => 0, + RaiseError => 1 + } + ); + +The first scheme is completely robust. The second scheme often looks a little +nicer, but be aware that deeply nested structures it can be spoiled if the line +length limit is exceeded. Also, if there are comments or blank lines within a +complex structure perltidy will temporarily fall back on the default +indentation scheme. You may want to try both on large sections of code to see +which works best. + +If you prefer the first (default) scheme, no parameter is needed. + +If you prefer the latter scheme, use B<-lp>. + +=head2 Opening Vertical Tightness + +The information in this section applies mainly to the B<-lp> +style but it also applies in some cases to the default style. +It will be illustrated for the B<-lp> indentation style. + +The default B<-lp> indentation style ends a line at the +opening tokens, like this: + + $dbh = DBI->connect( + undef, undef, undef, + { + PrintError => 0, + RaiseError => 1 + } + ); + +Here is a tighter alternative, which does not end a line +with the opening tokens: + + $dbh = DBI->connect( undef, undef, undef, + { PrintError => 0, + RaiseError => 1 + } + ); + +The difference is that the lines have been compressed vertically without +any changes to the indentation. This can almost always be done with the +B<-lp> indentation style, but only in limited cases for the default +indentation style. + +If you prefer the default, skip ahead to L. + +Otherwise, use B<-vt=n>, where B should be either 1 or 2. To help +decide, observe the first three opening parens in the following snippet +and choose the value of n you prefer. Here it is with B<-lp -vt=1>: + + if ( + !defined( + start_slip( $DEVICE, $PHONE, $ACCOUNT, $PASSWORD, + $LOCAL, $REMOTE, $NETMASK, $MTU + ) + ) + && $continuation_flag + ) + { + do_something_about_it(); + } + +And here it is again formatted with B<-lp -vt=2>: + + if ( !defined( start_slip( $DEVICE, $PHONE, $ACCOUNT, $PASSWORD, + $LOCAL, $REMOTE, $NETMASK, $MTU + ) + ) + && $continuation_flag + ) + { + do_something_about_it(); + } + +The B<-vt=1> style tries to display the structure by preventing more +than one step in indentation per line. In this example, the first two +opening parens were not followed by balanced lines, so B<-vt=1> broke +after them. + +The B<-vt=2> style does not limit itself to a single indentation step +per line. + +Note that in the above example the function 'do_sumething_about_it' +started on a new line. This is because it follows an opening code +block brace and is governed by the flag previously set in +L. + +=head2 Closing Token Placement + +You have several options for dealing with the terminal closing tokens of +non-blocks. In the following examples, a closing parenthesis is shown, but +these parameters apply to closing square brackets and non-block curly braces as +well. + +The default behavior for parenthesized relatively large lists is to place the +closing paren on a separate new line. The flag B<-cti=n> controls the amount +of indentation of such a closing paren. + +The default, B<-cti=0>, for a line beginning with a closing paren, is to use +the indentation defined by the next (lower) indentation level. This works +well for the default indentation scheme: + + # perltidy + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + +but it may not look very good with the B<-lp> indentation scheme: + + # perltidy -lp + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + +An alternative which works well with B<-lp> indentation is B<-cti=1>, +which aligns the closing paren vertically with its +opening paren, if possible: + + # perltidy -lp -cti=1 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + +Another alternative, B<-cti=3>, indents a line with leading closing +paren one full indentation level: + + # perltidy -lp -cti=3 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + +If you prefer the closing paren on a separate line like this, +note the value of B<-cti=n> that you prefer and skip ahead to +L. + +Finally, the question of paren indentation can be avoided by placing it +at the end of the previous line, like this: + + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ); + +Perltidy will automatically do this to save space for very short lists but not +for longer lists. + +Use B<-vtc=n> if you prefer to usually do this, where B is either 1 or 2. To +determine B, we have to look at something more complex. Observe the +behavior of the closing tokens in the following snippet: + +Here is B<-lp -vtc=1>: + + $srec->{'ACTION'} = [ + $self->read_value( + $lookup->{'VFMT'}, + $loc, $lookup, $fh + ), + $self->read_value( + $lookup->{'VFMT2'}, + $loc, $lookup, $fh + ) ]; + +Here is B<-lp -vtc=2>: + + $srec->{'ACTION'} = [ + $self->read_value( + $lookup->{'VFMT'}, + $loc, $lookup, $fh ), + $self->read_value( + $lookup->{'VFMT2'}, + $loc, $lookup, $fh ) ]; + +Choose the one that you prefer. The difference is that B<-vtc=1> leaves +closing tokens at the start of a line within a list, which can assist in +keeping hierarchical lists readable. The B<-vtc=2> style always tries +to move closing tokens to the end of a line. + +If you choose B<-vtc=1>, +you may also want to specify a value of B<-cti=n> (previous section) to +handle cases where a line begins with a closing paren. + +=head2 Stack Opening Tokens + +In the following snippet the opening hash brace has been placed +alone on a new line. + + $opt_c = Text::CSV_XS->new( + { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } + ); + +If you prefer to avoid isolated opening tokens by +"stacking" them together with other opening tokens like this: + + $opt_c = Text::CSV_XS->new( { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } + ); + +use B<-sot>. + +=head2 Stack Closing Tokens + +Likewise, in the same snippet the default formatting leaves +the closing paren on a line by itself here: + + $opt_c = Text::CSV_XS->new( + { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } + ); + +If you would like to avoid leaving isolated closing tokens by +stacking them with other closing tokens, like this: + + $opt_c = Text::CSV_XS->new( + { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } ); + +use B<-sct>. + +The B<-sct> flag is somewhat similar to the B<-vtc> flags, and in some cases it +can give a similar result. The difference is that the B<-vtc> flags try to +avoid lines with leading opening tokens by "hiding" them at the end of a +previous line, whereas the B<-sct> flag merely tries to reduce the number of +lines with isolated closing tokens by stacking multiple closing tokens +together, but it does not try to hide them. + +The manual shows how all of these vertical tightness controls may be applied +independently to each type of non-block opening and opening token. + +=head2 Define Horizontal Tightness + +Horizontal tightness parameters define how much space is included +within a set of container tokens. + +For parentheses, decide which of the following values of B<-pt=n> +you prefer: + + if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # -pt=0 + if ( ( my $len_tab = length($tabstr) ) > 0 ) { # -pt=1 (default) + if ((my $len_tab = length($tabstr)) > 0) { # -pt=2 + +For n=0, space is always used, and for n=2, space is never used. For +the default n=1, space is used if the parentheses contain more than +one token. + +For square brackets, decide which of the following values of B<-sbt=n> +you prefer: + + $width = $col[ $j + $k ] - $col[ $j ]; # -sbt=0 + $width = $col[ $j + $k ] - $col[$j]; # -sbt=1 (default) + $width = $col[$j + $k] - $col[$j]; # -sbt=2 + +For curly braces, decide which of the following values of B<-bt=n> +you prefer: + + $obj->{ $parsed_sql->{ 'table' }[0] }; # -bt=0 + $obj->{ $parsed_sql->{'table'}[0] }; # -bt=1 (default) + $obj->{$parsed_sql->{'table'}[0]}; # -bt=2 + +For code block curly braces, decide which of the following values of +B<-bbt=n> you prefer: + + %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default) + %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.'; # -bbt=1 + %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.'; # -bbt=2 + +=head2 Spaces between function names and opening parens + +The default is not to place a space after a function call: + + myfunc( $a, $b, $c ); # default + +If you prefer a space: + + myfunc ( $a, $b, $c ); # -sfp + +use B<-sfp>. + +=head2 Spaces between Perl keywords and parens + +The default is to place a space between only these keywords +and an opening paren: + + my local our and or eq ne if else elsif until unless + while for foreach return switch case given when + +but no others. For example, the default is: + + $aa = pop(@bb); + +If you want a space between all Perl keywords and an opening paren, + + $aa = pop (@bb); + +use B<-skp>. For detailed control of individual keywords, see the manual. + +=head2 Statement Termination Semicolon Spaces + +The default is not to put a space before a statement termination +semicolon, like this: + + $i = 1; + +If you prefer a space, like this: + + $i = 1 ; + +enter B<-sts>. + +=head2 For Loop Semicolon Spaces + +The default is to place a space before a semicolon in a for statement, +like this: + + for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) { # -sfs (default) + +If you prefer no such space, like this: + + for ( @a = @$ap, $u = shift @a; @a; $u = $v ) { # -nsfs + +enter B<-nsfs>. + +=head2 Block Comment Indentation + +Block comments are comments which occupy a full line, as opposed to side +comments. The default is to indent block comments with the same +indentation as the code block that contains them (even though this +will allow long comments to exceed the maximum line length). + +If you would like block comments indented except when this would cause +the maximum line length to be exceeded, use B<-olc>. This will cause a +group of consecutive block comments to be outdented by the amount needed +to prevent any one from exceeding the maximum line length. + +If you never want block comments indented, use B<-nibc>. + +If block comments may only be indented if they have some space +characters before the leading C<#> character in the input file, use +B<-isbc>. + +The manual shows many other options for controlling comments. + +=head2 Outdenting Long Quotes + +Long quoted strings may exceed the specified line length limit. The +default, when this happens, is to outdent them to the first column. +Here is an example of an outdented long quote: + + if ($source_stream) { + if ( @ARGV > 0 ) { + die + "You may not specify any filenames when a source array is given\n"; + } + } + +The effect is not too different from using a here document to represent +the quote. If you prefer to leave the quote indented, like this: + + if ($source_stream) { + if ( @ARGV > 0 ) { + die + "You may not specify any filenames when a source array is given\n"; + } + } + +use B<-nolq>. + +=head2 Many Other Parameters + +This document has only covered the most popular parameters. The manual +contains many more and should be consulted if you did not find what you need +here. + +=head2 Example F<.perltidyrc> files + +Now gather together all of the parameters you prefer and enter them +in a file called F<.perltidyrc>. + +Here are some example F<.perltidyrc> files and the corresponding style. + +Here is a little test snippet, shown the way it would appear with +the default style. + + for (@methods) { + push ( + @results, + { + name => $_->name, + help => $_->help, + } + ); + } + +You do not need a F<.perltidyrc> file for this style. + +Here is the same snippet + + for (@methods) + { + push(@results, + { name => $_->name, + help => $_->help, + } + ); + } + +for a F<.perltidyrc> file containing these parameters: + + -bl + -lp + -cti=1 + -vt=1 + -pt=2 + +You do not need to place just one parameter per line, but this may be +convenient for long lists. You may then hide any parameter by placing +a C<#> symbol before it. + +And here is the snippet + + for (@methods) { + push ( @results, + { name => $_->name, + help => $_->help, + } ); + } + +for a F<.perltidyrc> file containing these parameters: + + -lp + -vt=1 + -vtc=1 + +=head2 Tidyview + +There is a graphical program called B which you can use to read a +preliminary F<.perltidyrc> file, make trial adjustments and immediately see +their effect on a test file, and then write a new F<.perltidyrc>. You can +download a copy at + +http://sourceforge.net/projects/tidyview + +=head2 Additional Information + +This document has covered the main parameters. Many more parameters are +available for special purposes and for fine-tuning a style. For complete +information see the perltidy manual +http://perltidy.sourceforge.net/perltidy.html + +For an introduction to using perltidy, see the tutorial +http://perltidy.sourceforge.net/tutorial.html + +Suggestions for improving this document are welcome and may be sent to +perltidy at users.sourceforge.net + +=cut diff --git a/docs/testfile.pl b/docs/testfile.pl new file mode 100644 index 00000000..182f2bef --- /dev/null +++ b/docs/testfile.pl @@ -0,0 +1,10 @@ +print "Help Desk -- What Editor do you use? "; +chomp($editor = ); +if ($editor =~ /emacs/i) { + print "Why aren't you using vi?\n"; +} elsif ($editor =~ /vi/i) { + print "Why aren't you using emacs?\n"; +} else { + print "I think that's the problem\n"; +} + diff --git a/docs/tutorial.pod b/docs/tutorial.pod new file mode 100644 index 00000000..9d1f2606 --- /dev/null +++ b/docs/tutorial.pod @@ -0,0 +1,534 @@ +=head1 A Brief Perltidy Tutorial + +Perltidy can save you a lot of tedious editing if you spend a few +minutes learning to use it effectively. Perltidy is highly +configurable, but for many programmers the default parameter set will be +satisfactory, with perhaps a few additional parameters to account for +style preferences. + +This tutorial assumes that perltidy has been installed on your system. +Installation instructions accompany the package. To follow along with +this tutorial, please find a small Perl script and place a copy in a +temporary directory. For example, here is a small (and silly) script: + + print "Help Desk -- What Editor do you use?"; + chomp($editor = ); + if ($editor =~ /emacs/i) { + print "Why aren't you using vi?\n"; + } elsif ($editor =~ /vi/i) { + print "Why aren't you using emacs?\n"; + } else { + print "I think that's the problem\n"; + } + +It is included in the F section of the distribution. + +=head2 A First Test + +Assume that the name of your script is F. You can reformat it +with the default options to use the style recommended in the perlstyle man +pages with the command: + + perltidy testfile.pl + +For safety, perltidy never overwrites your original file. In this case, +its output will go to a file named F, which you should +examine now with your editor. Here is what the above file looks like +with the default options: + + print "Help Desk -- What Editor do you use?"; + chomp( $editor = ); + if ( $editor =~ /emacs/i ) { + print "Why aren't you using vi?\n"; + } + elsif ( $editor =~ /vi/i ) { + print "Why aren't you using emacs?\n"; + } + else { + print "I think that's the problem\n"; + } + +You'll notice an immediate style change from the "cuddled-else" style of +the original to the default "non-cuddled-else" style. This is because +perltidy has to make some kind of default selection of formatting +options, and this default tries to follow the suggestions in the +perlstyle man pages. + +If you prefer the original "cuddled-else" style, don't worry, you can +indicate that with a B<-ce> flag. So if you rerun with that flag + + perltidy -ce testfile.pl + +you will see a return to the original "cuddled-else" style. There are +many more parameters for controlling style, and some of the most useful +of these are discussed below. + +=head2 Indentation + +Another noticeable difference between the original and the reformatted +file is that the indentation has been changed from 2 spaces to 4 spaces. +That's because 4 spaces is the default. You may change this to be a +different number with B<-i=n>. + +To get some practice, try these examples, and examine the resulting +F file: + + perltidy -i=8 testfile.pl + +This changes the default of 4 spaces per indentation level to be 8. Now +just to emphasize the point, try this and examine the result: + + perltidy -i=0 testfile.pl + +There will be no indentation at all in this case. + +=head2 Input Flags + +This is a good place to mention a few points regarding the input flags. +First, for each option, there are two forms, a long form and a short +form, and either may be used. + +For example, if you want to change the number of columns corresponding to one +indentation level to 3 (from the default of 4) you may use either + + -i=3 or --indent-columns=3 + +The short forms are convenient for entering parameters by hand, whereas +the long forms, though often ridiculously long, are self-documenting and +therefore useful in configuration scripts. You may use either one or +two dashes ahead of the parameters. Also, the '=' sign is optional, +and may be a single space instead. However, the value of a parameter +must NOT be adjacent to the flag, like this B<-i3> (WRONG). Also, +flags must be input separately, never bundled together. + +=head2 Line Length and Continuation Indentation. + +If you change the indentation spaces you will probably also need to +change the continuation indentation spaces with the parameter B<-ci=n>. +The continuation indentation is the extra indentation -- 2 spaces by +default -- given to that portion of a long line which has been placed +below the start of a statement. For example: + + croak "Couldn't pop genome file" + unless sysread( $impl->{file}, $element, $impl->{group} ) + and truncate( $impl->{file}, $new_end ); + +There is no fixed rule for setting the value for B<-ci=n>, but it should +probably not exceed one-half of the number of spaces of a full +indentation level. + +In the above snippet, the statement was broken into three lines. The +actual number is governed by a parameter, the maximum line length, as +well as by what perltidy considers to be good break points. The maximum +line length is 80 characters by default. You can change this to be any +number B with the B<-l=n> flag. Perltidy tries to produce lines +which do not exceed this length, and it does this by finding good break +points. For example, the above snippet would look like this with +B: + + croak "Couldn't pop genome file" + unless + sysread( $impl->{file}, $element, + $impl->{group} ) + and + truncate( $impl->{file}, $new_end ); + +You may be wondering what would happen with, say, B<-l=1>. Go +ahead and try it. + +=head2 Tabs or Spaces? + +With indentation, there is always a tab issue to resolve. By default, +perltidy will use leading ascii space characters instead of tabs. The +reason is that this will be displayed correctly by virtually all +editors, and in the long run, will avoid maintenance problems. + +However, if you prefer, you may have perltidy entab the leading +whitespace of a line with the command B<-et=n>, where B is the number +of spaces which will be represented by one tab. But note that your text +will not be displayed properly unless viewed with software that is +configured to display B spaces per tab. + +=head2 Input/Output Control + +In the first example, we saw that if we pass perltidy the name +of a file on the command line, it reformats it and creates a +new filename by appending an extension, F<.tdy>. This is the +default behavior, but there are several other options. + +On most systems, you may use wildcards to reformat a whole batch of +files at once, like this for example: + + perltidy *.pl + +and in this case, each of the output files will be have a name equal to +the input file with the extension F<.tdy> appended. If you decide that +the formatting is acceptable, you will want to backup your originals and +then remove the F<.tdy> extensions from the reformatted files. There is +an powerful perl script called C that can be used for this +purpose; if you don't have it, you can find it for example in B. + +If you find that the formatting done by perltidy is usually acceptable, +you may want to save some effort by letting perltidy do a simple backup +of the original files and then reformat them in place. You specify this +with a B<-b> flag. For example, the command + + perltidy -b *.pl + +will rename the original files by appending a F<.bak> extension, and then +create reformatted files with the same names as the originals. (If you don't +like the default backup extension choice F<.bak>, the manual tells how to +change it). Each time you run perltidy with the B<-b> option, the previous +F<.bak> files will be overwritten, so please make regular separate backups. + +If there is no input filename specified on the command line, then input +is assumed to come from standard input and output will go to standard +output. On systems with a Unix-like interface, you can use perltidy as +a filter, like this: + + perltidy newfile.pl + +What happens in this case is that the shell takes care of the redirected +input files, ' file, you can +indicate this with the flag B<-st>, like this: + + perltidy somefile.pl -st >otherfile.pl + +You can also control the name of the output file with the B<-o> flag, +like this: + + perltidy testfile.pl -o=testfile.new.pl + +=head2 Style Variations + +Perltidy has to make some kind of default selection of formatting +options, and its choice is to try to follow the suggestions in the +perlstyle man pages. Many programmers more or less follow these +suggestions with a few exceptions. In this section we will +look at just a few of the most commonly used style parameters. Later, +you may want to systematically develop a set of style +parameters with the help of +the perltidy B web page at +http://perltidy.sourceforge.net/stylekey.html + +=over 4 + +=item B<-ce>, cuddled elses + +If you prefer cuddled elses, use the B<-ce> flag. + +=item B<-bl>, braces left + +Here is what the C block in the above script looks like with B<-bl>: + + if ( $editor =~ /emacs/i ) + { + print "Why aren't you using vi?\n"; + } + elsif ( $editor =~ /vi/i ) + { + print "Why aren't you using emacs?\n"; + } + else + { + print "I think that's the problem\n"; + } + +=item B<-lp>, Lining up with parentheses + +The B<-lp> parameter can enhance the readability of lists by adding +extra indentation. Consider: + + %romanNumerals = ( + one => 'I', + two => 'II', + three => 'III', + four => 'IV', + five => 'V', + six => 'VI', + seven => 'VII', + eight => 'VIII', + nine => 'IX', + ten => 'X' + ); + +With the B<-lp> flag, this is formatted as: + + %romanNumerals = ( + one => 'I', + two => 'II', + three => 'III', + four => 'IV', + five => 'V', + six => 'VI', + seven => 'VII', + eight => 'VIII', + nine => 'IX', + ten => 'X' + ); + +which is preferred by some. (I've actually used B<-lp> and B<-cti=1> to +format this block. The B<-cti=1> flag causes the closing paren to align +vertically with the opening paren, which works well with the B<-lp> +indentation style). An advantage of B<-lp> indentation are that it +displays lists nicely. A disadvantage is that deeply nested lists can +require a long line length. + +=item B<-bt>,B<-pt>,B<-sbt>: Container tightness + +These are parameters for controlling the amount of space within +containing parentheses, braces, and square brackets. The example below +shows the effect of the three possible values, 0, 1, and 2, for the case +of parentheses: + + if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # -pt=0 + if ( ( my $len_tab = length($tabstr) ) > 0 ) { # -pt=1 (default) + if ((my $len_tab = length($tabstr)) > 0) { # -pt=2 + +A value of 0 causes all parens to be padded on the inside with a space, +and a value of 2 causes this never to happen. With a value of 1, spaces +will be introduced if the item within is more than a single token. + +=back + +=head2 Configuration Files + +While style preferences vary, most people would agree that it is +important to maintain a uniform style within a script, and this is a +major benefit provided by perltidy. Once you have decided on which, if +any, special options you prefer, you may want to avoid having to enter +them each time you run it. You can do this by creating a special file +named F<.perltidyrc> in either your home directory, your current +directory, or certain system-dependent locations. (Note the leading "." +in the file name). + +A handy command to know when you start using a configuration file is + + perltidy -dpro + +which will dump to standard output the search that perltidy makes when +looking for a configuration file, and the contents of the one that it +selects, if any. This is one of a number of useful "dump and die" +commands, in which perltidy will dump some information to standard +output and then immediately exit. Others include B<-h>, which dumps +help information, and B<-v>, which dumps the version number. + +Another useful command when working with configuration files is + + perltidy -pro=file + +which causes the contents of F to be used as the configuration +file instead of a F<.perltidyrc> file. With this command, you can +easily switch among several different candidate configuration files +during testing. + +This F<.perltidyrc> file is free format. It is simply a list of +parameters, just as they would be entered on a command line. Any number +of lines may be used, with any number of parameters per line, although +it may be easiest to read with one parameter per line. Blank lines are +ignored, and text after a '#' is ignored to the end of a line. + +Here is an example of a F<.perltidyrc> file: + + # This is a simple of a .perltidyrc configuration file + # This implements a highly spaced style + -bl # braces on new lines + -pt=0 # parens not tight at all + -bt=0 # braces not tight + -sbt=0 # square brackets not tight + +If you experiment with this file, remember that it is in your directory, +since if you are running on a Unix system, files beginning with a "." +are normally hidden. + +If you have a F<.perltidyrc> file, and want perltidy to ignore it, +use the B<-npro> flag on the command line. + +=head2 Error Reporting + +Let's run through a 'fire drill' to see how perltidy reports errors. Try +introducing an extra opening brace somewhere in a test file. For example, +introducing an extra brace in the file listed above produces the following +message on the terminal (or standard error output): + + ## Please see file testfile.pl.ERR! + +Here is what F contains: + + 10: final indentation level: 1 + + Final nesting depth of '{'s is 1 + The most recent un-matched '{' is on line 6 + 6: } elsif ($temperature < 68) {{ + ^ + +This shows how perltidy will, by default, write error messages to a file +with the extension F<.ERR>, and it will write a note that it did so to +the standard error device. If you would prefer to have the error +messages sent to standard output, instead of to a F<.ERR> file, use the +B<-se> flag. + +Almost every programmer would want to see error messages of this type, +but there are a number of messages which, if reported, would be +annoying. To manage this problem, perltidy puts its messages into two +categories: errors and warnings. The default is to just report the +errors, but you can control this with input flags, as follows: + + flag what this does + ---- -------------- + default: report errors but not warnings + -w report all errors and warnings + -q quiet! do not report either errors or warnings + +The default is generally a good choice, but it's not a bad idea to check +programs with B<-w> occasionally, especially if your are looking for a +bug. For example, it will ask if you really want '=' instead of '=~' in +this line: + + $line = s/^\s*//; + +This kind of error can otherwise be hard to find. + +=head2 The Log File + +One last topic that needs to be touched upon concerns the F<.LOG> file. +This is where perltidy records messages that are not normally of any +interest, but which just might occasionally be useful. This file is not +saved, though, unless perltidy detects that it has made a mistake or you +ask for it to be saved. + +There are a couple of ways to ask perltidy to save a log file. To +create a relatively sparse log file, use + + perltidy -log testfile.pl + +and for a verbose log file, use + + perltidy -g testfile.pl + +The difference is that the first form only saves detailed information at +least every 50th line, while the second form saves detailed information +about every line. + +So returning to our example, lets force perltidy to save a +verbose log file by issuing the following command + + perltidy -g testfile.pl + +You will find that a file named F has been +created in your directory. + +If you open this file, you will see that it is a text file with a +combination of warning messages and informative messages. All you need +to know for now is that it exists; someday it may be useful. + +=head2 Using Perltidy as a Filter on Selected Text from an Editor + +Most programmer's editors allow a selected group of lines to be passed +through an external filter. Perltidy has been designed to work well as +a filter, and it is well worthwhile learning the appropriate commands to +do this with your editor. This means that you can enter a few +keystrokes and watch a block of text get reformatted. If you are not +doing this, you are missing out of a lot of fun! You may want to supply +the B<-q> flag to prevent error messages regarding incorrect syntax, +since errors may be obvious in the indentation of the reformatted text. +This is entirely optional, but if you do not use the B<-q> flag, you +will need to use the undo keys in case an error message appears on the +screen. + +For example, within the B editor it is only necessary to select the +text by any of the text selection methods, and then issue the command +!perltidy in command mode. Thus, an entire file can be formatted using + + :%!perltidy -q + +or, without the B<-q> flag, just + + :%!perltidy + +It isn't necessary to format an entire file, however. Perltidy will +probably work well as long as you select blocks of text whose braces, +parentheses, and square brackets are properly balanced. You can +even format an C block without the leading C block, as +long as the text you select has all braces balanced. + +For the B editor, first mark a region and then pipe it through +perltidy. For example, to format an entire file, select it with C +and then pipe it with C and then C. The numeric +argument, C causes the output from perltidy to replace the marked +text. See "GNU Emacs Manual" for more information, +http://www.gnu.org/manual/emacs-20.3/html_node/emacs_toc.html + +If you have difficulty with an editor, try the B<-st> flag, which will +force perltidy to send output to standard output. This might be needed, +for example, if the editor passes text to perltidy as temporary filename +instead of through the standard input. If this works, you might put the +B<-st> flag in your F<.perltidyrc> file. + +If you have some tips for making perltidy work with your editor, and +are willing to share them, please email me (see below) and I'll try to +incorporate them in this document or put up a link to them. + +After you get your editor and perltidy successfully talking to each +other, try formatting a snippet of code with a brace error to see what +happens. (Do not use the quiet flag, B<-q>, for this test). Perltidy +will send one line starting with C<##> to standard error output. Your +editor may either display it at the top of the reformatted text or at +the bottom (or even midstream!). You probably cannot control this, and +perltidy can't, but you need to know where to look when an actual error +is detected. + +=head2 Writing an HTML File + +Perltidy can switch between two different output modes. We have been +discussing what might be called its "beautifier" mode, but it can also +output in HTML. To do this, use the B<-html> flag, like this: + + perltidy -html testfile.pl + +which will produce a file F. There are many +parameters available for adjusting the appearance of an HTML file, but a +very easy way is to just write the HTML file with this simple command +and then edit the stylesheet which is embedded at its top. + +One important thing to know about the B<-html> flag is that perltidy can +either send its output to its beautifier or to its HTML writer, but +(unfortunately) not both in a single run. So the situation can be +represented like this: + + ------------ + | | --->beautifier--> testfile.pl.tdy + testfile.pl --> | perltidy | --> + | | --->HTML -------> testfile.pl.html + ------------ + +And in the future, there may be more output filters. So if you would +like to both beautify a script and write it to HTML, you need to do it +in two steps. + +=head2 Summary + +That's enough to get started using perltidy. +When you are ready to create a F<.perltidyrc> file, you may find it +helpful to use the F page as a guide at +http://perltidy.sourceforge.net/stylekey.html + +Many additional special +features and capabilities can be found in the manual pages for perltidy +at +http://perltidy.sourceforge.net/perltidy.html + +We hope that perltidy makes perl programming a little more fun. +Please check the perltidy +web site http://perltidy.sourceforge.net occasionally +for updates. + +The author may be contacted at perltidy at users.sourceforge.net. + +=cut diff --git a/examples/README b/examples/README new file mode 100644 index 00000000..ed246509 --- /dev/null +++ b/examples/README @@ -0,0 +1,24 @@ +These are some files to test and illustrate Perl::Tidy + +testfa.t - test with input from file and output to array +testff.t - test with input from file and output to file + +ex_mp.pl - example from Perl::Tidy(3) man page +lextest - needed by testfa. and testff.t + +bbtidy.pl - a main program which works as a filter under BBEdit+MacPerl + +pt.bat - sample batch file for msdos installations + (change name to perltidy.bat) + +perltidyrc_dump.pl - a program to dump a .perltidyrc file + see comments inside for usage + +Some examples of a user defined callback object to parse perl: +-------------------------------------------------------------- +find_naughty.pl - example using Perl::Tidy to find $`, $&, $' variables +perlcomment.pl - example using Perl::Tidy to reformat comments +perllinetype.pl - example using Perl::Tidy to display types of lines +perlmask.pl - example using Perl::Tidy to create a masked file +perlxmltok.pl - example using Perl::Tidy to write a script in xml format + diff --git a/examples/bbtidy.pl b/examples/bbtidy.pl new file mode 100644 index 00000000..79cd4429 --- /dev/null +++ b/examples/bbtidy.pl @@ -0,0 +1,25 @@ +#!/usr/bin/perl -wn + +# This program was posted on the MacPerl mailing list by +# Charles Albrecht as one way to get perltidy to work as a filter +# under BBEdit. + +use Perl::Tidy; + +BEGIN { my $input_string = ""; my $output_string = ""; } + +$input_string .= $_; + +END { + my $err=Perl::Tidy::perltidy( + source => \$input_string, + destination => \$output_string + ); + if ($err){ + die "Error calling perltidy\n"; + } + print "$output_string\n"; +} + +__END__ + diff --git a/examples/break_long_quotes.pl b/examples/break_long_quotes.pl new file mode 100755 index 00000000..ba40d9a5 --- /dev/null +++ b/examples/break_long_quotes.pl @@ -0,0 +1,200 @@ +#!/usr/bin/perl -w + +# Break long quoted strings in perl code into smaller pieces +# This version only breaks at blanks. See sub break_at_blanks to +# customize. +# +# usage: +# break_long_quotes.pl -ln myfile.pl >myfile.new +# +# where n specifies the maximum quote length. + +# NOTES: +# 1. Use with caution - has not been extensively tested +# +# 2. The output is not beautified so that you can use diff to see what +# changed. If all is ok, run the output through perltidy to clean it up. +# +# 3. This version only breaks single-line quotes contained within +# either single or double quotes. + +# Steve Hancock, Sept 28, 2006 +# +use strict; +use Getopt::Std; +$| = 1; +use vars qw($opt_l $opt_h); + +my $usage = <outfile + where n=line length (default 72) +EOM + +getopts('hl:') or die "$usage"; +if ($opt_h) { die $usage } +if ( !defined $opt_l ) { + $opt_l = 70; +} +else { + $opt_l =~ /^\d+$/ or die "$usage"; +} + +unless ( @ARGV == 1 ) { die $usage } +my $file = $ARGV[0]; +scan_file( $file, $opt_l ); + +sub scan_file { + my ( $file, $line_length ) = @_; + use Perl::Tidy; + use IO::File; + my $fh = IO::File->new( $file, 'r' ); + unless ($fh) { die "cannot open '$file': $!\n" } + my $formatter = MyWriter->new($line_length); + + my $err=perltidy( + 'formatter' => $formatter, # callback object + 'source' => $fh, + 'argv' => "-npro -se", # don't need .perltidyrc + # errors to STDOUT + ); + if ($err){ + die "Error calling perltidy\n"; + } + $fh->close(); +} ## end sub scan_file + +##################################################################### +# +# This is a class with a write_line() method which receives +# tokenized lines from perltidy +# +##################################################################### + +package MyWriter; + +sub new { + my ( $class, $line_length ) = @_; + my $comment_block = ""; + bless { + _rcomment_block => \$comment_block, + _maximum_comment_length => 0, + _max_quote_length => $line_length, + _in_hanging_side_comment => 0, + }, $class; +} ## end sub new + +sub write_line { + + # This is called from perltidy line-by-line + # We will look for quotes and fix them up if necessary + my $self = shift; + my $line_of_tokens = shift; + my $line_type = $line_of_tokens->{_line_type}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; # the original line + my $rtoken_type = $line_of_tokens->{_rtoken_type}; # type of tokens + my $rtokens = $line_of_tokens->{_rtokens}; # text of tokens + my $starting_in_quote = + $line_of_tokens->{_starting_in_quote}; # text of tokens + my $ending_in_quote = $line_of_tokens->{_ending_in_quote}; # text of tokens + my $max_quote_length = $self->{_max_quote_length}; + chomp $input_line; + + # look in lines of CODE (and not POD for example) + if ( $line_type eq 'CODE' && @$rtoken_type ) { + + my $jmax = @$rtoken_type - 1; + + # find leading whitespace + my $leading_whitespace = ( $input_line =~ /^(\s*)/ ) ? $1 : ""; + if ($starting_in_quote) {$leading_whitespace=""}; + my $new_line = $leading_whitespace; + + # loop over tokens looking for quotes (token type Q) + for ( my $j = 0 ; $j <= $jmax ; $j++ ) { + + # pull out the actual token text + my $token = $$rtokens[$j]; + + # look for long quoted strings on a single line + # (multiple line quotes not currently handled) + if ( $$rtoken_type[$j] eq 'Q' + && !( $j == 0 && $starting_in_quote ) + && !( $j == $jmax && $ending_in_quote ) + && ( length($token) > $max_quote_length ) ) + { + my $quote_char = substr( $token, 0, 1 ); + if ( $quote_char eq '"' || $quote_char eq '\'' ) { + + # safety check - shouldn't happen + my $check_char = substr( $token, -1, 1 ); + if ( $check_char ne $quote_char ) { + die <> but ending quote character is <<$check_char>> +quoted string is: +$token +EOM + } ## end if ( $check_char ne $quote_char) + $token = + break_at_blanks( $token, $quote_char, $max_quote_length ); + } ## end if ( $quote_char eq '"'... + } ## end if ( $$rtoken_type[$j]... + $new_line .= $token; + } ## end for ( my $j = 0 ; $j <=... + + # substitute the modified line for the original line + $input_line = $new_line; + } ## end if ( $line_type eq 'CODE') + + # print the line + $self->print($input_line."\n"); + return; +} ## end sub write_line + +sub break_at_blanks { + + # break a string at one or more spaces so that the longest substring is + # less than the desired length (if possible). + my ( $str, $quote_char, $max_length ) = @_; + my $blank = ' '; + my $prev_char = ""; + my @break_after_pos; + my $quote_pos = -1; + while ( ( $quote_pos = index( $str, $blank, 1 + $quote_pos ) ) >= 0 ) { + + # as a precaution, do not break if preceded by a backslash + if ( $quote_pos > 0 ) { + next if ( substr( $str, $quote_pos - 1, 1 ) eq '\\' ); + } + push @break_after_pos, $quote_pos; + } ## end while ( ( $quote_pos = index... + push @break_after_pos, length($str); + + my $starting_pos = 0; + my $new_str = ""; + for ( my $i = 1 ; $i < @break_after_pos ; $i++ ) { + my $pos = $break_after_pos[$i]; + my $length = $pos - $starting_pos; + if ( $length > $max_length - 1 ) { + $pos = $break_after_pos[ $i - 1 ]; + $new_str .= substr( $str, $starting_pos, $pos - $starting_pos + 1 ) + . "$quote_char . $quote_char"; + $starting_pos = $pos + 1; + } ## end if ( $length > $max_length... + } ## end for ( my $i = 1 ; $i < ... + my $pos = length($str); + $new_str .= substr( $str, $starting_pos, $pos ); + return $new_str; +} ## end sub break_at_blanks + +sub print { + my ( $self, $input_line ) = @_; + print $input_line; +} + +# called once after the last line of a file +sub finish_formatting { + my $self = shift; + $self->flush_comments(); +} diff --git a/examples/ex_mp.pl b/examples/ex_mp.pl new file mode 100755 index 00000000..2bee266a --- /dev/null +++ b/examples/ex_mp.pl @@ -0,0 +1,43 @@ +#!/usr/bin/perl -w + +# example call to perltidy from man page documentation of Perl::Tidy + +use strict; +use Perl::Tidy; + +my $source_string = <<'EOT'; +my$error=Perl::Tidy::perltidy(argv=>$argv,source=>\$source_string, + destination=>\$dest_string,stderr=>\$stderr_string, +errorfile=>\$errorfile_string,); +EOT + +my $dest_string; +my $stderr_string; +my $errorfile_string; +my $argv = "-npro"; # Ignore any .perltidyrc at this site +$argv .= " -pbp"; # Format according to perl best practices +$argv .= " -nst"; # Must turn off -st in case -pbp is specified +$argv .= " -se"; # -se appends the errorfile to stderr +## $argv .= " --spell-check"; # uncomment to trigger an error + +print "<>\n$source_string\n"; + +my $error = Perl::Tidy::perltidy( + argv => $argv, + source => \$source_string, + destination => \$dest_string, + stderr => \$stderr_string, + errorfile => \$errorfile_string, # not used when -se flag is set + ##phasers => 'stun', # uncomment to trigger an error +); + +if ($error) { + + # serious error in input parameters, no tidied output + print "<>\n$stderr_string\n"; + die "Exiting because of serious errors\n"; +} + +if ($dest_string) { print "<>\n$dest_string\n" } +if ($stderr_string) { print "<>\n$stderr_string\n" } +if ($errorfile_string) { print "<<.ERR file>>\n$errorfile_string\n" } diff --git a/examples/filter_example.in b/examples/filter_example.in new file mode 100644 index 00000000..72f2666d --- /dev/null +++ b/examples/filter_example.in @@ -0,0 +1,16 @@ +# input file for testing filter_example.pl +use Method::Signatures::Simple; + + method foo1 { $self->bar } + + # with signature + method foo2($bar, %opts) { $self->bar(reverse $bar) if $opts{rev}; + } + + # attributes + method foo3 : lvalue { $self->{foo} +} + + # change invocant name + method +foo4 ($class: $bar) { $class->bar($bar) } diff --git a/examples/filter_example.pl b/examples/filter_example.pl new file mode 100755 index 00000000..55f54524 --- /dev/null +++ b/examples/filter_example.pl @@ -0,0 +1,57 @@ +#!/usr/bin/perl -w +use Perl::Tidy; + +# Illustrate use of prefilter and postfilter parameters to perltidy. +# This example program uses a prefilter it to convert the 'method' +# keyword to 'sub', and a postfilter to convert back, so that perltidy will +# work for Method::Signature::Simple code. +# NOTE: This program illustrates the use of filters but has not been +# extensively tested. + +# usage: +# perl filter_example.pl filter_example.in +# +# How it works: +# 1. First the prefilter changes lines beginning with 'method foo' to 'sub +# METHOD_foo' +# 2. Then perltidy formats the code +# 3. Then the postfilter changes 'sub METHOD_' to 'method ' everywhere. +# (This assumes that there are no methods named METHOD_*, and that the keyword +# method always begins a line in the input file). +# +# Debugging hints: +# 1. Try commenting out the postfilter and running with +# the --notidy option to see what the prefilter alone is doing. +# 2. Then run with both pre- and post ters with --notidy to be sure +# that the postfilter properly undoes the prefilter. + +my $arg_string = undef; +my $err=Perl::Tidy::perltidy( + argv => $arg_string, + prefilter => + sub { $_ = $_[0]; s/^\s*method\s+(\w.*)/sub METHOD_$1/gm; return $_ }, + postfilter => + sub { $_ = $_[0]; s/sub\s+METHOD_/method /gm; return $_ } +); +if ($err) { + die "Error calling perltidy\n"; +} +__END__ + +# Try running on the following code (file filter_example.in): + +use Method::Signatures::Simple; + + method foo { $self->bar } + + # with signature + method foo($bar, %opts) { $self->bar(reverse $bar) if $opts{rev}; + } + + # attributes + method foo : lvalue { $self->{foo} +} + + # change invocant name + method +foo ($class: $bar) { $class->bar($bar) } diff --git a/examples/find_naughty.pl b/examples/find_naughty.pl new file mode 100755 index 00000000..fb5719ce --- /dev/null +++ b/examples/find_naughty.pl @@ -0,0 +1,117 @@ +#!/usr/bin/perl -w +use strict; + +# Walk through a perl script and look for 'naughty match variables' +# $`, $&, and $', which may cause poor performance. +# +# usage: +# find_naughty file1 [file2 [...]] +# find_naughty $source, + ); +} + +##################################################################### +# +# The PerlTokenSearch package is an interface to perltidy which accepts a +# source filehandle and looks for selected variables. +# +# It works by making a callback object with a write_line() method to +# receive tokenized lines from perltidy. +# +# Usage: +# +# PerlTokenSearch::find_naughty( +# _source => $fh, # required source +# ); +# +# _source is any source that perltidy will accept, including a +# filehandle or reference to SCALAR or ARRAY +# +##################################################################### + +package PerlTokenSearch; +use Carp; +use Perl::Tidy; + +sub find_naughty { + + my %args = ( @_ ); + print "Testing File: $args{_source}\n"; + + # run perltidy, which will call $formatter's write_line() for each line + my $err=perltidy( + 'source' => $args{_source}, + 'formatter' => bless( \%args, __PACKAGE__ ), # callback object + 'argv' => "-npro -se", # -npro : ignore .perltidyrc, + # -se : errors to STDOUT + ); + if ($err) { + die "Error calling perltidy\n"; + } +} + +sub write_line { + + # This is called back from perltidy line-by-line + # We're looking for $`, $&, and $' + my ( $self, $line_of_tokens ) = @_; + my $source = $self->{_source}; + + # pull out some stuff we might need + my $line_type = $line_of_tokens->{_line_type}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + chomp $input_line; + + # skip comments, pod, etc + return if ( $line_type ne 'CODE' ); + + # loop over tokens looking for $`, $&, and $' + for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { + + # we only want to examine token types 'i' (identifier) + next unless $$rtoken_type[$j] eq 'i'; + + # pull out the actual token text + my $token = $$rtokens[$j]; + + # and check it + if ( $token =~ /^\$[\`\&\']$/ ) { + print STDERR + "$source:$input_line_number: $token\n"; + } + } +} + +# optional routine, called once after the last line of a file +sub finish_formatting { + my $self = shift; + return; +} diff --git a/examples/lextest b/examples/lextest new file mode 100755 index 00000000..1ef62204 --- /dev/null +++ b/examples/lextest @@ -0,0 +1,10 @@ +# This is a simple testfile to demonstrate perltidy, from perlop(1). +# One way (of several) to run perltidy is as follows: +# +# perl ./perltidy lextest +# +# The output will be "lextest.tdy" +$_= <<'EOL'; + $url = new URI::URL "http://www/"; die if $url eq "xXx"; +EOL +LOOP:{print(" digits"),redo LOOP if/\G\d+\b[,.;]?\s*/gc;print(" lowercase"),redo LOOP if/\G[a-z]+\b[,.;]?\s*/gc;print(" UPPERCASE"),redo LOOP if/\G[A-Z]+\b[,.;]?\s*/gc;print(" Capitalized"),redo LOOP if/\G[A-Z][a-z]+\b[,.;]?\s*/gc;print(" MiXeD"),redo LOOP if/\G[A-Za-z]+\b[,.;]?\s*/gc;print(" alphanumeric"),redo LOOP if/\G[A-Za-z0-9]+\b[,.;]?\s*/gc;print(" line-noise"),redo LOOP if/\G[^A-Za-z0-9]+/gc;print". That's all!\n";} diff --git a/examples/perlcomment.pl b/examples/perlcomment.pl new file mode 100755 index 00000000..c6143c76 --- /dev/null +++ b/examples/perlcomment.pl @@ -0,0 +1,247 @@ +#!/usr/bin/perl -w +# +# Walk through a perl script and reformat perl comments +# using Text::Autoformat. +# +# usage: +# perlcomment -l72 myfile.pl >myfile.new +# +# where -l specifies the maximum comment line length. +# +# You will be given an opportunity to accept or reject each proposed +# change. +# +# This file demonstrates using Perl::Tidy to walk through a perl file +# and find all of its comments. It offers to reformat each group of +# consecutive full-line comments with Text::Autoformat. +# +# This may or may not be useful, depending on your coding style. +# Change it to suit your own purposes; see sub get_line(). +# +# Uses: Text::Autoformat +# Perl::Tidy +# +# Steve Hancock, March 2003 +# Based on a suggestion by Tim Maher +# +# TODO: (just ideas that probably won't get done) +# -Handle lines of stars, dashes, etc better +# -Need flag to limit changes to lines greater than some minimum length +# -reformat side and hanging side comments +use strict; +use Getopt::Std; +use Text::Autoformat; +$| = 1; +use vars qw($opt_l $opt_h); + +my $usage = <outfile + where n=line length (default 72) +EOM + +getopts('hl:') or die "$usage"; +if ($opt_h) {die $usage} +if ( !defined $opt_l ) { + $opt_l = 72; +} +else { + $opt_l =~ /^\d+$/ or die "$usage"; +} + +unless ( @ARGV == 1 ) { die $usage } +my $file = $ARGV[0]; +autoformat_file( $file, $opt_l ); + +sub autoformat_file { + my ( $file, $line_length ) = @_; + use Perl::Tidy; + use IO::File; + my $fh = IO::File->new( $file, 'r' ); + unless ($fh) { die "cannot open '$file': $!\n" } + my $formatter = CommentFormatter->new($line_length); + + my $err=perltidy( + 'formatter' => $formatter, # callback object + 'source' => $fh, + 'argv' => "-npro -se", # dont need .perltidyrc + # errors to STDOUT + ); + if ($err) { + die "Error calling perltidy\n"; + } + $fh->close(); +} + +##################################################################### +# +# The CommentFormatter object has a write_line() method which receives +# tokenized lines from perltidy +# +##################################################################### + +package CommentFormatter; + +sub new { + my ( $class, $line_length ) = @_; + my $comment_block = ""; + bless { + _rcomment_block => \$comment_block, + _maximum_comment_length => 0, + _line_length => $line_length, + _in_hanging_side_comment => 0, + }, + $class; +} + +sub write_line { + + # This is called from perltidy line-by-line + # Comments will be treated specially (reformatted) + # Other lines go to stdout immediately + my $self = shift; + my $line_of_tokens = shift; + my $line_type = $line_of_tokens->{_line_type}; + ## my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; # the original line + my $rtoken_type = $line_of_tokens->{_rtoken_type}; # type of tokens + my $rtokens = $line_of_tokens->{_rtokens}; # text of tokens + + # Just print non-code, non-comment lines + if ( + $line_type ne 'CODE' # if it's not code, + || !@$rtokens # or is a blank line + || $$rtoken_type[-1] ne '#' # or the last token isn't a comment + ) + { + $self->print($input_line); + $self->{_in_hanging_side_comment} = 0; + return; + } + + # Now we either have: + # - a line with a side comment (@$rtokens >1), or + # - a full line comment (@$rtokens==1) + + # Output a line with a side comment, but remember it + if (@$rtokens > 1) { + $self->print($input_line); + $self->{_in_hanging_side_comment} = 1; + return; + } + + # A hanging side comment is a full-line comment immediately + # following a side comment or another hanging side comment. + # Output a hanging side comment directly + if ($self->{_in_hanging_side_comment}) { + $self->print($input_line); + return; + } + + # Now we know we have a full-line, non-hanging, comment + # Decide what to do -- + + # output comment without any words directly, since these don't get + # handled well by autoformat yet. For example, a box of stars. + # TODO: we could truncate obvious separator lines to the desired + # line length + if ( $$rtokens[-1] !~ /\w/ ) { + $self->print($input_line); + } + + # otherwise, append this comment to the group we are collecting + else { + $self->append_comment($input_line); + } + return; +} + +sub print { + my ( $self, $input_line ) = @_; + $self->flush_comments(); + print $input_line; +} + +sub append_comment { + my ( $self, $input_line ) = @_; + my $rcomment_block = $self->{_rcomment_block}; + my $maximum_comment_length = $self->{_maximum_comment_length}; + $$rcomment_block .= $input_line; + if (length($input_line) > $maximum_comment_length) { + $self->{_maximum_comment_length}=length($input_line); + } +} + +{ + my ( $separator1, $separator2, $separator3 ); + + BEGIN { + $separator1 = '-' x 2 . ' Original ' . '-' x 60 . "\n"; + $separator2 = '-' x 2 . ' Modified ' . '-' x 60 . "\n"; + $separator3 = '-' x 72 . "\n"; + } + + sub flush_comments { + + my ($self) = @_; + my $rcomment_block = $self->{_rcomment_block}; + my $line_length = $self->{_line_length}; + my $maximum_comment_length = $self->{_maximum_comment_length}; + if ($$rcomment_block) { + my $comments = $$rcomment_block; + + # we will just reformat lines longer than the desired length for now + # TODO: this can be changed + if ( $maximum_comment_length > $line_length ) { + my $formatted_comments = + Text::Autoformat::autoformat( $comments, + { right => $line_length, all => 1 } ); + + if ( $formatted_comments ne $comments ) { + print STDERR $separator1; + print STDERR $$rcomment_block; + print STDERR $separator2; + print STDERR $formatted_comments; + print STDERR $separator3; + if ( ifyes("Accept Changes? [Y/N]") ) { + $comments = $formatted_comments; + } + } + } + print $comments; + $$rcomment_block = ""; + $self->{_maximum_comment_length}=0; + } + } +} + +sub query { + my ($msg) = @_; + print STDERR $msg; + my $ans = ; + chomp $ans; + return $ans; +} + +sub queryu { + return uc query(@_); +} + +sub ifyes { + my $count = 0; + ASK: + my $ans = queryu(@_); + if ( $ans =~ /^Y/ ) { return 1 } + elsif ( $ans =~ /^N/ ) { return 0 } + else { + $count++; + if ( $count > 6 ) { die "error count exceeded in ifyes\n" } + print STDERR "Please answer 'Y' or 'N'\n"; + goto ASK; + } +} + +# called once after the last line of a file +sub finish_formatting { + my $self = shift; + $self->flush_comments(); +} diff --git a/examples/perllinetype.pl b/examples/perllinetype.pl new file mode 100755 index 00000000..6164b959 --- /dev/null +++ b/examples/perllinetype.pl @@ -0,0 +1,81 @@ +#!/usr/bin/perl -w +use strict; + +# For each line in a perl script, write to STDOUT lines of the form +# line number : line type : line text +# +# usage: +# perllinetype myfile.pl >myfile.new +# perllinetype myfile.new +# +# This file is one of the examples distributed with perltidy and is a +# simple demonstration of using a callback object with Perl::Tidy. +# +# Steve Hancock, July 2003 +# +use Getopt::Std; +use Perl::Tidy; +use IO::File; +$| = 1; +use vars qw($opt_h); +my $usage = <outfile +EOM +getopts('h') or die "$usage"; +if ($opt_h) { die $usage } + +# Make the source for perltidy, which will be a filehandle +# or just '-' if the source is stdin +my ($file, $fh, $source); +if ( @ARGV == 0 ) { + $source = '-'; +} +elsif ( @ARGV == 1 ) { + $file = $ARGV[0]; + $fh = IO::File->new( $file, 'r' ); + unless ($fh) { die "cannot open '$file': $!\n" } + $source = $fh; +} +else { die $usage } + +# make the callback object +my $formatter = MyFormatter->new(); + +my $dest; + +# start perltidy, which will start calling our write_line() +my $err=perltidy( + 'formatter' => $formatter, # callback object + 'source' => $source, + 'destination' => \$dest, # (not really needed) + 'argv' => "-npro -se", # dont need .perltidyrc + # errors to STDOUT +); +if ($err) { + die "Error calling perltidy\n"; +} +$fh->close() if $fh; + +package MyFormatter; + +sub new { + my ($class) = @_; + bless {}, $class; +} + +sub write_line { + + # This is called from perltidy line-by-line + my $self = shift; + my $line_of_tokens = shift; + my $line_type = $line_of_tokens->{_line_type}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + print "$input_line_number:$line_type:$input_line"; +} + +# called once after the last line of a file +sub finish_formatting { + my $self = shift; + return; +} diff --git a/examples/perlmask.pl b/examples/perlmask.pl new file mode 100755 index 00000000..5d94e4fd --- /dev/null +++ b/examples/perlmask.pl @@ -0,0 +1,250 @@ +#!/usr/bin/perl -w +use strict; + +# Walk through a perl script and create a masked file which is +# similar but which masks comments, quotes, patterns, and non-code +# lines so that it is easy to parse with regular expressions. +# +# usage: +# perlmask [-cn] myfile.pl >myfile.new +# perlmask [-cn] myfile.new +# +# In the masked file, +# -comments and pod will be masked (or removed) +# -here-doc text lines will be masked (or removed) +# -quotes and patterns, qw quotes, and here doc << operators will be +# replaced by the letters 'Q', 'q', or 'h' +# +# The result is a file in which all braces, parens, and square brackets +# are balanced, and it can be parsed relatively easily by regular +# expressions. +# +# -cn is an optional 'compression' flag. By default the masked file will have +# the same number of characters as the input file, with the difference being +# that certain characters will be changed (masked). +# +# If character position correspondence is not required, the size of the masked +# file can be significantly reduced by increasing the 'compression' level as +# follows: +# +# -c0 all mask file line numbers and character positions agree with +# original file (DEFAULT) +# -c1 line numbers agree and character positions agree within lines of code +# -c2 line numbers agree but character positions do not +# -c3 no correspondence between line numbers or character positions +# +# Try each of these on a file of significant size to see how they work. +# The default, -c0, is required if you are working with character positions +# that span multiple lines. The other levels may be useful if you +# do not need this level of correspondence. +# +# This file is one of the examples distributed with perltidy and demonstrates +# using a callback object with Perl::Tidy to walk through a perl file and find +# all of its tokens. It can be useful for simple perl code parsing tasks. It +# might even be helpful in debugging. Or you may want to modify it to suit +# your own purposes. +# +use Getopt::Std; +use IO::File; +$| = 1; +use vars qw($opt_c $opt_h); +my $usage = <outfile +EOM +getopts('c:h') or die "$usage"; +if ($opt_h) { die $usage } +unless ( defined($opt_c) ) { $opt_c = 0 } +if (@ARGV > 1) { die $usage } + +my $source=$ARGV[0]; # an undefined filename will become stdin + +# strings to hold the files (arrays could be used to) +my ( $masked_file, $original_file ); + +PerlMask::perlmask( + _source => $source, + _rmasked_file => \$masked_file, + _roriginal_file => \$original_file, # optional + _compression => $opt_c # optional, default=0 +); + +# Now we have the masked and original files in strings of equal length. +# We could search for specific text in the masked file here. But here +# we'll just print the masked file: +if ($masked_file) { print $masked_file; } + +##################################################################### +# +# The PerlMask package is an interface to perltidy which accepts a +# source filehandle and returns a 'masked' version of the source as +# a string or array. It can also optionally return the original file +# as a string or array. +# +# It works by making a callback object with a write_line() method to +# receive tokenized lines from perltidy. This write_line method +# selectively replaces tokens with either their original text or with a +# benign masking character (such as '#' or 'Q'). +# +# Usage: +# +# PerlMask::perlmask( +# _source => $fh, # required source +# _rmasked_file => \$masked_file, # required ref to ARRAY or SCALAR +# _roriginal_file => \$original_file, # optional ref to ARRAY or SCALAR +# _compression => $opt_c # optional +# ); +# +# _source is any source that perltidy will accept, including a +# filehandle or reference to SCALAR or ARRAY +# +# The compression flag may have these values: +# 0 all mask file line numbers and character positions agree with +# original file (DEFAULT) +# 1 line numbers agree and character positions agree within lines of code +# 2 line numbers agree but character positions do not +# 3 no correspondence between line numbers or character positions +# +##################################################################### + +package PerlMask; +use Carp; +use Perl::Tidy; + +sub perlmask { + + my %args = ( _compression => 0, @_ ); + my $rfile = $args{_rmasked_file}; + unless ( defined($rfile) ) { + croak + "Missing required parameter '_rmasked_file' in call to perlmask\n"; + } + my $ref=ref($rfile); + unless ( $ref =~ /^(SCALAR|ARRAY)$/ ) { + croak < $args{_source}, + 'formatter' => bless( \%args, __PACKAGE__ ), # callback object + 'argv' => "-npro -se", # -npro : ignore .perltidyrc, + # -se : errors to STDOUT + ); + if ($err) { + die "Error calling perltidy\n"; + } +} + +sub print_line { + + # called from write_line to dispatch one line (either masked or original).. + # here we'll either append it to a string or array, as appropriate + my ( $rfile, $line ) = @_; + if ( defined($rfile) ) { + if ( ref($rfile) eq 'SCALAR' ) { + $$rfile .= $line . "\n"; + } + elsif ( ref($rfile) eq 'ARRAY' ) { + push @{$rfile}, $line . "\n"; + } + } +} + +sub write_line { + + # This is called from perltidy line-by-line + my ( $self, $line_of_tokens ) = @_; + my $rmasked_file = $self->{_rmasked_file}; + my $roriginal_file = $self->{_roriginal_file}; + my $opt_c = $self->{_compression}; + + my $line_type = $line_of_tokens->{_line_type}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + chomp $input_line; + + # mask non-CODE lines + if ( $line_type ne 'CODE' ) { + return if ( $opt_c == 3 ); + my $len = length($input_line); + if ( $opt_c == 0 && $len > 0 ) { + print_line( $roriginal_file, $input_line ) if $roriginal_file; + print_line( $rmasked_file, '#' x $len ); + } + else { + print_line( $roriginal_file, $input_line ) if $roriginal_file; + print_line( $rmasked_file, "" ); + } + return; + } + + # we'll build the masked line token by token + my $masked_line = ""; + + # add leading spaces if not in a higher compression mode + if ( $opt_c <= 1 ) { + + # Find leading whitespace. But be careful..we don't want the + # whitespace if it is part of quoted text, because it will + # already be contained in a token. + if ( $input_line =~ /^(\s+)/ && !$line_of_tokens->{_starting_in_quote} ) + { + $masked_line = $1; + } + } + + # loop over tokens to construct one masked line + for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { + + # Mask certain token types by replacing them with their type code: + # type definition + # ---- ---------- + # Q quote or pattern + # q qw quote + # h << here doc operator + # # comment + # + # This choice will produce a mask file that has balanced + # container tokens and does not cause parsing problems. + if ( $$rtoken_type[$j] =~ /^[Qqh]$/ ) { + if ( $opt_c <= 1 ) { + $masked_line .= $$rtoken_type[$j] x length( $$rtokens[$j] ); + } + else { + $masked_line .= $$rtoken_type[$j]; + } + } + + # Mask a comment + elsif ( $$rtoken_type[$j] eq '#' ) { + if ( $opt_c == 0 ) { + $masked_line .= '#' x length( $$rtokens[$j] ); + } + } + + # All other tokens go out verbatim + else { + $masked_line .= $$rtokens[$j]; + } + } + print_line( $roriginal_file, $input_line ) if $roriginal_file; + print_line( $rmasked_file, $masked_line ); + + # self-check lengths; this error should never happen + if ( $opt_c == 0 && length($masked_line) != length($input_line) ) { + my $lmask = length($masked_line); + my $linput = length($input_line); + print STDERR +"$input_line_number: length ERROR, masked length=$lmask but input length=$linput\n"; + } +} + +# called once after the last line of a file +sub finish_formatting { + my $self = shift; + return; +} diff --git a/examples/perltidy_okw.pl b/examples/perltidy_okw.pl new file mode 100755 index 00000000..d12370ff --- /dev/null +++ b/examples/perltidy_okw.pl @@ -0,0 +1,43 @@ +#!/usr/bin/perl -w + +# Example use a perltidy postfilter to outdent certain leading keywords + +# Usage: +# perltidy_okw.pl -sil=1 file.pl + +# This version outdents hardwired keywords 'step', 'command', and 'expected' +# The following is an example of the desired effect. The flag -sil=1 is +# needed to get a starting indentation level so that the outdenting +# is visible. + +=pod +step 4; +command 'Share project: project1'; +expected 'A project megjelenik a serveren'; + shareProject ('project1', 'login', '123', Login => 1, PortalServer => +$openJoinAddress); + valueCheck ('project1_share', listBIMCloudData ('projects')); + + +step 5; +command 'quitAC'; + quitAC (); +=cut + +# Run it exactly like perltidy, and the postfilter removes the +# leading whitespace of lines which begin with your keywords. The +# postfilter works on the file as a single string, so the 'm' quote +# modifier is needed to make the ^ and $ string positioners work + +# See http://perltidy.sourceforge.net/Tidy.html for further details +# on how to call Perl::Tidy +use Perl::Tidy; +my $arg_string = undef; +my $err=Perl::Tidy::perltidy( + argv => $arg_string, + postfilter => + sub { $_ = $_[0]; s/^\s*(step|command|expected)(.*)$/$1$2/gm; return $_ } +); +if ($err) { + die "Error calling perltidy\n"; +} diff --git a/examples/perltidyrc_dump.pl b/examples/perltidyrc_dump.pl new file mode 100755 index 00000000..f2008e71 --- /dev/null +++ b/examples/perltidyrc_dump.pl @@ -0,0 +1,328 @@ +#!/usr/bin/perl -w +use strict; + +# This program reads .perltidyrc files and writes them back out +# into a standard format (but comments will be lost). +# +# It also demonstrates how to use the perltidy 'options-dump' and related call +# parameters to read a .perltidyrc file, convert to long names, put it in a +# hash, and write back to standard output in sorted order. Requires +# Perl::Tidy. +# +# Steve Hancock, June 2006 +# +my $usage = < 1 ) { die "$usage" } + +my $config_file = $ARGV[0]; +my ( + $error_message, $rOpts, $rGetopt_flags, + $rsections, $rabbreviations, $rOpts_default, + $rabbreviations_default, + +) = read_perltidyrc($config_file); + +# always check the error message first +if ($error_message) { + die "$error_message\n"; +} + +# make a list of perltidyrc options which are same as default +my %equals_default; +foreach my $long_name ( keys %{$rOpts} ) { + my $val = $rOpts->{$long_name}; + if ( defined( $rOpts_default->{$long_name} ) ) { + my $val2 = $rOpts_default->{$long_name}; + if ( defined($val2) && defined($val) ) { + $equals_default{$long_name} = ( $val2 eq $val ); + } + } +} + +# Optional: minimize the perltidyrc file length by deleting long_names +# in $rOpts which are also in $rOpts_default and have the same value. +# This would be useful if a perltidyrc file has been constructed from a +# full parameter dump, for example. +if ( $my_opts{d} ) { + foreach my $long_name ( keys %{$rOpts} ) { + delete $rOpts->{$long_name} if $equals_default{$long_name}; + } +} + +# find user-defined abbreviations +my %abbreviations_user; +foreach my $key ( keys %$rabbreviations ) { + unless ( $rabbreviations_default->{$key} ) { + $abbreviations_user{$key} = $rabbreviations->{$key}; + } +} + +# dump the options, if any +if ( %$rOpts || %abbreviations_user ) { + dump_options( $cmdline, \%my_opts, $rOpts, $rGetopt_flags, $rsections, + $rabbreviations, \%equals_default, \%abbreviations_user ); +} +else { + if ($config_file) { + print STDERR <long_name->parameter_value + # so that we can write parameters by section + my %section_and_name; + my $rsection_name_value = \%section_and_name; + my %saw_section; + foreach my $long_name ( keys %{$rOpts} ) { + my $section = $rsections->{$long_name}; + $section = "UNKNOWN" unless ($section); # shouldn't happen + + # build a hash giving section->long_name->parameter_value + $rsection_name_value->{$section}->{$long_name} = $rOpts->{$long_name}; + + # remember what sections are in this hash + $saw_section{$section}++; + } + + # build a table for long_name->short_name abbreviations + my %short_name; + foreach my $abbrev ( keys %{$rabbreviations} ) { + foreach my $abbrev ( sort keys %$rabbreviations ) { + my @list = @{ $$rabbreviations{$abbrev} }; + + # an abbreviation may expand into one or more other words, + # but only those that expand to a single word (which must be + # one of the long names) are the short names that we want + # here. + next unless @list == 1; + my $long_name = $list[0]; + $short_name{$long_name} = $abbrev; + } + } + + unless ( $rmy_opts->{q} ) { + my $date = localtime(); + print "# perltidy configuration file created $date\n"; + print "# using: $cmdline\n"; + } + + # loop to write section-by-section + foreach my $section ( sort keys %saw_section ) { + unless ( $rmy_opts->{q} ) { + print "\n"; + + # remove leading section number, which is there + # for sorting, i.e., + # 1. Basic formatting options -> Basic formatting options + my $trimmed_section = $section; + $trimmed_section =~ s/^\d+\. //; + print "# $trimmed_section\n"; + } + + # loop over all long names for this section + my $rname_value = $rsection_name_value->{$section}; + foreach my $long_name ( sort keys %{$rname_value} ) { + + # pull out getopt flag and actual parameter value + my $flag = $rGetopt_flags->{$long_name}; + my $value = $rname_value->{$long_name}; + + # turn this it back into a parameter + my $prefix = '--'; + my $short_prefix = '-'; + my $suffix = ""; + if ($flag) { + if ( $flag =~ /^=/ ) { + if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } + $suffix = "=" . $value; + } + elsif ( $flag =~ /^!/ ) { + $prefix .= "no" unless ($value); + $short_prefix .= "n" unless ($value); + } + elsif ( $flag =~ /^:/ ) { + if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } + $suffix = "=" . $value; + } + else { + + # shouldn't happen + print +"# ERROR in dump_options: unrecognized flag $flag for $long_name\n"; + } + } + + # print the long version of the parameter + # with the short version as a side comment + my $short_name = $short_name{$long_name}; + my $long_option = $prefix . $long_name . $suffix; + + # A few options do not have a short abbreviation. These include + # 'recombine' and 'valign', which are mainly for debugging. As a + # workaround, we will make it the same as the long option. This + # will insure that the -s -q flags work. + my $short_option = $long_option; + if ($short_name) { + $short_option = $short_prefix . $short_name . $suffix; + } + + my $note = $requals_default->{$long_name} ? " [=default]" : ""; + if ( $rmy_opts->{s} ) { + print $short_option. "\n"; + } + else { + my $side_comment = ""; + unless ( $rmy_opts->{q} ) { + my $spaces = 40 - length($long_option); + $spaces = 2 if ( $spaces < 2 ); + $side_comment = + ' ' x $spaces . '# ' . $short_option . $note; + } + print $long_option . $side_comment . "\n"; + } + } + } + + if ( %{$rabbreviations_user} ) { + unless ( $rmy_opts->{q} ) { + print "\n"; + print "# Abbreviations\n"; + } + foreach my $key ( keys %$rabbreviations_user ) { + my @vals = @{ $rabbreviations_user->{$key} }; + print $key. ' {' . join( ' ', @vals ) . '}' . "\n"; + } + } +} + +sub read_perltidyrc { + + # Example routine to have Perl::Tidy read and validate perltidyrc + # file, and return related flags and abbreviations. + # + # input parameter - + # $config_file is the name of a .perltidyrc file we want to read + # or a reference to a string or array containing the .perltidyrc file + # if not defined, Perl::Tidy will try to find the user's .perltidyrc + # output parameters - + # $error_message will be blank unless an error occurs + # $rOpts - reference to the hash of options in the .perlticyrc + # NOTE: + # Perl::Tidy will croak or die on certain severe errors + + my ($config_file) = @_; + my $error_message = ""; + my %Opts; # any options found will be put here + + # the module must be installed for this to work + eval "use Perl::Tidy"; + if ($@) { + $error_message = "Perl::Tidy not installed\n"; + return ( $error_message, \%Opts ); + } + + # be sure this version supports this + my $version = $Perl::Tidy::VERSION; + if ( $version < 20060528 ) { + $error_message = "perltidy version $version cannot read options\n"; + return ( $error_message, \%Opts ); + } + + my $stderr = ""; # try to capture error messages + my $argv = ""; # do not let perltidy see our @ARGV + + # we are going to make two calls to perltidy... + # first with an empty .perltidyrc to get the default parameters + my $empty_file = ""; # this will be our .perltidyrc file + my %Opts_default; # this will receive the default options hash + my %abbreviations_default; + my $err = Perl::Tidy::perltidy( + perltidyrc => \$empty_file, + dump_options => \%Opts_default, + dump_options_type => 'full', # 'full' gives everything + dump_abbreviations => \%abbreviations_default, + stderr => \$stderr, + argv => \$argv, + ); + if ($err) { + die "Error calling perltidy\n"; + } + + # now we call with a .perltidyrc file to get its parameters + my %Getopt_flags; + my %sections; + my %abbreviations; + Perl::Tidy::perltidy( + perltidyrc => $config_file, + dump_options => \%Opts, + dump_options_type => 'perltidyrc', # default is 'perltidyrc' + dump_getopt_flags => \%Getopt_flags, + dump_options_category => \%sections, + dump_abbreviations => \%abbreviations, + stderr => \$stderr, + argv => \$argv, + ); + + # try to capture any errors generated by perltidy call + # but for severe errors it will typically croak + $error_message .= $stderr; + + # debug: show how everything is stored by printing it out + my $DEBUG = 0; + if ($DEBUG) { + print "---Getopt Parameters---\n"; + foreach my $key ( sort keys %Getopt_flags ) { + print "$key$Getopt_flags{$key}\n"; + } + print "---Manual Sections---\n"; + foreach my $key ( sort keys %sections ) { + print "$key -> $sections{$key}\n"; + } + print "---Abbreviations---\n"; + foreach my $key ( sort keys %abbreviations ) { + my @names = @{ $abbreviations{$key} }; + print "$key -> {@names}\n"; + unless ( $abbreviations_default{$key} ) { + print "NOTE: $key is user defined\n"; + } + } + } + + return ( $error_message, \%Opts, \%Getopt_flags, \%sections, + \%abbreviations, \%Opts_default, \%abbreviations_default, ); +} diff --git a/examples/perlxmltok.pl b/examples/perlxmltok.pl new file mode 100755 index 00000000..17ef080f --- /dev/null +++ b/examples/perlxmltok.pl @@ -0,0 +1,294 @@ +#!/usr/bin/perl -w +use strict; +# +# Convert a perl script into an xml file +# +# usage: +# perlxmltok myfile.pl >myfile.xml +# perlxmltok myfile.xml +# +# The script is broken at the line and token level. +# +# This file is one of the examples distributed with perltidy and demonstrates +# using a callback object with Perl::Tidy to walk through a perl file and +# process its tokens. It may or may not have any actual usefulness. You can +# modify it to suit your own purposes; see sub get_line(). +# +use Perl::Tidy; +use IO::File; +use Getopt::Std; +use vars qw($opt_h); +my $file; +my $usage = <outfile +EOM +getopts('h') or die "$usage"; +if ($opt_h) {die $usage} +if ( @ARGV == 1 ) { + $file = $ARGV[0]; +} +else { die $usage } +my $source; +my $fh; +if ($file) { + $fh = IO::File->new( $file, 'r' ); + unless ($fh) { die "cannot open '$file': $!\n" } + $source = $fh; +} +else { + $source = '-'; +} +my $formatter = Perl::Tidy::XmlWriter->new($file); +my $dest; + +# start perltidy, which will start calling our write_line() +my $err = perltidy( + 'formatter' => $formatter, # callback object + 'source' => $source, + 'destination' => \$dest, # not really needed + 'argv' => "-npro -se", # dont need .perltidyrc + # errors to STDOUT +); +if ($err) { + die "Error calling perltidy\n"; +} +$fh->close() if $fh; + +##################################################################### +# +# The Perl::Tidy::XmlWriter class writes a copy of the input stream in xml +# +##################################################################### + +package Perl::Tidy::XmlWriter; + +# class variables +use vars qw{ + %token_short_names + %short_to_long_names + $rOpts + $missing_html_entities +}; + +# replace unsafe characters with HTML entity representation if HTML::Entities +# is available +{ eval "use HTML::Entities"; $missing_html_entities = $@; } + +sub new { + + my ( $class, $input_file ) = @_; + my $self = bless { }, $class; + + $self->print( <<"HEADER"); + +HEADER + + unless ( !$input_file || $input_file eq '-' || ref($input_file) ) { + + $self->print( <<"COMMENT"); + +COMMENT + } + + $self->print("\n"); + return $self; +} + +sub print { + my ( $self, $line ) = @_; + print $line; +} + +sub write_line { + + # This routine will be called once perl line by perltidy + my $self = shift; + my ($line_of_tokens) = @_; + my $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; + my $line_number = $line_of_tokens->{_line_number}; + chomp $input_line; + $self->print(" \n"); + $self->print(" \n"); + + $input_line = my_encode_entities($input_line); + $self->print("$input_line\n"); + $self->print(" \n"); + + # markup line of code.. + if ( $line_type eq 'CODE' ) { + my $xml_line; + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + + if ( $input_line =~ /(^\s*)/ ) { + $xml_line = $1; + } + else { + $xml_line = ""; + } + my $rmarked_tokens = $self->markup_tokens( $rtokens, $rtoken_type ); + $xml_line .= join '', @$rmarked_tokens; + + $self->print(" \n"); + $self->print("$xml_line\n"); + $self->print(" \n"); + } + + $self->print(" \n"); +} + +BEGIN { + + # This is the official list of tokens which may be identified by the + # user. Long names are used as getopt keys. Short names are + # convenient short abbreviations for specifying input. Short names + # somewhat resemble token type characters, but are often different + # because they may only be alphanumeric, to allow command line + # input. Also, note that because of case insensitivity of xml, + # this table must be in a single case only (I've chosen to use all + # lower case). + # When adding NEW_TOKENS: update this hash table + # short names => long names + %short_to_long_names = ( + 'n' => 'numeric', + 'p' => 'paren', + 'q' => 'quote', + 's' => 'structure', + 'c' => 'comment', + 'b' => 'blank', + 'v' => 'v-string', + 'cm' => 'comma', + 'w' => 'bareword', + 'co' => 'colon', + 'pu' => 'punctuation', + 'i' => 'identifier', + 'j' => 'label', + 'h' => 'here-doc-target', + 'hh' => 'here-doc-text', + 'k' => 'keyword', + 'sc' => 'semicolon', + 'm' => 'subroutine', + 'pd' => 'pod-text', + ); + + # Now we have to map actual token types into one of the above short + # names; any token types not mapped will get 'punctuation' + # properties. + + # The values of this hash table correspond to the keys of the + # previous hash table. + # The keys of this hash table are token types and can be seen + # by running with --dump-token-types (-dtt). + + # When adding NEW_TOKENS: update this hash table + # $type => $short_name + %token_short_names = ( + '#' => 'c', + 'n' => 'n', + 'v' => 'v', + 'b' => 'b', + 'k' => 'k', + 'F' => 'k', + 'Q' => 'q', + 'q' => 'q', + 'J' => 'j', + 'j' => 'j', + 'h' => 'h', + 'H' => 'hh', + 'w' => 'w', + ',' => 'cm', + '=>' => 'cm', + ';' => 'sc', + ':' => 'co', + 'f' => 'sc', + '(' => 'p', + ')' => 'p', + 'M' => 'm', + 'P' => 'pd', + ); + + # These token types will all be called identifiers for now + # FIXME: need to separate user defined modules as separate type + my @identifier = qw" i t U C Y Z G :: "; + @token_short_names{@identifier} = ('i') x scalar(@identifier); + + # These token types will be called 'structure' + my @structure = qw" { } "; + @token_short_names{@structure} = ('s') x scalar(@structure); + +} + +sub markup_tokens { + my $self = shift; + my ( $rtokens, $rtoken_type ) = @_; + my ( @marked_tokens, $j, $string, $type, $token ); + + for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) { + $type = $$rtoken_type[$j]; + $token = $$rtokens[$j]; + + #------------------------------------------------------- + # Patch : intercept a sub name here and split it + # into keyword 'sub' and sub name + if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { + $token = $self->markup_xml_element( $1, 'k' ); + push @marked_tokens, $token; + $token = $2; + $type = 'M'; + } + + # Patch : intercept a package name here and split it + # into keyword 'package' and name + if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { + $token = $self->markup_xml_element( $1, 'k' ); + push @marked_tokens, $token; + $token = $2; + $type = 'i'; + } + #------------------------------------------------------- + + $token = $self->markup_xml_element( $token, $type ); + push @marked_tokens, $token; + } + return \@marked_tokens; +} + +sub my_encode_entities { + my ($token) = @_; + + # escape any characters not allowed in XML content. + # ??s/’/'/; + if ($missing_html_entities) { + $token =~ s/\&/&/g; + $token =~ s/\/>/g; + $token =~ s/\"/"/g; + } + else { + HTML::Entities::encode_entities($token); + } + return $token; +} + +sub markup_xml_element { + my $self = shift; + my ( $token, $type ) = @_; + if ($token) { $token = my_encode_entities($token) } + + # get the short abbreviation for this token type + my $short_name = $token_short_names{$type}; + if ( !defined($short_name) ) { + $short_name = "pu"; # punctuation is default + } + $token = qq(<$short_name>) . $token . qq(); + return $token; +} + +sub finish_formatting { + + # called after last line + my $self = shift; + $self->print("\n"); + return; +} diff --git a/examples/pt.bat b/examples/pt.bat new file mode 100644 index 00000000..5ac12a6a --- /dev/null +++ b/examples/pt.bat @@ -0,0 +1,3 @@ +@echo off +rem batch file to run perltidy under msdos +perl -S perltidy %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/examples/testfa.t b/examples/testfa.t new file mode 100644 index 00000000..757e99d0 --- /dev/null +++ b/examples/testfa.t @@ -0,0 +1,70 @@ +use strict; +use Test; +use Carp; +BEGIN {plan tests => 1} +use Perl::Tidy; + +#---------------------------------------------------------------------- +## test file->array +# +# Also tests: +# passing perltidyrc (we cannot allow local .perltidyrc flags to be used) +# the -gnu flag +#---------------------------------------------------------------------- +my $source = "lextest"; +my $perltidyrc = <<'EOM'; +-gnu +EOM + +my @tidy_output; + +Perl::Tidy::perltidy( + source => $source, + destination => \@tidy_output, + perltidyrc => \$perltidyrc, + argv => '-nsyn', +); + +my @expected_output=; +my $ok=1; +if (@expected_output == @tidy_output) { + while ( $_ = pop @tidy_output ) { + my $expect = pop @expected_output; + if ( $expect ne $_ ) { + print STDERR "got:$_"; + print STDERR "---\n"; + print STDERR "expected_output:$expect"; + $ok=0; + last; + } + } +} +else { + print STDERR "Line Counts differ\n"; + $ok=0; +} +ok ($ok,1); + +# This is the expected result of 'perltidy -gnu lextest': + +__DATA__ +# This is a simple testfile to demonstrate perltidy, from perlop(1). +# One way (of several) to run perltidy is as follows: +# +# perl ./perltidy lextest +# +# The output will be "lextest.tdy" +$_ = <<'EOL'; + $url = new URI::URL "http://www/"; die if $url eq "xXx"; +EOL +LOOP: +{ + print(" digits"), redo LOOP if /\G\d+\b[,.;]?\s*/gc; + print(" lowercase"), redo LOOP if /\G[a-z]+\b[,.;]?\s*/gc; + print(" UPPERCASE"), redo LOOP if /\G[A-Z]+\b[,.;]?\s*/gc; + print(" Capitalized"), redo LOOP if /\G[A-Z][a-z]+\b[,.;]?\s*/gc; + print(" MiXeD"), redo LOOP if /\G[A-Za-z]+\b[,.;]?\s*/gc; + print(" alphanumeric"), redo LOOP if /\G[A-Za-z0-9]+\b[,.;]?\s*/gc; + print(" line-noise"), redo LOOP if /\G[^A-Za-z0-9]+/gc; + print ". That's all!\n"; +} diff --git a/examples/testff.t b/examples/testff.t new file mode 100644 index 00000000..9ac83092 --- /dev/null +++ b/examples/testff.t @@ -0,0 +1,71 @@ +use strict; +use Test; +use Carp; +BEGIN {plan tests => 1} +use Perl::Tidy; + +#---------------------------------------------------------------------- +## test file->file through arg list +# Also tests: +# passing parameters names through 'argv=>' +# -o flag +# +# NOTE: This will read file 'lextest' and create file 'lextest.out' +# (for portability, 8.3 filenames are best) +# NOTE: must use -npro flag to avoid using local perltidyrc flags +#---------------------------------------------------------------------- + +my $input_file = "lextest"; +my $output_file = "lextest.out"; + +Perl::Tidy::perltidy( + source => undef, + destination => undef, + perltidyrc => undef, + argv => "-nsyn -npro $input_file -o $output_file", +); + +open FILE, "< $output_file"; +my @output=; +my @expected_output=; +my $ok=1; +if (@expected_output == @output) { + while ( $_ = pop @output ) { + my $expect = pop @expected_output; + if ( $expect ne $_ ) { + print STDERR "got:$_"; + print STDERR "---\n"; + print STDERR "expected_output:$expect"; + $ok=0; + last; + } + } +} +else { + print STDERR "Line Counts differ\n"; + $ok=0; +} +ok ($ok,1); + +# This is the expected result of 'perltidy lextest': + +__DATA__ +# This is a simple testfile to demonstrate perltidy, from perlop(1). +# One way (of several) to run perltidy is as follows: +# +# perl ./perltidy lextest +# +# The output will be "lextest.tdy" +$_ = <<'EOL'; + $url = new URI::URL "http://www/"; die if $url eq "xXx"; +EOL +LOOP: { + print(" digits"), redo LOOP if /\G\d+\b[,.;]?\s*/gc; + print(" lowercase"), redo LOOP if /\G[a-z]+\b[,.;]?\s*/gc; + print(" UPPERCASE"), redo LOOP if /\G[A-Z]+\b[,.;]?\s*/gc; + print(" Capitalized"), redo LOOP if /\G[A-Z][a-z]+\b[,.;]?\s*/gc; + print(" MiXeD"), redo LOOP if /\G[A-Za-z]+\b[,.;]?\s*/gc; + print(" alphanumeric"), redo LOOP if /\G[A-Za-z0-9]+\b[,.;]?\s*/gc; + print(" line-noise"), redo LOOP if /\G[^A-Za-z0-9]+/gc; + print ". That's all!\n"; +} diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm new file mode 100644 index 00000000..d5f3360b --- /dev/null +++ b/lib/Perl/Tidy.pm @@ -0,0 +1,3903 @@ +# +###########################################################- +# +# perltidy - a perl script indenter and formatter +# +# Copyright (c) 2000-2018 by Steve Hancock +# Distributed under the GPL license agreement; see file COPYING +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# For brief instructions, try 'perltidy -h'. +# For more complete documentation, try 'man perltidy' +# or visit http://perltidy.sourceforge.net +# +# This script is an example of the default style. It was formatted with: +# +# perltidy Tidy.pm +# +# Code Contributions: See ChangeLog.html for a complete history. +# Michael Cartmell supplied code for adaptation to VMS and helped with +# v-strings. +# Hugh S. Myers supplied sub streamhandle and the supporting code to +# create a Perl::Tidy module which can operate on strings, arrays, etc. +# Yves Orton supplied coding to help detect Windows versions. +# Axel Rose supplied a patch for MacPerl. +# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator. +# Dan Tyrell contributed a patch for binary I/O. +# Ueli Hugenschmidt contributed a patch for -fpsc +# Sam Kington supplied a patch to identify the initial indentation of +# entabbed code. +# jonathan swartz supplied patches for: +# * .../ pattern, which looks upwards from directory +# * --notidy, to be used in directories where we want to avoid +# accidentally tidying +# * prefilter and postfilter +# * iterations option +# +# Many others have supplied key ideas, suggestions, and bug reports; +# see the CHANGES file. +# +############################################################ + +package Perl::Tidy; + +# perlver reports minimum version needed is 5.8.0 +# 5.004 needed for IO::File +# 5.008 needed for wide characters +use 5.008; +use warnings; +use strict; +use Exporter; +use Carp; +use Digest::MD5 qw(md5_hex); +use Perl::Tidy::Debugger; +use Perl::Tidy::DevNull; +use Perl::Tidy::Diagnostics; +use Perl::Tidy::FileWriter; +use Perl::Tidy::Formatter; +use Perl::Tidy::HtmlWriter; +use Perl::Tidy::IOScalar; +use Perl::Tidy::IOScalarArray; +use Perl::Tidy::IndentationItem; +use Perl::Tidy::LineSink; +use Perl::Tidy::LineSource; +use Perl::Tidy::Logger; +use Perl::Tidy::Tokenizer; +use Perl::Tidy::VerticalAligner; +local $| = 1; + +use vars qw{ + $VERSION + @ISA + @EXPORT + $missing_file_spec + $fh_stderr + $rOpts_character_encoding +}; + +@ISA = qw( Exporter ); +@EXPORT = qw( &perltidy ); + +use Cwd; +use Encode (); +use IO::File; +use File::Basename; +use File::Copy; +use File::Temp qw(tempfile); + +BEGIN { + + # Release version is the approximate YYMMDD of the release. + # Development version is (Last Release).(Development Number) + + # To make the number continually increasing, the Development Number is a 2 + # digit number starting at 01 after a release is continually bumped along + # at significant points during developement. If it ever reaches 99 then the + # Release version must be bumped, and it is probably past time for a + # release anyway. + + $VERSION = '20180220.01'; +} + +sub streamhandle { + + # given filename and mode (r or w), create an object which: + # has a 'getline' method if mode='r', and + # has a 'print' method if mode='w'. + # The objects also need a 'close' method. + # + # How the object is made: + # + # if $filename is: Make object using: + # ---------------- ----------------- + # '-' (STDIN if mode = 'r', STDOUT if mode='w') + # string IO::File + # ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray) + # STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar) + # object object + # (check for 'print' method for 'w' mode) + # (check for 'getline' method for 'r' mode) + my ( $filename, $mode ) = @_; + + my $ref = ref($filename); + my $New; + my $fh; + + # handle a reference + if ($ref) { + if ( $ref eq 'ARRAY' ) { + $New = sub { Perl::Tidy::IOScalarArray->new(@_) }; + } + elsif ( $ref eq 'SCALAR' ) { + $New = sub { Perl::Tidy::IOScalar->new(@_) }; + } + else { + + # Accept an object with a getline method for reading. Note: + # IO::File is built-in and does not respond to the defined + # operator. If this causes trouble, the check can be + # skipped and we can just let it crash if there is no + # getline. + if ( $mode =~ /[rR]/ ) { + + # RT#97159; part 1 of 2: updated to use 'can' + ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) { + if ( $ref->can('getline') ) { + $New = sub { $filename }; + } + else { + $New = sub { undef }; + confess <can('print') ) { + $New = sub { $filename }; + } + else { + $New = sub { undef }; + confess <new(@_) }; + } + } + $fh = $New->( $filename, $mode ) + or Warn("Couldn't open file:$filename in mode:$mode : $!\n"); + + return $fh, ( $ref or $filename ); +} + +sub find_input_line_ending { + + # Peek at a file and return first line ending character. + # Quietly return undef in case of any trouble. + my ($input_file) = @_; + my $ending; + + # silently ignore input from object or stdin + if ( ref($input_file) || $input_file eq '-' ) { + return $ending; + } + + my $fh; + open( $fh, '<', $input_file ) || return $ending; + + binmode $fh; + my $buf; + read( $fh, $buf, 1024 ); + close $fh; + if ( $buf && $buf =~ /([\012\015]+)/ ) { + my $test = $1; + + # dos + if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" } + + # mac + elsif ( $test =~ /^\015+$/ ) { $ending = "\015" } + + # unix + elsif ( $test =~ /^\012+$/ ) { $ending = "\012" } + + # unknown + else { } + } + + # no ending seen + else { } + + return $ending; +} + +sub catfile { + + # concatenate a path and file basename + # returns undef in case of error + + my @parts = @_; + + #BEGIN { eval "require File::Spec"; $missing_file_spec = $@; } + BEGIN { + eval { require File::Spec }; + $missing_file_spec = $@; + } + + # use File::Spec if we can + unless ($missing_file_spec) { + return File::Spec->catfile(@parts); + } + + # Perl 5.004 systems may not have File::Spec so we'll make + # a simple try. We assume File::Basename is available. + # return undef if not successful. + my $name = pop @parts; + my $path = join '/', @parts; + my $test_file = $path . $name; + my ( $test_name, $test_path ) = fileparse($test_file); + return $test_file if ( $test_name eq $name ); + return if ( $^O eq 'VMS' ); + + # this should work at least for Windows and Unix: + $test_file = $path . '/' . $name; + ( $test_name, $test_path ) = fileparse($test_file); + return $test_file if ( $test_name eq $name ); + return; +} + +# Here is a map of the flow of data from the input source to the output +# line sink: +# +# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter--> +# input groups output +# lines tokens lines of lines lines +# lines +# +# The names correspond to the package names responsible for the unit processes. +# +# The overall process is controlled by the "main" package. +# +# LineSource is the stream of input lines +# +# Tokenizer analyzes a line and breaks it into tokens, peeking ahead +# if necessary. A token is any section of the input line which should be +# manipulated as a single entity during formatting. For example, a single +# ',' character is a token, and so is an entire side comment. It handles +# the complexities of Perl syntax, such as distinguishing between '<<' as +# a shift operator and as a here-document, or distinguishing between '/' +# as a divide symbol and as a pattern delimiter. +# +# Formatter inserts and deletes whitespace between tokens, and breaks +# sequences of tokens at appropriate points as output lines. It bases its +# decisions on the default rules as modified by any command-line options. +# +# VerticalAligner collects groups of lines together and tries to line up +# certain tokens, such as '=>', '#', and '=' by adding whitespace. +# +# FileWriter simply writes lines to the output stream. +# +# The Logger package, not shown, records significant events and warning +# messages. It writes a .LOG file, which may be saved with a +# '-log' or a '-g' flag. + +sub perltidy { + + my %input_hash = @_; + + my %defaults = ( + argv => undef, + destination => undef, + formatter => undef, + logfile => undef, + errorfile => undef, + perltidyrc => undef, + source => undef, + stderr => undef, + dump_options => undef, + dump_options_type => undef, + dump_getopt_flags => undef, + dump_options_category => undef, + dump_options_range => undef, + dump_abbreviations => undef, + prefilter => undef, + postfilter => undef, + ); + + # don't overwrite callers ARGV + local @ARGV = @ARGV; + local *STDERR = *STDERR; + + if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { + local $" = ')('; + my @good_keys = sort keys %defaults; + @bad_keys = sort @bad_keys; + confess <print($msg); return } + + sub Exit { + my $flag = shift; + if ($flag) { goto ERROR_EXIT } + else { goto NORMAL_EXIT } + croak "unexpectd return to Exit"; + } + + sub Die { + my $msg = shift; + Warn($msg); + Exit(1); + croak "unexpected return to Die"; + } + + # extract various dump parameters + my $dump_options_type = $input_hash{'dump_options_type'}; + my $dump_options = $get_hash_ref->('dump_options'); + my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); + my $dump_options_category = $get_hash_ref->('dump_options_category'); + my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); + my $dump_options_range = $get_hash_ref->('dump_options_range'); + + # validate dump_options_type + if ( defined($dump_options) ) { + unless ( defined($dump_options_type) ) { + $dump_options_type = 'perltidyrc'; + } + unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { + croak <new(); + } + + # see if ARGV is overridden + if ( defined($argv) ) { + + my $rargv = ref $argv; + if ( $rargv eq 'SCALAR' ) { $argv = ${$argv}; $rargv = undef } + + # ref to ARRAY + if ($rargv) { + if ( $rargv eq 'ARRAY' ) { + @ARGV = @{$argv}; + } + else { + croak <{$opt} = $flag; + } + } + + if ( defined($dump_options_category) ) { + $quit_now = 1; + %{$dump_options_category} = %{$roption_category}; + } + + if ( defined($dump_options_range) ) { + $quit_now = 1; + %{$dump_options_range} = %{$roption_range}; + } + + if ( defined($dump_abbreviations) ) { + $quit_now = 1; + %{$dump_abbreviations} = %{$rexpansion}; + } + + if ( defined($dump_options) ) { + $quit_now = 1; + %{$dump_options} = %{$rOpts}; + } + + Exit(0) if ($quit_now); + + # make printable string of options for this run as possible diagnostic + my $readable_options = readable_options( $rOpts, $roption_string ); + + # dump from command line + if ( $rOpts->{'dump-options'} ) { + print STDOUT $readable_options; + Exit(0); + } + + #--------------------------------------------------------------- + # check parameters and their interactions + #--------------------------------------------------------------- + my $tabsize = + check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ); + + if ($user_formatter) { + $rOpts->{'format'} = 'user'; + } + + # there must be one entry here for every possible format + my %default_file_extension = ( + tidy => 'tdy', + html => 'html', + user => '', + ); + + $rOpts_character_encoding = $rOpts->{'character-encoding'}; + + # be sure we have a valid output format + unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { + my $formats = join ' ', + sort map { "'" . $_ . "'" } keys %default_file_extension; + my $fmt = $rOpts->{'format'}; + Die("-format='$fmt' but must be one of: $formats\n"); + } + + my $output_extension = make_extension( $rOpts->{'output-file-extension'}, + $default_file_extension{ $rOpts->{'format'} }, $dot ); + + # If the backup extension contains a / character then the backup should + # be deleted when the -b option is used. On older versions of + # perltidy this will generate an error message due to an illegal + # file name. + # + # A backup file will still be generated but will be deleted + # at the end. If -bext='/' then this extension will be + # the default 'bak'. Otherwise it will be whatever characters + # remains after all '/' characters are removed. For example: + # -bext extension slashes + # '/' bak 1 + # '/delete' delete 1 + # 'delete/' delete 1 + # '/dev/null' devnull 2 (Currently not allowed) + my $bext = $rOpts->{'backup-file-extension'}; + my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g ); + + # At present only one forward slash is allowed. In the future multiple + # slashes may be allowed to allow for other options + if ( $delete_backup > 1 ) { + Die("-bext=$bext contains more than one '/'\n"); + } + + my $backup_extension = + make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); + + my $html_toc_extension = + make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); + + my $html_src_extension = + make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); + + # check for -b option; + # silently ignore unless beautify mode + my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} + && $rOpts->{'format'} eq 'tidy'; + + # Turn off -b with warnings in case of conflicts with other options. + # NOTE: Do this silently, without warnings, if there is a source or + # destination stream, or standard output is used. This is because the -b + # flag may have been in a .perltidyrc file and warnings break + # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014. + if ($in_place_modify) { + if ( $rOpts->{'standard-output'} + || $destination_stream + || ref $source_stream + || $rOpts->{'outfile'} + || defined( $rOpts->{'output-path'} ) ) + { + $in_place_modify = 0; + } + } + + Perl::Tidy::Formatter::check_options($rOpts); + if ( $rOpts->{'format'} eq 'html' ) { + Perl::Tidy::HtmlWriter->check_options($rOpts); + } + + # make the pattern of file extensions that we shouldn't touch + my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; + if ($output_extension) { + my $ext = quotemeta($output_extension); + $forbidden_file_extensions .= "|$ext"; + } + if ( $in_place_modify && $backup_extension ) { + my $ext = quotemeta($backup_extension); + $forbidden_file_extensions .= "|$ext"; + } + $forbidden_file_extensions .= ')$'; + + # Create a diagnostics object if requested; + # This is only useful for code development + my $diagnostics_object = undef; + if ( $rOpts->{'DIAGNOSTICS'} ) { + $diagnostics_object = Perl::Tidy::Diagnostics->new(); + } + + # no filenames should be given if input is from an array + if ($source_stream) { + if ( @ARGV > 0 ) { + Die( +"You may not specify any filenames when a source array is given\n" + ); + } + + # we'll stuff the source array into ARGV + unshift( @ARGV, $source_stream ); + + # No special treatment for source stream which is a filename. + # This will enable checks for binary files and other bad stuff. + $source_stream = undef unless ref($source_stream); + } + + # use stdin by default if no source array and no args + else { + unshift( @ARGV, '-' ) unless @ARGV; + } + + #--------------------------------------------------------------- + # Ready to go... + # main loop to process all files in argument list + #--------------------------------------------------------------- + my $number_of_files = @ARGV; + my $formatter = undef; + my $tokenizer = undef; + + # If requested, process in order of increasing file size + # This can significantly reduce perl's virtual memory usage during testing. + if ( $number_of_files > 1 && $rOpts->{'file-size-order'} ) { + @ARGV = + map { $_->[0] } + sort { $a->[1] <=> $b->[1] } + map { [ $_, -e $_ ? -s $_ : 0 ] } @ARGV; + } + + while ( my $input_file = shift @ARGV ) { + my $fileroot; + my $input_file_permissions; + + #--------------------------------------------------------------- + # prepare this input stream + #--------------------------------------------------------------- + if ($source_stream) { + $fileroot = "perltidy"; + + # If the source is from an array or string, then .LOG output + # is only possible if a logfile stream is specified. This prevents + # unexpected perltidy.LOG files. + if ( !defined($logfile_stream) ) { + $logfile_stream = Perl::Tidy::DevNull->new(); + } + } + elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN + $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc + $in_place_modify = 0; + } + else { + $fileroot = $input_file; + unless ( -e $input_file ) { + + # file doesn't exist - check for a file glob + if ( $input_file =~ /([\?\*\[\{])/ ) { + + # Windows shell may not remove quotes, so do it + my $input_file = $input_file; + if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } + if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } + my $pattern = fileglob_to_re($input_file); + ##eval "/$pattern/"; + if ( !$@ && opendir( DIR, './' ) ) { + my @files = + grep { /$pattern/ && !-d $_ } readdir(DIR); + closedir(DIR); + if (@files) { + unshift @ARGV, @files; + next; + } + } + } + Warn("skipping file: '$input_file': no matches found\n"); + next; + } + + unless ( -f $input_file ) { + Warn("skipping file: $input_file: not a regular file\n"); + next; + } + + # As a safety precaution, skip zero length files. + # If for example a source file got clobbered somehow, + # the old .tdy or .bak files might still exist so we + # shouldn't overwrite them with zero length files. + unless ( -s $input_file ) { + Warn("skipping file: $input_file: Zero size\n"); + next; + } + + unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { + Warn( + "skipping file: $input_file: Non-text (override with -f)\n" + ); + next; + } + + # we should have a valid filename now + $fileroot = $input_file; + $input_file_permissions = ( stat $input_file )[2] & oct(7777); + + if ( $^O eq 'VMS' ) { + ( $fileroot, $dot ) = check_vms_filename($fileroot); + } + + # add option to change path here + if ( defined( $rOpts->{'output-path'} ) ) { + + my ( $base, $old_path ) = fileparse($fileroot); + my $new_path = $rOpts->{'output-path'}; + unless ( -d $new_path ) { + unless ( mkdir $new_path, 0777 ) { + Die("unable to create directory $new_path: $!\n"); + } + } + my $path = $new_path; + $fileroot = catfile( $path, $base ); + unless ($fileroot) { + Die(<new( $input_file, $rOpts, + $rpending_logfile_message ); + next unless ($source_object); + + # Prefilters and postfilters: The prefilter is a code reference + # that will be applied to the source before tidying, and the + # postfilter is a code reference to the result before outputting. + if ( + $prefilter + || ( $rOpts_character_encoding + && $rOpts_character_encoding eq 'utf8' ) + ) + { + my $buf = ''; + while ( my $line = $source_object->get_line() ) { + $buf .= $line; + } + + $buf = $prefilter->($buf) if $prefilter; + + if ( $rOpts_character_encoding + && $rOpts_character_encoding eq 'utf8' + && !utf8::is_utf8($buf) ) + { + eval { + $buf = Encode::decode( 'UTF-8', $buf, + Encode::FB_CROAK | Encode::LEAVE_SRC ); + }; + if ($@) { + Warn( +"skipping file: $input_file: Unable to decode source as UTF-8\n" + ); + next; + } + } + + $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, + $rpending_logfile_message ); + } + + # register this file name with the Diagnostics package + $diagnostics_object->set_input_file($input_file) + if $diagnostics_object; + + #--------------------------------------------------------------- + # prepare the output stream + #--------------------------------------------------------------- + my $output_file = undef; + my $actual_output_extension; + + if ( $rOpts->{'outfile'} ) { + + if ( $number_of_files <= 1 ) { + + if ( $rOpts->{'standard-output'} ) { + my $msg = "You may not use -o and -st together"; + $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); + Die("$msg\n"); + } + elsif ($destination_stream) { + Die( +"You may not specify a destination array and -o together\n" + ); + } + elsif ( defined( $rOpts->{'output-path'} ) ) { + Die("You may not specify -o and -opath together\n"); + } + elsif ( defined( $rOpts->{'output-file-extension'} ) ) { + Die("You may not specify -o and -oext together\n"); + } + $output_file = $rOpts->{outfile}; + + # make sure user gives a file name after -o + if ( $output_file =~ /^-/ ) { + Die("You must specify a valid filename after -o\n"); + } + + # do not overwrite input file with -o + if ( defined($input_file_permissions) + && ( $output_file eq $input_file ) ) + { + Die("Use 'perltidy -b $input_file' to modify in-place\n"); + } + } + else { + Die("You may not use -o with more than one input file\n"); + } + } + elsif ( $rOpts->{'standard-output'} ) { + if ($destination_stream) { + my $msg = + "You may not specify a destination array and -st together\n"; + $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp); + Die("$msg\n"); + } + $output_file = '-'; + + if ( $number_of_files <= 1 ) { + } + else { + Die("You may not use -st with more than one input file\n"); + } + } + elsif ($destination_stream) { + $output_file = $destination_stream; + } + elsif ($source_stream) { # source but no destination goes to stdout + $output_file = '-'; + } + elsif ( $input_file eq '-' ) { + $output_file = '-'; + } + else { + if ($in_place_modify) { + $output_file = IO::File->new_tmpfile() + or Die("cannot open temp file for -b option: $!\n"); + } + else { + $actual_output_extension = $output_extension; + $output_file = $fileroot . $output_extension; + } + } + + # the 'sink_object' knows how to write the output file + my $tee_file = $fileroot . $dot . "TEE"; + + my $line_separator = $rOpts->{'output-line-ending'}; + if ( $rOpts->{'preserve-line-endings'} ) { + $line_separator = find_input_line_ending($input_file); + } + + # Eventually all I/O may be done with binmode, but for now it is + # only done when a user requests a particular line separator + # through the -ple or -ole flags + my $binmode = defined($line_separator) + || defined($rOpts_character_encoding); + $line_separator = "\n" unless defined($line_separator); + + my ( $sink_object, $postfilter_buffer ); + if ($postfilter) { + $sink_object = + Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, $binmode ); + } + else { + $sink_object = + Perl::Tidy::LineSink->new( $output_file, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, $binmode ); + } + + #--------------------------------------------------------------- + # initialize the error logger for this file + #--------------------------------------------------------------- + my $warning_file = $fileroot . $dot . "ERR"; + if ($errorfile_stream) { $warning_file = $errorfile_stream } + my $log_file = $fileroot . $dot . "LOG"; + if ($logfile_stream) { $log_file = $logfile_stream } + + my $logger_object = + Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, + $fh_stderr, $saw_extrude ); + write_logfile_header( + $rOpts, $logger_object, $config_file, + $rraw_options, $Windows_type, $readable_options, + ); + if ( ${$rpending_logfile_message} ) { + $logger_object->write_logfile_entry( ${$rpending_logfile_message} ); + } + if ( ${$rpending_complaint} ) { + $logger_object->complain( ${$rpending_complaint} ); + } + + #--------------------------------------------------------------- + # initialize the debug object, if any + #--------------------------------------------------------------- + my $debugger_object = undef; + if ( $rOpts->{DEBUG} ) { + $debugger_object = + Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); + } + + #--------------------------------------------------------------- + # loop over iterations for one source stream + #--------------------------------------------------------------- + + # We will do a convergence test if 3 or more iterations are allowed. + # It would be pointless for fewer because we have to make at least + # two passes before we can see if we are converged, and the test + # would just slow things down. + my $max_iterations = $rOpts->{'iterations'}; + my $convergence_log_message; + my %saw_md5; + my $do_convergence_test = $max_iterations > 2; + + # Since Digest::MD5 qw(md5_hex) has been in the earliest version of Perl + # we are requiring (5.8), I have commented out this check +##? if ($do_convergence_test) { +##? eval "use Digest::MD5 qw(md5_hex)"; +##? $do_convergence_test = !$@; +##? +##? ### Trying to avoid problems with ancient versions of perl +##? ##eval { my $string = "perltidy"; utf8::encode($string) }; +##? ##$do_convergence_test = $do_convergence_test && !$@; +##? } + + # save objects to allow redirecting output during iterations + my $sink_object_final = $sink_object; + my $debugger_object_final = $debugger_object; + my $logger_object_final = $logger_object; + + foreach my $iter ( 1 .. $max_iterations ) { + + # send output stream to temp buffers until last iteration + my $sink_buffer; + if ( $iter < $max_iterations ) { + $sink_object = + Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, + $binmode ); + } + else { + $sink_object = $sink_object_final; + } + + # Save logger, debugger output only on pass 1 because: + # (1) line number references must be to the starting + # source, not an intermediate result, and + # (2) we need to know if there are errors so we can stop the + # iterations early if necessary. + if ( $iter > 1 ) { + $debugger_object = undef; + $logger_object = undef; + } + + #------------------------------------------------------------ + # create a formatter for this file : html writer or + # pretty printer + #------------------------------------------------------------ + + # we have to delete any old formatter because, for safety, + # the formatter will check to see that there is only one. + $formatter = undef; + + if ($user_formatter) { + $formatter = $user_formatter; + } + elsif ( $rOpts->{'format'} eq 'html' ) { + $formatter = + Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, + $actual_output_extension, $html_toc_extension, + $html_src_extension ); + } + elsif ( $rOpts->{'format'} eq 'tidy' ) { + $formatter = Perl::Tidy::Formatter->new( + logger_object => $logger_object, + diagnostics_object => $diagnostics_object, + sink_object => $sink_object, + ); + } + else { + Die("I don't know how to do -format=$rOpts->{'format'}\n"); + } + + unless ($formatter) { + Die("Unable to continue with $rOpts->{'format'} formatting\n"); + } + + #--------------------------------------------------------------- + # create the tokenizer for this file + #--------------------------------------------------------------- + $tokenizer = undef; # must destroy old tokenizer + $tokenizer = Perl::Tidy::Tokenizer->new( + source_object => $source_object, + logger_object => $logger_object, + debugger_object => $debugger_object, + diagnostics_object => $diagnostics_object, + tabsize => $tabsize, + + starting_level => $rOpts->{'starting-indentation-level'}, + indent_columns => $rOpts->{'indent-columns'}, + look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, + look_for_autoloader => $rOpts->{'look-for-autoloader'}, + look_for_selfloader => $rOpts->{'look-for-selfloader'}, + trim_qw => $rOpts->{'trim-qw'}, + extended_syntax => $rOpts->{'extended-syntax'}, + + continuation_indentation => + $rOpts->{'continuation-indentation'}, + outdent_labels => $rOpts->{'outdent-labels'}, + ); + + #--------------------------------------------------------------- + # now we can do it + #--------------------------------------------------------------- + process_this_file( $tokenizer, $formatter ); + + #--------------------------------------------------------------- + # close the input source and report errors + #--------------------------------------------------------------- + $source_object->close_input_file(); + + # line source for next iteration (if any) comes from the current + # temporary output buffer + if ( $iter < $max_iterations ) { + + $sink_object->close_output_file(); + $source_object = + Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts, + $rpending_logfile_message ); + + # stop iterations if errors or converged + #my $stop_now = $logger_object->{_warning_count}; + my $stop_now = $tokenizer->report_tokenization_errors(); + if ($stop_now) { + $convergence_log_message = <write_diagnostics( + $convergence_log_message) + if $diagnostics_object; + } + else { + $convergence_log_message = <write_diagnostics( + $convergence_log_message) + if $diagnostics_object && $iterm > 2; + } + } + } ## end if ($do_convergence_test) + + if ($stop_now) { + + # we are stopping the iterations early; + # copy the output stream to its final destination + $sink_object = $sink_object_final; + while ( my $line = $source_object->get_line() ) { + $sink_object->write_line($line); + } + $source_object->close_input_file(); + last; + } + } ## end if ( $iter < $max_iterations) + } # end loop over iterations for one source file + + # restore objects which have been temporarily undefined + # for second and higher iterations + $debugger_object = $debugger_object_final; + $logger_object = $logger_object_final; + + $logger_object->write_logfile_entry($convergence_log_message) + if $convergence_log_message; + + #--------------------------------------------------------------- + # Perform any postfilter operation + #--------------------------------------------------------------- + if ($postfilter) { + $sink_object->close_output_file(); + $sink_object = + Perl::Tidy::LineSink->new( $output_file, $tee_file, + $line_separator, $rOpts, $rpending_logfile_message, $binmode ); + my $buf = $postfilter->($postfilter_buffer); + $source_object = + Perl::Tidy::LineSource->new( \$buf, $rOpts, + $rpending_logfile_message ); + while ( my $line = $source_object->get_line() ) { + $sink_object->write_line($line); + } + $source_object->close_input_file(); + } + + # Save names of the input and output files for syntax check + my $ifname = $input_file; + my $ofname = $output_file; + + #--------------------------------------------------------------- + # handle the -b option (backup and modify in-place) + #--------------------------------------------------------------- + if ($in_place_modify) { + unless ( -f $input_file ) { + + # oh, oh, no real file to backup .. + # shouldn't happen because of numerous preliminary checks + Die( +"problem with -b backing up input file '$input_file': not a file\n" + ); + } + my $backup_name = $input_file . $backup_extension; + if ( -f $backup_name ) { + unlink($backup_name) + or Die( +"unable to remove previous '$backup_name' for -b option; check permissions: $!\n" + ); + } + + # backup the input file + # we use copy for symlinks, move for regular files + if ( -l $input_file ) { + File::Copy::copy( $input_file, $backup_name ) + or Die("File::Copy failed trying to backup source: $!"); + } + else { + rename( $input_file, $backup_name ) + or Die( +"problem renaming $input_file to $backup_name for -b option: $!\n" + ); + } + $ifname = $backup_name; + + # copy the output to the original input file + # NOTE: it would be nice to just close $output_file and use + # File::Copy::copy here, but in this case $output_file is the + # handle of an open nameless temporary file so we would lose + # everything if we closed it. + seek( $output_file, 0, 0 ) + or Die("unable to rewind a temporary file for -b option: $!\n"); + my $fout = IO::File->new("> $input_file") + or Die( +"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n" + ); + if ($binmode) { + if ( $rOpts->{'character-encoding'} + && $rOpts->{'character-encoding'} eq 'utf8' ) + { + binmode $fout, ":encoding(UTF-8)"; + } + else { binmode $fout } + } + my $line; + while ( $line = $output_file->getline() ) { + $fout->print($line); + } + $fout->close(); + $output_file = $input_file; + $ofname = $input_file; + } + + #--------------------------------------------------------------- + # clean up and report errors + #--------------------------------------------------------------- + $sink_object->close_output_file() if $sink_object; + $debugger_object->close_debug_file() if $debugger_object; + + # set output file permissions + if ( $output_file && -f $output_file && !-l $output_file ) { + if ($input_file_permissions) { + + # give output script same permissions as input script, but + # make it user-writable or else we can't run perltidy again. + # Thus we retain whatever executable flags were set. + if ( $rOpts->{'format'} eq 'tidy' ) { + chmod( $input_file_permissions | oct(600), $output_file ); + } + + # else use default permissions for html and any other format + } + } + + #--------------------------------------------------------------- + # Do syntax check if requested and possible + #--------------------------------------------------------------- + my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes + if ( $logger_object + && $rOpts->{'check-syntax'} + && $ifname + && $ofname ) + { + $infile_syntax_ok = + check_syntax( $ifname, $ofname, $logger_object, $rOpts ); + } + + #--------------------------------------------------------------- + # remove the original file for in-place modify as follows: + # $delete_backup=0 never + # $delete_backup=1 only if no errors + # $delete_backup>1 always : NOT ALLOWED, too risky, see above + #--------------------------------------------------------------- + if ( $in_place_modify + && $delete_backup + && -f $ifname + && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) ) + { + + # As an added safety precaution, do not delete the source file + # if its size has dropped from positive to zero, since this + # could indicate a disaster of some kind, including a hardware + # failure. Actually, this could happen if you had a file of + # all comments (or pod) and deleted everything with -dac (-dap) + # for some reason. + if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) { + Warn( +"output file '$output_file' missing or zero length; original '$ifname' not deleted\n" + ); + } + else { + unlink($ifname) + or Die( +"unable to remove previous '$ifname' for -b option; check permissions: $!\n" + ); + } + } + + $logger_object->finish( $infile_syntax_ok, $formatter ) + if $logger_object; + } # end of main loop to process all files + + NORMAL_EXIT: + return 0; + + ERROR_EXIT: + return 1; +} # end of main program perltidy + +sub get_stream_as_named_file { + + # Return the name of a file containing a stream of data, creating + # a temporary file if necessary. + # Given: + # $stream - the name of a file or stream + # Returns: + # $fname = name of file if possible, or undef + # $if_tmpfile = true if temp file, undef if not temp file + # + # This routine is needed for passing actual files to Perl for + # a syntax check. + my ($stream) = @_; + my $is_tmpfile; + my $fname; + if ($stream) { + if ( ref($stream) ) { + my ( $fh_stream, $fh_name ) = + Perl::Tidy::streamhandle( $stream, 'r' ); + if ($fh_stream) { + my ( $fout, $tmpnam ) = File::Temp::tempfile(); + if ($fout) { + $fname = $tmpnam; + $is_tmpfile = 1; + binmode $fout; + while ( my $line = $fh_stream->getline() ) { + $fout->print($line); + } + $fout->close(); + } + $fh_stream->close(); + } + } + elsif ( $stream ne '-' && -f $stream ) { + $fname = $stream; + } + } + return ( $fname, $is_tmpfile ); +} + +sub fileglob_to_re { + + # modified (corrected) from version in find2perl + my $x = shift; + $x =~ s#([./^\$()])#\\$1#g; # escape special characters + $x =~ s#\*#.*#g; # '*' -> '.*' + $x =~ s#\?#.#g; # '?' -> '.' + return "^$x\\z"; # match whole word +} + +sub make_extension { + + # Make a file extension, including any leading '.' if necessary + # The '.' may actually be an '_' under VMS + my ( $extension, $default, $dot ) = @_; + + # Use the default if none specified + $extension = $default unless ($extension); + + # Only extensions with these leading characters get a '.' + # This rule gives the user some freedom + if ( $extension =~ /^[a-zA-Z0-9]/ ) { + $extension = $dot . $extension; + } + return $extension; +} + +sub write_logfile_header { + my ( + $rOpts, $logger_object, $config_file, + $rraw_options, $Windows_type, $readable_options + ) = @_; + $logger_object->write_logfile_entry( +"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" + ); + if ($Windows_type) { + $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); + } + my $options_string = join( ' ', @{$rraw_options} ); + + if ($config_file) { + $logger_object->write_logfile_entry( + "Found Configuration File >>> $config_file \n"); + } + $logger_object->write_logfile_entry( + "Configuration and command line parameters for this run:\n"); + $logger_object->write_logfile_entry("$options_string\n"); + + if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { + $rOpts->{'logfile'} = 1; # force logfile to be saved + $logger_object->write_logfile_entry( + "Final parameter set for this run\n"); + $logger_object->write_logfile_entry( + "------------------------------------\n"); + + $logger_object->write_logfile_entry($readable_options); + + $logger_object->write_logfile_entry( + "------------------------------------\n"); + } + $logger_object->write_logfile_entry( + "To find error messages search for 'WARNING' with your editor\n"); + return; +} + +sub generate_options { + + ###################################################################### + # Generate and return references to: + # @option_string - the list of options to be passed to Getopt::Long + # @defaults - the list of default options + # %expansion - a hash showing how all abbreviations are expanded + # %category - a hash giving the general category of each option + # %option_range - a hash giving the valid ranges of certain options + + # Note: a few options are not documented in the man page and usage + # message. This is because these are experimental or debug options and + # may or may not be retained in future versions. + # + # Here are the undocumented flags as far as I know. Any of them + # may disappear at any time. They are mainly for fine-tuning + # and debugging. + # + # fll --> fuzzy-line-length # a trivial parameter which gets + # turned off for the extrude option + # which is mainly for debugging + # scl --> short-concatenation-item-length # helps break at '.' + # recombine # for debugging line breaks + # valign # for debugging vertical alignment + # I --> DIAGNOSTICS # for debugging [**DEACTIVATED**] + ###################################################################### + + # here is a summary of the Getopt codes: + # does not take an argument + # =s takes a mandatory string + # :s takes an optional string (DO NOT USE - filenames will get eaten up) + # =i takes a mandatory integer + # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) + # ! does not take an argument and may be negated + # i.e., -foo and -nofoo are allowed + # a double dash signals the end of the options list + # + #--------------------------------------------------------------- + # Define the option string passed to GetOptions. + #--------------------------------------------------------------- + + my @option_string = (); + my %expansion = (); + my %option_category = (); + my %option_range = (); + my $rexpansion = \%expansion; + + # names of categories in manual + # leading integers will allow sorting + my @category_name = ( + '0. I/O control', + '1. Basic formatting options', + '2. Code indentation control', + '3. Whitespace control', + '4. Comment controls', + '5. Linebreak controls', + '6. Controlling list formatting', + '7. Retaining or ignoring existing line breaks', + '8. Blank line control', + '9. Other controls', + '10. HTML options', + '11. pod2html options', + '12. Controlling HTML properties', + '13. Debugging', + ); + + # These options are parsed directly by perltidy: + # help h + # version v + # However, they are included in the option set so that they will + # be seen in the options dump. + + # These long option names have no abbreviations or are treated specially + @option_string = qw( + html! + noprofile + no-profile + npro + recombine! + valign! + notidy + ); + + my $category = 13; # Debugging + foreach (@option_string) { + my $opt = $_; # must avoid changing the actual flag + $opt =~ s/!$//; + $option_category{$opt} = $category_name[$category]; + } + + $category = 11; # HTML + $option_category{html} = $category_name[$category]; + + # routine to install and check options + my $add_option = sub { + my ( $long_name, $short_name, $flag ) = @_; + push @option_string, $long_name . $flag; + $option_category{$long_name} = $category_name[$category]; + if ($short_name) { + if ( $expansion{$short_name} ) { + my $existing_name = $expansion{$short_name}[0]; + Die( +"redefining abbreviation $short_name for $long_name; already used for $existing_name\n" + ); + } + $expansion{$short_name} = [$long_name]; + if ( $flag eq '!' ) { + my $nshort_name = 'n' . $short_name; + my $nolong_name = 'no' . $long_name; + if ( $expansion{$nshort_name} ) { + my $existing_name = $expansion{$nshort_name}[0]; + Die( +"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n" + ); + } + $expansion{$nshort_name} = [$nolong_name]; + } + } + }; + + # Install long option names which have a simple abbreviation. + # Options with code '!' get standard negation ('no' for long names, + # 'n' for abbreviations). Categories follow the manual. + + ########################### + $category = 0; # I/O_Control + ########################### + $add_option->( 'backup-and-modify-in-place', 'b', '!' ); + $add_option->( 'backup-file-extension', 'bext', '=s' ); + $add_option->( 'force-read-binary', 'f', '!' ); + $add_option->( 'format', 'fmt', '=s' ); + $add_option->( 'iterations', 'it', '=i' ); + $add_option->( 'logfile', 'log', '!' ); + $add_option->( 'logfile-gap', 'g', ':i' ); + $add_option->( 'outfile', 'o', '=s' ); + $add_option->( 'output-file-extension', 'oext', '=s' ); + $add_option->( 'output-path', 'opath', '=s' ); + $add_option->( 'profile', 'pro', '=s' ); + $add_option->( 'quiet', 'q', '!' ); + $add_option->( 'standard-error-output', 'se', '!' ); + $add_option->( 'standard-output', 'st', '!' ); + $add_option->( 'warning-output', 'w', '!' ); + $add_option->( 'character-encoding', 'enc', '=s' ); + + # options which are both toggle switches and values moved here + # to hide from tidyview (which does not show category 0 flags): + # -ole moved here from category 1 + # -sil moved here from category 2 + $add_option->( 'output-line-ending', 'ole', '=s' ); + $add_option->( 'starting-indentation-level', 'sil', '=i' ); + + ######################################## + $category = 1; # Basic formatting options + ######################################## + $add_option->( 'check-syntax', 'syn', '!' ); + $add_option->( 'entab-leading-whitespace', 'et', '=i' ); + $add_option->( 'indent-columns', 'i', '=i' ); + $add_option->( 'maximum-line-length', 'l', '=i' ); + $add_option->( 'variable-maximum-line-length', 'vmll', '!' ); + $add_option->( 'whitespace-cycle', 'wc', '=i' ); + $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); + $add_option->( 'preserve-line-endings', 'ple', '!' ); + $add_option->( 'tabs', 't', '!' ); + $add_option->( 'default-tabsize', 'dt', '=i' ); + $add_option->( 'extended-syntax', 'xs', '!' ); + + ######################################## + $category = 2; # Code indentation control + ######################################## + $add_option->( 'continuation-indentation', 'ci', '=i' ); + $add_option->( 'line-up-parentheses', 'lp', '!' ); + $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); + $add_option->( 'outdent-keywords', 'okw', '!' ); + $add_option->( 'outdent-labels', 'ola', '!' ); + $add_option->( 'outdent-long-quotes', 'olq', '!' ); + $add_option->( 'indent-closing-brace', 'icb', '!' ); + $add_option->( 'closing-token-indentation', 'cti', '=i' ); + $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); + $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); + $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); + $add_option->( 'brace-left-and-indent', 'bli', '!' ); + $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); + + ######################################## + $category = 3; # Whitespace control + ######################################## + $add_option->( 'add-semicolons', 'asc', '!' ); + $add_option->( 'add-whitespace', 'aws', '!' ); + $add_option->( 'block-brace-tightness', 'bbt', '=i' ); + $add_option->( 'brace-tightness', 'bt', '=i' ); + $add_option->( 'delete-old-whitespace', 'dws', '!' ); + $add_option->( 'delete-semicolons', 'dsm', '!' ); + $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); + $add_option->( 'nowant-left-space', 'nwls', '=s' ); + $add_option->( 'nowant-right-space', 'nwrs', '=s' ); + $add_option->( 'paren-tightness', 'pt', '=i' ); + $add_option->( 'space-after-keyword', 'sak', '=s' ); + $add_option->( 'space-for-semicolon', 'sfs', '!' ); + $add_option->( 'space-function-paren', 'sfp', '!' ); + $add_option->( 'space-keyword-paren', 'skp', '!' ); + $add_option->( 'space-terminal-semicolon', 'sts', '!' ); + $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); + $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); + $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); + $add_option->( 'tight-secret-operators', 'tso', '!' ); + $add_option->( 'trim-qw', 'tqw', '!' ); + $add_option->( 'trim-pod', 'trp', '!' ); + $add_option->( 'want-left-space', 'wls', '=s' ); + $add_option->( 'want-right-space', 'wrs', '=s' ); + + ######################################## + $category = 4; # Comment controls + ######################################## + $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); + $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); + $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); + $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); + $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); + $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); + $add_option->( 'closing-side-comments', 'csc', '!' ); + $add_option->( 'closing-side-comments-balanced', 'cscb', '!' ); + $add_option->( 'format-skipping', 'fs', '!' ); + $add_option->( 'format-skipping-begin', 'fsb', '=s' ); + $add_option->( 'format-skipping-end', 'fse', '=s' ); + $add_option->( 'hanging-side-comments', 'hsc', '!' ); + $add_option->( 'indent-block-comments', 'ibc', '!' ); + $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); + $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); + $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); + $add_option->( 'outdent-long-comments', 'olc', '!' ); + $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); + $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); + $add_option->( 'static-block-comments', 'sbc', '!' ); + $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); + $add_option->( 'static-side-comments', 'ssc', '!' ); + $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' ); + + ######################################## + $category = 5; # Linebreak controls + ######################################## + $add_option->( 'add-newlines', 'anl', '!' ); + $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); + $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); + $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); + $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); + $add_option->( 'cuddled-else', 'ce', '!' ); + $add_option->( 'cuddled-block-list', 'cbl', '=s' ); + $add_option->( 'cuddled-block-list-exclusive', 'cblx', '!' ); + $add_option->( 'cuddled-break-option', 'cbo', '=i' ); + $add_option->( 'delete-old-newlines', 'dnl', '!' ); + $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); + $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); + $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); + $add_option->( 'opening-paren-right', 'opr', '!' ); + $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); + $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' ); + $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); + $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); + $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); + $add_option->( 'weld-nested-containers', 'wn', '!' ); + $add_option->( 'space-backslash-quote', 'sbq', '=i' ); + $add_option->( 'stack-closing-block-brace', 'scbb', '!' ); + $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); + $add_option->( 'stack-closing-paren', 'scp', '!' ); + $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); + $add_option->( 'stack-opening-block-brace', 'sobb', '!' ); + $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); + $add_option->( 'stack-opening-paren', 'sop', '!' ); + $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); + $add_option->( 'vertical-tightness', 'vt', '=i' ); + $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); + $add_option->( 'want-break-after', 'wba', '=s' ); + $add_option->( 'want-break-before', 'wbb', '=s' ); + $add_option->( 'break-after-all-operators', 'baao', '!' ); + $add_option->( 'break-before-all-operators', 'bbao', '!' ); + $add_option->( 'keep-interior-semicolons', 'kis', '!' ); + + ######################################## + $category = 6; # Controlling list formatting + ######################################## + $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); + $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); + $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); + + ######################################## + $category = 7; # Retaining or ignoring existing line breaks + ######################################## + $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); + $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); + $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); + $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' ); + $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); + + ######################################## + $category = 8; # Blank line control + ######################################## + $add_option->( 'blanks-before-blocks', 'bbb', '!' ); + $add_option->( 'blanks-before-comments', 'bbc', '!' ); + $add_option->( 'blank-lines-before-subs', 'blbs', '=i' ); + $add_option->( 'blank-lines-before-packages', 'blbp', '=i' ); + $add_option->( 'long-block-line-count', 'lbl', '=i' ); + $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); + $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); + + $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' ); + $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' ); + $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' ); + $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' ); + + ######################################## + $category = 9; # Other controls + ######################################## + $add_option->( 'delete-block-comments', 'dbc', '!' ); + $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); + $add_option->( 'delete-pod', 'dp', '!' ); + $add_option->( 'delete-side-comments', 'dsc', '!' ); + $add_option->( 'tee-block-comments', 'tbc', '!' ); + $add_option->( 'tee-pod', 'tp', '!' ); + $add_option->( 'tee-side-comments', 'tsc', '!' ); + $add_option->( 'look-for-autoloader', 'lal', '!' ); + $add_option->( 'look-for-hash-bang', 'x', '!' ); + $add_option->( 'look-for-selfloader', 'lsl', '!' ); + $add_option->( 'pass-version-line', 'pvl', '!' ); + + ######################################## + $category = 13; # Debugging + ######################################## +## $add_option->( 'DIAGNOSTICS', 'I', '!' ); + $add_option->( 'DEBUG', 'D', '!' ); + $add_option->( 'dump-cuddled-block-list', 'dcbl', '!' ); + $add_option->( 'dump-defaults', 'ddf', '!' ); + $add_option->( 'dump-long-names', 'dln', '!' ); + $add_option->( 'dump-options', 'dop', '!' ); + $add_option->( 'dump-profile', 'dpro', '!' ); + $add_option->( 'dump-short-names', 'dsn', '!' ); + $add_option->( 'dump-token-types', 'dtt', '!' ); + $add_option->( 'dump-want-left-space', 'dwls', '!' ); + $add_option->( 'dump-want-right-space', 'dwrs', '!' ); + $add_option->( 'fuzzy-line-length', 'fll', '!' ); + $add_option->( 'help', 'h', '' ); + $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); + $add_option->( 'show-options', 'opt', '!' ); + $add_option->( 'timestamp', 'ts', '!' ); + $add_option->( 'version', 'v', '' ); + $add_option->( 'memoize', 'mem', '!' ); + $add_option->( 'file-size-order', 'fso', '!' ); + + #--------------------------------------------------------------------- + + # The Perl::Tidy::HtmlWriter will add its own options to the string + Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); + + ######################################## + # Set categories 10, 11, 12 + ######################################## + # Based on their known order + $category = 12; # HTML properties + foreach my $opt (@option_string) { + my $long_name = $opt; + $long_name =~ s/(!|=.*|:.*)$//; + unless ( defined( $option_category{$long_name} ) ) { + if ( $long_name =~ /^html-linked/ ) { + $category = 10; # HTML options + } + elsif ( $long_name =~ /^pod2html/ ) { + $category = 11; # Pod2html + } + $option_category{$long_name} = $category_name[$category]; + } + } + + #--------------------------------------------------------------- + # Assign valid ranges to certain options + #--------------------------------------------------------------- + # In the future, these may be used to make preliminary checks + # hash keys are long names + # If key or value is undefined: + # strings may have any value + # integer ranges are >=0 + # If value is defined: + # value is [qw(any valid words)] for strings + # value is [min, max] for integers + # if min is undefined, there is no lower limit + # if max is undefined, there is no upper limit + # Parameters not listed here have defaults + %option_range = ( + 'format' => [ 'tidy', 'html', 'user' ], + 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], + 'character-encoding' => [ 'none', 'utf8' ], + + 'space-backslash-quote' => [ 0, 2 ], + + 'block-brace-tightness' => [ 0, 2 ], + 'brace-tightness' => [ 0, 2 ], + 'paren-tightness' => [ 0, 2 ], + 'square-bracket-tightness' => [ 0, 2 ], + + 'block-brace-vertical-tightness' => [ 0, 2 ], + 'brace-vertical-tightness' => [ 0, 2 ], + 'brace-vertical-tightness-closing' => [ 0, 2 ], + 'paren-vertical-tightness' => [ 0, 2 ], + 'paren-vertical-tightness-closing' => [ 0, 2 ], + 'square-bracket-vertical-tightness' => [ 0, 2 ], + 'square-bracket-vertical-tightness-closing' => [ 0, 2 ], + 'vertical-tightness' => [ 0, 2 ], + 'vertical-tightness-closing' => [ 0, 2 ], + + 'closing-brace-indentation' => [ 0, 3 ], + 'closing-paren-indentation' => [ 0, 3 ], + 'closing-square-bracket-indentation' => [ 0, 3 ], + 'closing-token-indentation' => [ 0, 3 ], + + 'closing-side-comment-else-flag' => [ 0, 2 ], + 'comma-arrow-breakpoints' => [ 0, 5 ], + ); + + # Note: we could actually allow negative ci if someone really wants it: + # $option_range{'continuation-indentation'} = [ undef, undef ]; + + #--------------------------------------------------------------- + # Assign default values to the above options here, except + # for 'outfile' and 'help'. + # These settings should approximate the perlstyle(1) suggestions. + #--------------------------------------------------------------- + my @defaults = qw( + add-newlines + add-semicolons + add-whitespace + blanks-before-blocks + blanks-before-comments + blank-lines-before-subs=1 + blank-lines-before-packages=1 + block-brace-tightness=0 + block-brace-vertical-tightness=0 + brace-tightness=1 + brace-vertical-tightness-closing=0 + brace-vertical-tightness=0 + break-at-old-logical-breakpoints + break-at-old-ternary-breakpoints + break-at-old-attribute-breakpoints + break-at-old-keyword-breakpoints + comma-arrow-breakpoints=5 + nocheck-syntax + closing-side-comment-interval=6 + closing-side-comment-maximum-text=20 + closing-side-comment-else-flag=0 + closing-side-comments-balanced + closing-paren-indentation=0 + closing-brace-indentation=0 + closing-square-bracket-indentation=0 + continuation-indentation=2 + cuddled-break-option=1 + delete-old-newlines + delete-semicolons + extended-syntax + fuzzy-line-length + hanging-side-comments + indent-block-comments + indent-columns=4 + iterations=1 + keep-old-blank-lines=1 + long-block-line-count=8 + look-for-autoloader + look-for-selfloader + maximum-consecutive-blank-lines=1 + maximum-fields-per-table=0 + maximum-line-length=80 + memoize + minimum-space-to-comment=4 + nobrace-left-and-indent + nocuddled-else + nodelete-old-whitespace + nohtml + nologfile + noquiet + noshow-options + nostatic-side-comments + notabs + nowarning-output + character-encoding=none + outdent-labels + outdent-long-quotes + outdent-long-comments + paren-tightness=1 + paren-vertical-tightness-closing=0 + paren-vertical-tightness=0 + pass-version-line + noweld-nested-containers + recombine + valign + short-concatenation-item-length=8 + space-for-semicolon + space-backslash-quote=1 + square-bracket-tightness=1 + square-bracket-vertical-tightness-closing=0 + square-bracket-vertical-tightness=0 + static-block-comments + timestamp + trim-qw + format=tidy + backup-file-extension=bak + format-skipping + default-tabsize=8 + + pod2html + html-table-of-contents + html-entities + ); + + push @defaults, "perl-syntax-check-flags=-c -T"; + + #--------------------------------------------------------------- + # Define abbreviations which will be expanded into the above primitives. + # These may be defined recursively. + #--------------------------------------------------------------- + %expansion = ( + %expansion, + 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], + 'fnl' => [qw(freeze-newlines)], + 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], + 'fws' => [qw(freeze-whitespace)], + 'freeze-blank-lines' => + [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], + 'fbl' => [qw(freeze-blank-lines)], + 'indent-only' => [qw(freeze-newlines freeze-whitespace)], + 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], + 'nooutdent-long-lines' => + [qw(nooutdent-long-quotes nooutdent-long-comments)], + 'noll' => [qw(nooutdent-long-lines)], + 'io' => [qw(indent-only)], + 'delete-all-comments' => + [qw(delete-block-comments delete-side-comments delete-pod)], + 'nodelete-all-comments' => + [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], + 'dac' => [qw(delete-all-comments)], + 'ndac' => [qw(nodelete-all-comments)], + 'gnu' => [qw(gnu-style)], + 'pbp' => [qw(perl-best-practices)], + 'tee-all-comments' => + [qw(tee-block-comments tee-side-comments tee-pod)], + 'notee-all-comments' => + [qw(notee-block-comments notee-side-comments notee-pod)], + 'tac' => [qw(tee-all-comments)], + 'ntac' => [qw(notee-all-comments)], + 'html' => [qw(format=html)], + 'nhtml' => [qw(format=tidy)], + 'tidy' => [qw(format=tidy)], + + # -cb is now a synonym for -ce + 'cb' => [qw(cuddled-else)], + 'cuddled-blocks' => [qw(cuddled-else)], + + 'utf8' => [qw(character-encoding=utf8)], + 'UTF8' => [qw(character-encoding=utf8)], + + 'swallow-optional-blank-lines' => [qw(kbl=0)], + 'noswallow-optional-blank-lines' => [qw(kbl=1)], + 'sob' => [qw(kbl=0)], + 'nsob' => [qw(kbl=1)], + + 'break-after-comma-arrows' => [qw(cab=0)], + 'nobreak-after-comma-arrows' => [qw(cab=1)], + 'baa' => [qw(cab=0)], + 'nbaa' => [qw(cab=1)], + + 'blanks-before-subs' => [qw(blbs=1 blbp=1)], + 'bbs' => [qw(blbs=1 blbp=1)], + 'noblanks-before-subs' => [qw(blbs=0 blbp=0)], + 'nbbs' => [qw(blbs=0 blbp=0)], + + 'break-at-old-trinary-breakpoints' => [qw(bot)], + + 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], + 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], + 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], + 'icp' => [qw(cpi=2 cbi=2 csbi=2)], + 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], + + 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], + 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], + 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)], + 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], + 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], + + 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], + 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], + 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], + + 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], + 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], + 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], + + 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], + 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], + 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], + + 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], + 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], + 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], + + 'otr' => [qw(opr ohbr osbr)], + 'opening-token-right' => [qw(opr ohbr osbr)], + 'notr' => [qw(nopr nohbr nosbr)], + 'noopening-token-right' => [qw(nopr nohbr nosbr)], + + 'sot' => [qw(sop sohb sosb)], + 'nsot' => [qw(nsop nsohb nsosb)], + 'stack-opening-tokens' => [qw(sop sohb sosb)], + 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)], + + 'sct' => [qw(scp schb scsb)], + 'stack-closing-tokens' => => [qw(scp schb scsb)], + 'nsct' => [qw(nscp nschb nscsb)], + 'nostack-closing-tokens' => [qw(nscp nschb nscsb)], + + 'sac' => [qw(sot sct)], + 'nsac' => [qw(nsot nsct)], + 'stack-all-containers' => [qw(sot sct)], + 'nostack-all-containers' => [qw(nsot nsct)], + + 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], + 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], + 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], + 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)], + 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)], + 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)], + + 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)], + 'sobb' => [qw(bbvt=2 bbvtl=*)], + 'nostack-opening-block-brace' => [qw(bbvt=0)], + 'nsobb' => [qw(bbvt=0)], + + 'converge' => [qw(it=4)], + 'noconverge' => [qw(it=1)], + 'conv' => [qw(it=4)], + 'nconv' => [qw(it=1)], + + # 'mangle' originally deleted pod and comments, but to keep it + # reversible, it no longer does. But if you really want to + # delete them, just use: + # -mangle -dac + + # An interesting use for 'mangle' is to do this: + # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new + # which will form as many one-line blocks as possible + + 'mangle' => [ + qw( + check-syntax + keep-old-blank-lines=0 + delete-old-newlines + delete-old-whitespace + delete-semicolons + indent-columns=0 + maximum-consecutive-blank-lines=0 + maximum-line-length=100000 + noadd-newlines + noadd-semicolons + noadd-whitespace + noblanks-before-blocks + blank-lines-before-subs=0 + blank-lines-before-packages=0 + notabs + ) + ], + + # 'extrude' originally deleted pod and comments, but to keep it + # reversible, it no longer does. But if you really want to + # delete them, just use + # extrude -dac + # + # An interesting use for 'extrude' is to do this: + # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new + # which will break up all one-line blocks. + # + # Removed 'check-syntax' option, which is unsafe because it may execute + # code in BEGIN blocks. Example 'Moose/debugger-duck_type.t'. + + 'extrude' => [ + qw( + ci=0 + delete-old-newlines + delete-old-whitespace + delete-semicolons + indent-columns=0 + maximum-consecutive-blank-lines=0 + maximum-line-length=1 + noadd-semicolons + noadd-whitespace + noblanks-before-blocks + blank-lines-before-subs=0 + blank-lines-before-packages=0 + nofuzzy-line-length + notabs + norecombine + ) + ], + + # this style tries to follow the GNU Coding Standards (which do + # not really apply to perl but which are followed by some perl + # programmers). + 'gnu-style' => [ + qw( + lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 + ) + ], + + # Style suggested in Damian Conway's Perl Best Practices + 'perl-best-practices' => [ + qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq), +q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=) + ], + + # Additional styles can be added here + ); + + Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); + + # Uncomment next line to dump all expansions for debugging: + # dump_short_names(\%expansion); + return ( + \@option_string, \@defaults, \%expansion, + \%option_category, \%option_range + ); + +} # end of generate_options + +# Memoize process_command_line. Given same @ARGV passed in, return same +# values and same @ARGV back. +# This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds +# up masontidy (https://metacpan.org/module/masontidy) + +my %process_command_line_cache; + +sub process_command_line { + + my @q = @_; + my ( + $perltidyrc_stream, $is_Windows, $Windows_type, + $rpending_complaint, $dump_options_type + ) = @q; + + my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type; + if ($use_cache) { + my $cache_key = join( chr(28), @ARGV ); + if ( my $result = $process_command_line_cache{$cache_key} ) { + my ( $argv, @retvals ) = @{$result}; + @ARGV = @{$argv}; + return @retvals; + } + else { + my @retvals = _process_command_line(@q); + $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ] + if $retvals[0]->{'memoize'}; + return @retvals; + } + } + else { + return _process_command_line(@q); + } +} + +# This is the original coding, which worked, +# but I've rewritten it (above) to keep Perl-Critic from complaining +# Keep for awhile. + +=pod +sub process_command_line { + + my ( + $perltidyrc_stream, $is_Windows, $Windows_type, + $rpending_complaint, $dump_options_type + ) = @_; + + my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type; + if ($use_cache) { + my $cache_key = join( chr(28), @ARGV ); + if ( my $result = $process_command_line_cache{$cache_key} ) { + my ( $argv, @retvals ) = @{$result}; + @ARGV = @{$argv}; + return @retvals; + } + else { + my @retvals = _process_command_line(@_); + $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ] + if $retvals[0]->{'memoize'}; + return @retvals; + } + } + else { + return _process_command_line(@_); + } +} +=cut + +# (note the underscore here) +sub _process_command_line { + + my ( + $perltidyrc_stream, $is_Windows, $Windows_type, + $rpending_complaint, $dump_options_type + ) = @_; + + use Getopt::Long; + + # Save any current Getopt::Long configuration + # and set to Getopt::Long defaults. Use eval to avoid + # breaking old versions of Perl without these routines. + # Previous configuration is reset at the exit of this routine. + my $glc; + eval { $glc = Getopt::Long::Configure() }; + unless ($@) { + eval { Getopt::Long::ConfigDefaults() }; + } + else { $glc = undef } + + my ( + $roption_string, $rdefaults, $rexpansion, + $roption_category, $roption_range + ) = generate_options(); + + #--------------------------------------------------------------- + # set the defaults by passing the above list through GetOptions + #--------------------------------------------------------------- + my %Opts = (); + { + local @ARGV = (); + + # do not load the defaults if we are just dumping perltidyrc + unless ( $dump_options_type eq 'perltidyrc' ) { + for my $i ( @{$rdefaults} ) { push @ARGV, "--" . $i } + } + if ( !GetOptions( \%Opts, @{$roption_string} ) ) { + Die( +"Programming Bug reported by 'GetOptions': error in setting default options" + ); + } + } + + my $word; + my @raw_options = (); + my $config_file = ""; + my $saw_ignore_profile = 0; + my $saw_dump_profile = 0; + + #--------------------------------------------------------------- + # Take a first look at the command-line parameters. Do as many + # immediate dumps as possible, which can avoid confusion if the + # perltidyrc file has an error. + #--------------------------------------------------------------- + foreach my $i (@ARGV) { + + $i =~ s/^--/-/; + if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { + $saw_ignore_profile = 1; + } + + # note: this must come before -pro and -profile, below: + elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { + $saw_dump_profile = 1; + } + elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { + if ($config_file) { + Warn( +"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n" + ); + } + $config_file = $2; + + # resolve /.../, meaning look upwards from directory + if ( defined($config_file) ) { + if ( my ( $start_dir, $search_file ) = + ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) ) + { + $start_dir = '.' if !$start_dir; + $start_dir = Cwd::realpath($start_dir); + if ( my $found_file = + find_file_upwards( $start_dir, $search_file ) ) + { + $config_file = $found_file; + } + } + } + unless ( -e $config_file ) { + Warn("cannot find file given with -pro=$config_file: $!\n"); + $config_file = ""; + } + } + elsif ( $i =~ /^-(pro|profile)=?$/ ) { + Die("usage: -pro=filename or --profile=filename, no spaces\n"); + } + elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) { + usage(); + Exit(0); + } + elsif ( $i =~ /^-(version|v)$/ ) { + show_version(); + Exit(0); + } + elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { + dump_defaults( @{$rdefaults} ); + Exit(0); + } + elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { + dump_long_names( @{$roption_string} ); + Exit(0); + } + elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { + dump_short_names($rexpansion); + Exit(0); + } + elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) { + Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); + Exit(0); + } + } + + if ( $saw_dump_profile && $saw_ignore_profile ) { + Warn("No profile to dump because of -npro\n"); + Exit(1); + } + + #--------------------------------------------------------------- + # read any .perltidyrc configuration file + #--------------------------------------------------------------- + unless ($saw_ignore_profile) { + + # resolve possible conflict between $perltidyrc_stream passed + # as call parameter to perltidy and -pro=filename on command + # line. + if ($perltidyrc_stream) { + if ($config_file) { + Warn(<{'vertical-tightness'} ) { + my $vt = $rOpts->{'vertical-tightness'}; + $rOpts->{'paren-vertical-tightness'} = $vt; + $rOpts->{'square-bracket-vertical-tightness'} = $vt; + $rOpts->{'brace-vertical-tightness'} = $vt; + } + + if ( defined $rOpts->{'vertical-tightness-closing'} ) { + my $vtc = $rOpts->{'vertical-tightness-closing'}; + $rOpts->{'paren-vertical-tightness-closing'} = $vtc; + $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; + $rOpts->{'brace-vertical-tightness-closing'} = $vtc; + } + + if ( defined $rOpts->{'closing-token-indentation'} ) { + my $cti = $rOpts->{'closing-token-indentation'}; + $rOpts->{'closing-square-bracket-indentation'} = $cti; + $rOpts->{'closing-brace-indentation'} = $cti; + $rOpts->{'closing-paren-indentation'} = $cti; + } + + # In quiet mode, there is no log file and hence no way to report + # results of syntax check, so don't do it. + if ( $rOpts->{'quiet'} ) { + $rOpts->{'check-syntax'} = 0; + } + + # can't check syntax if no output + if ( $rOpts->{'format'} ne 'tidy' ) { + $rOpts->{'check-syntax'} = 0; + } + + # Never let Windows 9x/Me systems run syntax check -- this will prevent a + # wide variety of nasty problems on these systems, because they cannot + # reliably run backticks. Don't even think about changing this! + if ( $rOpts->{'check-syntax'} + && $is_Windows + && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) + { + $rOpts->{'check-syntax'} = 0; + } + + # Added Dec 2017: Deactivating check-syntax for all systems for safety + # because unexpected results can occur when code in BEGIN blocks is + # executed. This flag was included to help check for perltidy mistakes, + # and may still be useful for debugging. To activate for testing comment + # out the next three lines. + else { + $rOpts->{'check-syntax'} = 0; + } + + # It's really a bad idea to check syntax as root unless you wrote + # the script yourself. FIXME: not sure if this works with VMS + unless ($is_Windows) { + + if ( $< == 0 && $rOpts->{'check-syntax'} ) { + $rOpts->{'check-syntax'} = 0; + ${$rpending_complaint} .= +"Syntax check deactivated for safety; you shouldn't run this as root\n"; + } + } + + # check iteration count and quietly fix if necessary: + # - iterations option only applies to code beautification mode + # - the convergence check should stop most runs on iteration 2, and + # virtually all on iteration 3. But we'll allow up to 6. + if ( $rOpts->{'format'} ne 'tidy' ) { + $rOpts->{'iterations'} = 1; + } + elsif ( defined( $rOpts->{'iterations'} ) ) { + if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 } + elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 } + } + else { + $rOpts->{'iterations'} = 1; + } + + my $check_blank_count = sub { + my ( $key, $abbrev ) = @_; + if ( $rOpts->{$key} ) { + if ( $rOpts->{$key} < 0 ) { + $rOpts->{$key} = 0; + Warn("negative value of $abbrev, setting 0\n"); + } + if ( $rOpts->{$key} > 100 ) { + Warn("unreasonably large value of $abbrev, reducing\n"); + $rOpts->{$key} = 100; + } + } + }; + + # check for reasonable number of blank lines and fix to avoid problems + $check_blank_count->( 'blank-lines-before-subs', '-blbs' ); + $check_blank_count->( 'blank-lines-before-packages', '-blbp' ); + $check_blank_count->( 'blank-lines-after-block-opening', '-blao' ); + $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' ); + + # setting a non-negative logfile gap causes logfile to be saved + if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { + $rOpts->{'logfile'} = 1; + } + + # set short-cut flag when only indentation is to be done. + # Note that the user may or may not have already set the + # indent-only flag. + if ( !$rOpts->{'add-whitespace'} + && !$rOpts->{'delete-old-whitespace'} + && !$rOpts->{'add-newlines'} + && !$rOpts->{'delete-old-newlines'} ) + { + $rOpts->{'indent-only'} = 1; + } + + # -isbc implies -ibc + if ( $rOpts->{'indent-spaced-block-comments'} ) { + $rOpts->{'indent-block-comments'} = 1; + } + + # -bli flag implies -bl + if ( $rOpts->{'brace-left-and-indent'} ) { + $rOpts->{'opening-brace-on-new-line'} = 1; + } + + if ( $rOpts->{'opening-brace-always-on-right'} + && $rOpts->{'opening-brace-on-new-line'} ) + { + Warn(<{'opening-brace-on-new-line'} = 0; + } + + # it simplifies things if -bl is 0 rather than undefined + if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) { + $rOpts->{'opening-brace-on-new-line'} = 0; + } + + # -sbl defaults to -bl if not defined + if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) { + $rOpts->{'opening-sub-brace-on-new-line'} = + $rOpts->{'opening-brace-on-new-line'}; + } + + if ( $rOpts->{'entab-leading-whitespace'} ) { + if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { + Warn("-et=n must use a positive integer; ignoring -et\n"); + $rOpts->{'entab-leading-whitespace'} = undef; + } + + # entab leading whitespace has priority over the older 'tabs' option + if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } + } + + # set a default tabsize to be used in guessing the starting indentation + # level if and only if this run does not use tabs and the old code does + # use tabs + if ( $rOpts->{'default-tabsize'} ) { + if ( $rOpts->{'default-tabsize'} < 0 ) { + Warn("negative value of -dt, setting 0\n"); + $rOpts->{'default-tabsize'} = 0; + } + if ( $rOpts->{'default-tabsize'} > 20 ) { + Warn("unreasonably large value of -dt, reducing\n"); + $rOpts->{'default-tabsize'} = 20; + } + } + else { + $rOpts->{'default-tabsize'} = 8; + } + + # Define $tabsize, the number of spaces per tab for use in + # guessing the indentation of source lines with leading tabs. + # Assume same as for this run if tabs are used , otherwise assume + # a default value, typically 8 + my $tabsize = + $rOpts->{'entab-leading-whitespace'} + ? $rOpts->{'entab-leading-whitespace'} + : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'} + : $rOpts->{'default-tabsize'}; + return $tabsize; +} + +sub find_file_upwards { + my ( $search_dir, $search_file ) = @_; + + $search_dir =~ s{/+$}{}; + $search_file =~ s{^/+}{}; + + while (1) { + my $try_path = "$search_dir/$search_file"; + if ( -f $try_path ) { + return $try_path; + } + elsif ( $search_dir eq '/' ) { + return; + } + else { + $search_dir = dirname($search_dir); + } + } + + # This return is for Perl-Critic. + # We shouldn't get out of the while loop without a return + return; +} + +sub expand_command_abbreviations { + + # go through @ARGV and expand any abbreviations + + my ( $rexpansion, $rraw_options, $config_file ) = @_; + + # set a pass limit to prevent an infinite loop; + # 10 should be plenty, but it may be increased to allow deeply + # nested expansions. + my $max_passes = 10; + my @new_argv = (); + + # keep looping until all expansions have been converted into actual + # dash parameters.. + foreach my $pass_count ( 0 .. $max_passes ) { + my @new_argv = (); + my $abbrev_count = 0; + + # loop over each item in @ARGV.. + foreach my $word (@ARGV) { + + # convert any leading 'no-' to just 'no' + if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } + + # if it is a dash flag (instead of a file name).. + if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { + + my $abr = $1; + my $flags = $2; + + # save the raw input for debug output in case of circular refs + if ( $pass_count == 0 ) { + push( @{$rraw_options}, $word ); + } + + # recombine abbreviation and flag, if necessary, + # to allow abbreviations with arguments such as '-vt=1' + if ( $rexpansion->{ $abr . $flags } ) { + $abr = $abr . $flags; + $flags = ""; + } + + # if we see this dash item in the expansion hash.. + if ( $rexpansion->{$abr} ) { + $abbrev_count++; + + # stuff all of the words that it expands to into the + # new arg list for the next pass + foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { + next unless $abbrev; # for safety; shouldn't happen + push( @new_argv, '--' . $abbrev . $flags ); + } + } + + # not in expansion hash, must be actual long name + else { + push( @new_argv, $word ); + } + } + + # not a dash item, so just save it for the next pass + else { + push( @new_argv, $word ); + } + } # end of this pass + + # update parameter list @ARGV to the new one + @ARGV = @new_argv; + last unless ( $abbrev_count > 0 ); + + # make sure we are not in an infinite loop + if ( $pass_count == $max_passes ) { + local $" = ')('; + Warn(<{$abbrev} }; + print STDOUT "$abbrev --> @list\n"; + } + return; +} + +sub check_vms_filename { + + # given a valid filename (the perltidy input file) + # create a modified filename and separator character + # suitable for VMS. + # + # Contributed by Michael Cartmell + # + my $filename = shift; + my ( $base, $path ) = fileparse($filename); + + # remove explicit ; version + $base =~ s/;-?\d*$// + + # remove explicit . version ie two dots in filename NB ^ escapes a dot + or $base =~ s/( # begin capture $1 + (?:^|[^^])\. # match a dot not preceded by a caret + (?: # followed by nothing + | # or + .*[^^] # anything ending in a non caret + ) + ) # end capture $1 + \.-?\d*$ # match . version number + /$1/x; + + # normalise filename, if there are no unescaped dots then append one + $base .= '.' unless $base =~ /(?:^|[^^])\./; + + # if we don't already have an extension then we just append the extension + my $separator = ( $base =~ /\.$/ ) ? "" : "_"; + return ( $path . $base, $separator ); +} + +sub Win_OS_Type { + + # TODO: are these more standard names? + # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003 + + # Returns a string that determines what MS OS we are on. + # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003 + # Returns blank string if not an MS system. + # Original code contributed by: Yves Orton + # We need to know this to decide where to look for config files + + my $rpending_complaint = shift; + my $os = ""; + return $os unless $^O =~ /win32|dos/i; # is it a MS box? + + # Systems built from Perl source may not have Win32.pm + # But probably have Win32::GetOSVersion() anyway so the + # following line is not 'required': + # return $os unless eval('require Win32'); + + # Use the standard API call to determine the version + my ( $undef, $major, $minor, $build, $id ); + eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; + + # + # NAME ID MAJOR MINOR + # Windows NT 4 2 4 0 + # Windows 2000 2 5 0 + # Windows XP 2 5 1 + # Windows Server 2003 2 5 2 + + return "win32s" unless $id; # If id==0 then its a win32s box. + $os = { # Magic numbers from MSDN + # documentation of GetOSVersion + 1 => { + 0 => "95", + 10 => "98", + 90 => "Me" + }, + 2 => { + 0 => "2000", # or NT 4, see below + 1 => "XP/.Net", + 2 => "Win2003", + 51 => "NT3.51" + } + }->{$id}->{$minor}; + + # If $os is undefined, the above code is out of date. Suggested updates + # are welcome. + unless ( defined $os ) { + $os = ""; + + # Deactivated this message 20180322 because it was needlessly + # causing some test scripts to fail. Need help from someone + # with expertise in Windows to decide what is possible with windows. + ${$rpending_complaint} .= </.../, meaning look upwards from directory + my $config_file = shift; + if ($config_file) { + if ( my ( $start_dir, $search_file ) = + ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) ) + { + ${$rconfig_file_chatter} .= + "# Searching Upward: $config_file\n"; + $start_dir = '.' if !$start_dir; + $start_dir = Cwd::realpath($start_dir); + if ( my $found_file = + find_file_upwards( $start_dir, $search_file ) ) + { + $config_file = $found_file; + ${$rconfig_file_chatter} .= "# Found: $config_file\n"; + } + } + } + return $config_file; + }; + + my $config_file; + + # look in current directory first + $config_file = ".perltidyrc"; + return $config_file if $exists_config_file->($config_file); + if ($is_Windows) { + $config_file = "perltidy.ini"; + return $config_file if $exists_config_file->($config_file); + } + + # Default environment vars. + my @envs = qw(PERLTIDY HOME); + + # Check the NT/2k/XP locations, first a local machine def, then a + # network def + push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; + + # Now go through the environment ... + foreach my $var (@envs) { + ${$rconfig_file_chatter} .= "# Examining: \$ENV{$var}"; + if ( defined( $ENV{$var} ) ) { + ${$rconfig_file_chatter} .= " = $ENV{$var}\n"; + + # test ENV{ PERLTIDY } as file: + if ( $var eq 'PERLTIDY' ) { + $config_file = "$ENV{$var}"; + $config_file = $resolve_config_file->($config_file); + return $config_file if $exists_config_file->($config_file); + } + + # test ENV as directory: + $config_file = catfile( $ENV{$var}, ".perltidyrc" ); + $config_file = $resolve_config_file->($config_file); + return $config_file if $exists_config_file->($config_file); + + if ($is_Windows) { + $config_file = catfile( $ENV{$var}, "perltidy.ini" ); + $config_file = $resolve_config_file->($config_file); + return $config_file if $exists_config_file->($config_file); + } + } + else { + ${$rconfig_file_chatter} .= "\n"; + } + } + + # then look for a system-wide definition + # where to look varies with OS + if ($is_Windows) { + + if ($Windows_type) { + my ( $os, $system, $allusers ) = + Win_Config_Locs( $rpending_complaint, $Windows_type ); + + # Check All Users directory, if there is one. + # i.e. C:\Documents and Settings\User\perltidy.ini + if ($allusers) { + + $config_file = catfile( $allusers, ".perltidyrc" ); + return $config_file if $exists_config_file->($config_file); + + $config_file = catfile( $allusers, "perltidy.ini" ); + return $config_file if $exists_config_file->($config_file); + } + + # Check system directory. + # retain old code in case someone has been able to create + # a file with a leading period. + $config_file = catfile( $system, ".perltidyrc" ); + return $config_file if $exists_config_file->($config_file); + + $config_file = catfile( $system, "perltidy.ini" ); + return $config_file if $exists_config_file->($config_file); + } + } + + # Place to add customization code for other systems + elsif ( $^O eq 'OS2' ) { + } + elsif ( $^O eq 'MacOS' ) { + } + elsif ( $^O eq 'VMS' ) { + } + + # Assume some kind of Unix + else { + + $config_file = "/usr/local/etc/perltidyrc"; + return $config_file if $exists_config_file->($config_file); + + $config_file = "/etc/perltidyrc"; + return $config_file if $exists_config_file->($config_file); + } + + # Couldn't find a config file + return; +} + +sub Win_Config_Locs { + + # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP), + # or undef if its not a win32 OS. In list context returns OS, System + # Directory, and All Users Directory. All Users will be empty on a + # 9x/Me box. Contributed by: Yves Orton. + + # Original coding: + # my $rpending_complaint = shift; + # my $os = (@_) ? shift : Win_OS_Type(); + + my ( $rpending_complaint, $os ) = @_; + if ( !$os ) { $os = Win_OS_Type(); } + + return unless $os; + + my $system = ""; + my $allusers = ""; + + if ( $os =~ /9[58]|Me/ ) { + $system = "C:/Windows"; + } + elsif ( $os =~ /NT|XP|200?/ ) { + $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/"; + $allusers = + ( $os =~ /NT/ ) + ? "C:/WinNT/profiles/All Users/" + : "C:/Documents and Settings/All Users/"; + } + else { + + # This currently would only happen on a win32s computer. I don't have + # one to test, so I am unsure how to proceed. Suggestions welcome! + ${$rpending_complaint} .= +"I dont know a sensible place to look for config files on an $os system.\n"; + return; + } + return wantarray ? ( $os, $system, $allusers ) : $os; +} + +sub dump_config_file { + my ( $fh, $config_file, $rconfig_file_chatter ) = @_; + print STDOUT "$$rconfig_file_chatter"; + if ($fh) { + print STDOUT "# Dump of file: '$config_file'\n"; + while ( my $line = $fh->getline() ) { print STDOUT $line } + eval { $fh->close() }; + } + else { + print STDOUT "# ...no config file found\n"; + } + return; +} + +sub read_config_file { + + my ( $fh, $config_file, $rexpansion ) = @_; + my @config_list = (); + + # file is bad if non-empty $death_message is returned + my $death_message = ""; + + my $name = undef; + my $line_no; + my $opening_brace_line; + while ( my $line = $fh->getline() ) { + $line_no++; + chomp $line; + ( $line, $death_message ) = + strip_comment( $line, $config_file, $line_no ); + last if ($death_message); + next unless $line; + $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends + next unless $line; + + my $body = $line; + + # Look for complete or partial abbreviation definition of the form + # name { body } or name { or name { body + # See rules in perltidy's perldoc page + # Section: Other Controls - Creating a new abbreviation + if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) { + my $oldname = $name; + ( $name, $body ) = ( $2, $3 ); + + # Cannot start new abbreviation unless old abbreviation is complete + last if ($opening_brace_line); + + $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// ); + + # handle a new alias definition + if ( ${$rexpansion}{$name} ) { + local $" = ')('; + my @names = sort keys %$rexpansion; + $death_message = + "Here is a list of all installed aliases\n(@names)\n" + . "Attempting to redefine alias ($name) in config file $config_file line $.\n"; + last; + } + ${$rexpansion}{$name} = []; + } + + # leading opening braces not allowed + elsif ( $line =~ /^{/ ) { + $opening_brace_line = undef; + $death_message = + "Unexpected '{' at line $line_no in config file '$config_file'\n"; + last; + } + + # Look for abbreviation closing: body } or } + elsif ( $line =~ /^(.*)?\}$/ ) { + $body = $1; + if ($opening_brace_line) { + $opening_brace_line = undef; + } + else { + $death_message = +"Unexpected '}' at line $line_no in config file '$config_file'\n"; + last; + } + } + + # Now store any parameters + if ($body) { + + my ( $rbody_parts, $msg ) = parse_args($body); + if ($msg) { + $death_message = <close() }; + return ( \@config_list, $death_message ); +} + +sub strip_comment { + + # Strip any comment from a command line + my ( $instr, $config_file, $line_no ) = @_; + my $msg = ""; + + # check for full-line comment + if ( $instr =~ /^\s*#/ ) { + return ( "", $msg ); + } + + # nothing to do if no comments + if ( $instr !~ /#/ ) { + return ( $instr, $msg ); + } + + # handle case of no quotes + elsif ( $instr !~ /['"]/ ) { + + # We now require a space before the # of a side comment + # this allows something like: + # -sbcp=# + # Otherwise, it would have to be quoted: + # -sbcp='#' + $instr =~ s/\s+\#.*$//; + return ( $instr, $msg ); + } + + # handle comments and quotes + my $outstr = ""; + my $quote_char = ""; + while (1) { + + # looking for ending quote character + if ($quote_char) { + if ( $instr =~ /\G($quote_char)/gc ) { + $quote_char = ""; + $outstr .= $1; + } + elsif ( $instr =~ /\G(.)/gc ) { + $outstr .= $1; + } + + # error..we reached the end without seeing the ending quote char + else { + $msg = < in this text: +$instr +Please fix this line or use -npro to avoid reading this file +EOM + last; + } + } + + # accumulating characters and looking for start of a quoted string + else { + if ( $instr =~ /\G([\"\'])/gc ) { + $outstr .= $1; + $quote_char = $1; + } + + # Note: not yet enforcing the space-before-hash rule for side + # comments if the parameter is quoted. + elsif ( $instr =~ /\G#/gc ) { + last; + } + elsif ( $instr =~ /\G(.)/gc ) { + $outstr .= $1; + } + else { + last; + } + } + } + return ( $outstr, $msg ); +} + +sub parse_args { + + # Parse a command string containing multiple string with possible + # quotes, into individual commands. It might look like this, for example: + # + # -wba=" + - " -some-thing -wbb='. && ||' + # + # There is no need, at present, to handle escaped quote characters. + # (They are not perltidy tokens, so needn't be in strings). + + my ($body) = @_; + my @body_parts = (); + my $quote_char = ""; + my $part = ""; + my $msg = ""; + while (1) { + + # looking for ending quote character + if ($quote_char) { + if ( $body =~ /\G($quote_char)/gc ) { + $quote_char = ""; + } + elsif ( $body =~ /\G(.)/gc ) { + $part .= $1; + } + + # error..we reached the end without seeing the ending quote char + else { + if ( length($part) ) { push @body_parts, $part; } + $msg = < in this text: +$body +EOM + last; + } + } + + # accumulating characters and looking for start of a quoted string + else { + if ( $body =~ /\G([\"\'])/gc ) { + $quote_char = $1; + } + elsif ( $body =~ /\G(\s+)/gc ) { + if ( length($part) ) { push @body_parts, $part; } + $part = ""; + } + elsif ( $body =~ /\G(.)/gc ) { + $part .= $1; + } + else { + if ( length($part) ) { push @body_parts, $part; } + last; + } + } + } + return ( \@body_parts, $msg ); +} + +sub dump_long_names { + + my @names = @_; + print STDOUT < does not take an argument +# =s takes a mandatory string +# :s takes an optional string +# =i takes a mandatory integer +# :i takes an optional integer +# ! does not take an argument and may be negated +# i.e., -foo and -nofoo are allowed +# a double dash signals the end of the options list +# +#--------------------------------------------------------------- +EOM + + foreach my $name ( sort @names ) { print STDOUT "$name\n" } + return; +} + +sub dump_defaults { + my @defaults = @_; + print STDOUT "Default command line options:\n"; + foreach my $line ( sort @defaults ) { print STDOUT "$line\n" } + return; +} + +sub readable_options { + + # return options for this run as a string which could be + # put in a perltidyrc file + my ( $rOpts, $roption_string ) = @_; + my %Getopt_flags; + my $rGetopt_flags = \%Getopt_flags; + my $readable_options = "# Final parameter set for this run.\n"; + $readable_options .= + "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; + foreach my $opt ( @{$roption_string} ) { + my $flag = ""; + if ( $opt =~ /(.*)(!|=.*)$/ ) { + $opt = $1; + $flag = $2; + } + if ( defined( $rOpts->{$opt} ) ) { + $rGetopt_flags->{$opt} = $flag; + } + } + foreach my $key ( sort keys %{$rOpts} ) { + my $flag = $rGetopt_flags->{$key}; + my $value = $rOpts->{$key}; + my $prefix = '--'; + my $suffix = ""; + if ($flag) { + if ( $flag =~ /^=/ ) { + if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } + $suffix = "=" . $value; + } + elsif ( $flag =~ /^!/ ) { + $prefix .= "no" unless ($value); + } + else { + + # shouldn't happen + $readable_options .= + "# ERROR in dump_options: unrecognized flag $flag for $key\n"; + } + } + $readable_options .= $prefix . $key . $suffix . "\n"; + } + return $readable_options; +} + +sub show_version { + print STDOUT <<"EOM"; +This is perltidy, v$VERSION + +Copyright 2000-2018, Steve Hancock + +Perltidy is free software and may be copied under the terms of the GNU +General Public License, which is included in the distribution files. + +Complete documentation for perltidy can be found using 'man perltidy' +or on the internet at http://perltidy.sourceforge.net. +EOM + return; +} + +sub usage { + + print STDOUT <outfile + perltidy [ options ] outfile + +Options have short and long forms. Short forms are shown; see +man pages for long forms. Note: '=s' indicates a required string, +and '=n' indicates a required integer. + +I/O control + -h show this help + -o=file name of the output file (only if single input file) + -oext=s change output extension from 'tdy' to s + -opath=path change path to be 'path' for output files + -b backup original to .bak and modify file in-place + -bext=s change default backup extension from 'bak' to s + -q deactivate error messages (for running under editor) + -w include non-critical warning messages in the .ERR error output + -syn run perl -c to check syntax (default under unix systems) + -log save .LOG file, which has useful diagnostics + -f force perltidy to read a binary file + -g like -log but writes more detailed .LOG file, for debugging scripts + -opt write the set of options actually used to a .LOG file + -npro ignore .perltidyrc configuration command file + -pro=file read configuration commands from file instead of .perltidyrc + -st send output to standard output, STDOUT + -se send all error output to standard error output, STDERR + -v display version number to standard output and quit + +Basic Options: + -i=n use n columns per indentation level (default n=4) + -t tabs: use one tab character per indentation level, not recommeded + -nt no tabs: use n spaces per indentation level (default) + -et=n entab leading whitespace n spaces per tab; not recommended + -io "indent only": just do indentation, no other formatting. + -sil=n set starting indentation level to n; use if auto detection fails + -ole=s specify output line ending (s=dos or win, mac, unix) + -ple keep output line endings same as input (input must be filename) + +Whitespace Control + -fws freeze whitespace; this disables all whitespace changes + and disables the following switches: + -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) + -bbt same as -bt but for code block braces; same as -bt if not given + -bbvt block braces vertically tight; use with -bl or -bli + -bbvtl=s make -bbvt to apply to selected list of block types + -pt=n paren tightness (n=0, 1 or 2) + -sbt=n square bracket tightness (n=0, 1, or 2) + -bvt=n brace vertical tightness, + n=(0=open, 1=close unless multiple steps on a line, 2=always close) + -pvt=n paren vertical tightness (see -bvt for n) + -sbvt=n square bracket vertical tightness (see -bvt for n) + -bvtc=n closing brace vertical tightness: + n=(0=open, 1=sometimes close, 2=always close) + -pvtc=n closing paren vertical tightness, see -bvtc for n. + -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. + -ci=n sets continuation indentation=n, default is n=2 spaces + -lp line up parentheses, brackets, and non-BLOCK braces + -sfs add space before semicolon in for( ; ; ) + -aws allow perltidy to add whitespace (default) + -dws delete all old non-essential whitespace + -icb indent closing brace of a code block + -cti=n closing indentation of paren, square bracket, or non-block brace: + n=0 none, =1 align with opening, =2 one full indentation level + -icp equivalent to -cti=2 + -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' + -wrs=s want space right of tokens in string; + -sts put space before terminal semicolon of a statement + -sak=s put space between keywords given in s and '('; + -nsak=s no space between keywords in s and '('; i.e. -nsak='my our local' + +Line Break Control + -fnl freeze newlines; this disables all line break changes + and disables the following switches: + -anl add newlines; ok to introduce new line breaks + -bbs add blank line before subs and packages + -bbc add blank line before block comments + -bbb add blank line between major blocks + -kbl=n keep old blank lines? 0=no, 1=some, 2=all + -mbl=n maximum consecutive blank lines to output (default=1) + -ce cuddled else; use this style: '} else {' + -cb cuddled blocks (other than 'if-elsif-else') + -cbl=s list of blocks to cuddled, default 'try-catch-finally' + -dnl delete old newlines (default) + -l=n maximum line length; default n=80 + -bl opening brace on new line + -sbl opening sub brace on new line. value of -bl is used if not given. + -bli opening brace on new line and indented + -bar opening brace always on right, even for long clauses + -vt=n vertical tightness (requires -lp); n controls break after opening + token: 0=never 1=no break if next line balanced 2=no break + -vtc=n vertical tightness of closing container; n controls if closing + token starts new line: 0=always 1=not unless list 1=never + -wba=s want break after tokens in string; i.e. wba=': .' + -wbb=s want break before tokens in string + -wn weld nested: combines opening and closing tokens when both are adjacent + +Following Old Breakpoints + -kis keep interior semicolons. Allows multiple statements per line. + -boc break at old comma breaks: turns off all automatic list formatting + -bol break at old logical breakpoints: or, and, ||, && (default) + -bok break at old list keyword breakpoints such as map, sort (default) + -bot break at old conditional (ternary ?:) operator breakpoints (default) + -boa break at old attribute breakpoints + -cab=n break at commas after a comma-arrow (=>): + n=0 break at all commas after => + n=1 stable: break unless this breaks an existing one-line container + n=2 break only if a one-line container cannot be formed + n=3 do not treat commas after => specially at all + +Comment controls + -ibc indent block comments (default) + -isbc indent spaced block comments; may indent unless no leading space + -msc=n minimum desired spaces to side comment, default 4 + -fpsc=n fix position for side comments; default 0; + -csc add or update closing side comments after closing BLOCK brace + -dcsc delete closing side comments created by a -csc command + -cscp=s change closing side comment prefix to be other than '## end' + -cscl=s change closing side comment to apply to selected list of blocks + -csci=n minimum number of lines needed to apply a -csc tag, default n=6 + -csct=n maximum number of columns of appended text, default n=20 + -cscw causes warning if old side comment is overwritten with -csc + + -sbc use 'static block comments' identified by leading '##' (default) + -sbcp=s change static block comment identifier to be other than '##' + -osbc outdent static block comments + + -ssc use 'static side comments' identified by leading '##' (default) + -sscp=s change static side comment identifier to be other than '##' + +Delete selected text + -dac delete all comments AND pod + -dbc delete block comments + -dsc delete side comments + -dp delete pod + +Send selected text to a '.TEE' file + -tac tee all comments AND pod + -tbc tee block comments + -tsc tee side comments + -tp tee pod + +Outdenting + -olq outdent long quoted strings (default) + -olc outdent a long block comment line + -ola outdent statement labels + -okw outdent control keywords (redo, next, last, goto, return) + -okwl=s specify alternative keywords for -okw command + +Other controls + -mft=n maximum fields per table; default n=40 + -x do not format lines before hash-bang line (i.e., for VMS) + -asc allows perltidy to add a ';' when missing (default) + -dsm allows perltidy to delete an unnecessary ';' (default) + +Combinations of other parameters + -gnu attempt to follow GNU Coding Standards as applied to perl + -mangle remove as many newlines as possible (but keep comments and pods) + -extrude insert as many newlines as possible + +Dump and die, debugging + -dop dump options used in this run to standard output and quit + -ddf dump default options to standard output and quit + -dsn dump all option short names to standard output and quit + -dln dump option long names to standard output and quit + -dpro dump whatever configuration file is in effect to standard output + -dtt dump all token types to standard output and quit + +HTML + -html write an html file (see 'man perl2web' for many options) + Note: when -html is used, no indentation or formatting are done. + Hint: try perltidy -html -css=mystyle.css filename.pl + and edit mystyle.css to change the appearance of filename.html. + -nnn gives line numbers + -pre only writes out
..
code section + -toc places a table of contents to subs at the top (default) + -pod passes pod text through pod2html (default) + -frm write html as a frame (3 files) + -text=s extra extension for table of contents if -frm, default='toc' + -sext=s extra extension for file content if -frm, default='src' + +A prefix of "n" negates short form toggle switches, and a prefix of "no" +negates the long forms. For example, -nasc means don't add missing +semicolons. + +If you are unable to see this entire text, try "perltidy -h | more" +For more detailed information, and additional options, try "man perltidy", +or go to the perltidy home page at http://perltidy.sourceforge.net +EOF + + return; +} + +sub process_this_file { + + my ( $tokenizer, $formatter ) = @_; + + while ( my $line = $tokenizer->get_line() ) { + $formatter->write_line($line); + } + my $severe_error = $tokenizer->report_tokenization_errors(); + eval { $formatter->finish_formatting($severe_error) }; + + return; +} + +sub check_syntax { + + # Use 'perl -c' to make sure that we did not create bad syntax + # This is a very good independent check for programming errors + # + # Given names of the input and output files, ($istream, $ostream), + # we do the following: + # - check syntax of the input file + # - if bad, all done (could be an incomplete code snippet) + # - if infile syntax ok, then check syntax of the output file; + # - if outfile syntax bad, issue warning; this implies a code bug! + # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good + + my ( $istream, $ostream, $logger_object, $rOpts ) = @_; + my $infile_syntax_ok = 0; + my $line_of_dashes = '-' x 42 . "\n"; + + my $flags = $rOpts->{'perl-syntax-check-flags'}; + + # be sure we invoke perl with -c + # note: perl will accept repeated flags like '-c -c'. It is safest + # to append another -c than try to find an interior bundled c, as + # in -Tc, because such a 'c' might be in a quoted string, for example. + if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } + + # be sure we invoke perl with -x if requested + # same comments about repeated parameters applies + if ( $rOpts->{'look-for-hash-bang'} ) { + if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } + } + + # this shouldn't happen unless a temporary file couldn't be made + if ( $istream eq '-' ) { + $logger_object->write_logfile_entry( + "Cannot run perl -c on STDIN and STDOUT\n"); + return $infile_syntax_ok; + } + + $logger_object->write_logfile_entry( + "checking input file syntax with perl $flags\n"); + + # Not all operating systems/shells support redirection of the standard + # error output. + my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; + + my ( $istream_filename, $perl_output ) = + do_syntax_check( $istream, $flags, $error_redirection ); + $logger_object->write_logfile_entry( + "Input stream passed to Perl as file $istream_filename\n"); + $logger_object->write_logfile_entry($line_of_dashes); + $logger_object->write_logfile_entry("$perl_output\n"); + + if ( $perl_output =~ /syntax\s*OK/ ) { + $infile_syntax_ok = 1; + $logger_object->write_logfile_entry($line_of_dashes); + $logger_object->write_logfile_entry( + "checking output file syntax with perl $flags ...\n"); + my ( $ostream_filename, $perl_output ) = + do_syntax_check( $ostream, $flags, $error_redirection ); + $logger_object->write_logfile_entry( + "Output stream passed to Perl as file $ostream_filename\n"); + $logger_object->write_logfile_entry($line_of_dashes); + $logger_object->write_logfile_entry("$perl_output\n"); + + unless ( $perl_output =~ /syntax\s*OK/ ) { + $logger_object->write_logfile_entry($line_of_dashes); + $logger_object->warning( +"The output file has a syntax error when tested with perl $flags $ostream !\n" + ); + $logger_object->warning( + "This implies an error in perltidy; the file $ostream is bad\n" + ); + $logger_object->report_definite_bug(); + + # the perl version number will be helpful for diagnosing the problem + $logger_object->write_logfile_entry( $^V . "\n" ); + ##qx/perl -v $error_redirection/ . "\n" ); + } + } + else { + + # Only warn of perl -c syntax errors. Other messages, + # such as missing modules, are too common. They can be + # seen by running with perltidy -w + $logger_object->complain("A syntax check using perl $flags\n"); + $logger_object->complain( + "for the output in file $istream_filename gives:\n"); + $logger_object->complain($line_of_dashes); + $logger_object->complain("$perl_output\n"); + $logger_object->complain($line_of_dashes); + $infile_syntax_ok = -1; + $logger_object->write_logfile_entry($line_of_dashes); + $logger_object->write_logfile_entry( +"The output file will not be checked because of input file problems\n" + ); + } + return $infile_syntax_ok; +} + +sub do_syntax_check { + + # This should not be called; the syntax check is deactivated + Die("Unexpected call for syntax check-shouldn't happen\n"); + return; +} + +=pod +sub do_syntax_check { + my ( $stream, $flags, $error_redirection ) = @_; + + ############################################################ + # This code is not reachable because syntax check is deactivated, + # but it is retained for reference. + ############################################################ + + # We need a named input file for executing perl + my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream); + + # TODO: Need to add name of file to log somewhere + # otherwise Perl output is hard to read + if ( !$stream_filename ) { return $stream_filename, "" } + + # We have to quote the filename in case it has unusual characters + # or spaces. Example: this filename #CM11.pm# gives trouble. + my $quoted_stream_filename = '"' . $stream_filename . '"'; + + # Under VMS something like -T will become -t (and an error) so we + # will put quotes around the flags. Double quotes seem to work on + # Unix/Windows/VMS, but this may not work on all systems. (Single + # quotes do not work under Windows). It could become necessary to + # put double quotes around each flag, such as: -"c" -"T" + # We may eventually need some system-dependent coding here. + $flags = '"' . $flags . '"'; + + # now wish for luck... + my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/; + + if ($is_tmpfile) { + unlink $stream_filename + or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n"); + } + return $stream_filename, $msg; +} +=cut + +1; + diff --git a/lib/Perl/Tidy.pod b/lib/Perl/Tidy.pod new file mode 100644 index 00000000..1707362c --- /dev/null +++ b/lib/Perl/Tidy.pod @@ -0,0 +1,435 @@ +=head1 NAME + +Perl::Tidy - Parses and beautifies perl source + +=head1 SYNOPSIS + + use Perl::Tidy; + + my $error_flag = Perl::Tidy::perltidy( + source => $source, + destination => $destination, + stderr => $stderr, + argv => $argv, + perltidyrc => $perltidyrc, + logfile => $logfile, + errorfile => $errorfile, + formatter => $formatter, # callback object (see below) + dump_options => $dump_options, + dump_options_type => $dump_options_type, + prefilter => $prefilter_coderef, + postfilter => $postfilter_coderef, + ); + +=head1 DESCRIPTION + +This module makes the functionality of the perltidy utility available to perl +scripts. Any or all of the input parameters may be omitted, in which case the +@ARGV array will be used to provide input parameters as described +in the perltidy(1) man page. + +For example, the perltidy script is basically just this: + + use Perl::Tidy; + Perl::Tidy::perltidy(); + +The call to B returns a scalar B<$error_flag> which is TRUE if an +error caused premature termination, and FALSE if the process ran to normal +completion. Additional discuss of errors is contained below in the L section. + +The module accepts input and output streams by a variety of methods. +The following list of parameters may be any of the following: a +filename, an ARRAY reference, a SCALAR reference, or an object with +either a B or B method, as appropriate. + + source - the source of the script to be formatted + destination - the destination of the formatted output + stderr - standard error output + perltidyrc - the .perltidyrc file + logfile - the .LOG file stream, if any + errorfile - the .ERR file stream, if any + dump_options - ref to a hash to receive parameters (see below), + dump_options_type - controls contents of dump_options + dump_getopt_flags - ref to a hash to receive Getopt flags + dump_options_category - ref to a hash giving category of options + dump_abbreviations - ref to a hash giving all abbreviations + +The following chart illustrates the logic used to decide how to +treat a parameter. + + ref($param) $param is assumed to be: + ----------- --------------------- + undef a filename + SCALAR ref to string + ARRAY ref to array + (other) object with getline (if source) or print method + +If the parameter is an object, and the object has a B method, that +close method will be called at the end of the stream. + +=over 4 + +=item source + +If the B parameter is given, it defines the source of the input stream. +If an input stream is defined with the B parameter then no other source +filenames may be specified in the @ARGV array or B parameter. + +=item destination + +If the B parameter is given, it will be used to define the +file or memory location to receive output of perltidy. + +=item stderr + +The B parameter allows the calling program to redirect the stream that +would otherwise go to the standard error output device to any of the stream +types listed above. This stream contains important warnings and errors +related to the parameters passed to perltidy. + +=item perltidyrc + +If the B file is given, it will be used instead of any +F<.perltidyrc> configuration file that would otherwise be used. + +=item errorfile + +The B parameter allows the calling program to capture +the stream that would otherwise go to either a .ERR file. This +stream contains warnings or errors related to the contents of one +source file or stream. + +The reason that this is different from the stderr stream is that when perltidy +is called to process multiple files there will be up to one .ERR file created +for each file and it would be very confusing if they were combined. + +However if perltidy is called to process just a single perl script then it may +be more convenient to combine the B stream with the B +stream. This can be done by setting the B<-se> parameter, in which case this +parameter is ignored. + +=item logfile + +The B parameter allows the calling program to capture the log stream. +This stream is only created if requested with a B<-g> parameter. It contains +detailed diagnostic information about a script which may be useful for +debugging. + +=item argv + +If the B parameter is given, it will be used instead of the +B<@ARGV> array. The B parameter may be a string, a reference to a +string, or a reference to an array. If it is a string or reference to a +string, it will be parsed into an array of items just as if it were a +command line string. + +=item dump_options + +If the B parameter is given, it must be the reference to a hash. +In this case, the parameters contained in any perltidyrc configuration file +will be placed in this hash and perltidy will return immediately. This is +equivalent to running perltidy with --dump-options, except that the perameters +are returned in a hash rather than dumped to standard output. Also, by default +only the parameters in the perltidyrc file are returned, but this can be +changed (see the next parameter). This parameter provides a convenient method +for external programs to read a perltidyrc file. An example program using +this feature, F, is included in the distribution. + +Any combination of the B parameters may be used together. + +=item dump_options_type + +This parameter is a string which can be used to control the parameters placed +in the hash reference supplied by B. The possible values are +'perltidyrc' (default) and 'full'. The 'full' parameter causes both the +default options plus any options found in a perltidyrc file to be returned. + +=item dump_getopt_flags + +If the B parameter is given, it must be the reference to a +hash. This hash will receive all of the parameters that perltidy understands +and flags that are passed to Getopt::Long. This parameter may be +used alone or with the B flag. Perltidy will +exit immediately after filling this hash. See the demo program +F for example usage. + +=item dump_options_category + +If the B parameter is given, it must be the reference to a +hash. This hash will receive a hash with keys equal to all long parameter names +and values equal to the title of the corresponding section of the perltidy manual. +See the demo program F for example usage. + +=item dump_abbreviations + +If the B parameter is given, it must be the reference to a +hash. This hash will receive all abbreviations used by Perl::Tidy. See the +demo program F for example usage. + +=item prefilter + +A code reference that will be applied to the source before tidying. It is +expected to take the full content as a string in its input, and output the +transformed content. + +=item postfilter + +A code reference that will be applied to the tidied result before outputting. +It is expected to take the full content as a string in its input, and output +the transformed content. + +Note: A convenient way to check the function of your custom prefilter and +postfilter code is to use the --notidy option, first with just the prefilter +and then with both the prefilter and postfilter. See also the file +B in the perltidy distribution. + +=back + +=head1 ERROR HANDLING + +Perltidy will return with an error flag indicating if the process had to be +terminated early due to errors in the input parameters. This can happen for +example if a parameter is misspelled or given an invalid value. The calling +program should check this flag because if it is set the destination stream will +be empty or incomplete and should be ignored. Error messages in the B +stream will indicate the cause of any problem. + +If the error flag is not set then perltidy ran to completion. However there +may still be warning messages in the B stream related to control +parameters, and there may be warning messages in the B stream +relating to possible syntax errors in the source code being tidied. + +In the event of a catastrophic error for which recovery is not possible +B terminates by making calls to B or B to help the +programmer localize the problem. These should normally only occur during +program development. + +=head1 NOTES ON FORMATTING PARAMETERS + +Parameters which control formatting may be passed in several ways: in a +F<.perltidyrc> configuration file, in the B parameter, and in the +B parameter. + +The B<-syn> (B<--check-syntax>) flag may be used with all source and +destination streams except for standard input and output. However +data streams which are not associated with a filename will +be copied to a temporary file before being be passed to Perl. This +use of temporary files can cause somewhat confusing output from Perl. + +If the B<-pbp> style is used it will typically be necessary to also +specify a B<-nst> flag. This is necessary to turn off the B<-st> flag +contained in the B<-pbp> parameter set which otherwise would direct +the output stream to the standard output. + +=head1 EXAMPLES + +The following example uses string references to hold the input and output +code and error streams, and illustrates checking for errors. + + use Perl::Tidy; + + my $source_string = <<'EOT'; + my$error=Perl::Tidy::perltidy(argv=>$argv,source=>\$source_string, + destination=>\$dest_string,stderr=>\$stderr_string, + errorfile=>\$errorfile_string,); + EOT + + my $dest_string; + my $stderr_string; + my $errorfile_string; + my $argv = "-npro"; # Ignore any .perltidyrc at this site + $argv .= " -pbp"; # Format according to perl best practices + $argv .= " -nst"; # Must turn off -st in case -pbp is specified + $argv .= " -se"; # -se appends the errorfile to stderr + ## $argv .= " --spell-check"; # uncomment to trigger an error + + print "<>\n$source_string\n"; + + my $error = Perl::Tidy::perltidy( + argv => $argv, + source => \$source_string, + destination => \$dest_string, + stderr => \$stderr_string, + errorfile => \$errorfile_string, # ignored when -se flag is set + ##phasers => 'stun', # uncomment to trigger an error + ); + + if ($error) { + + # serious error in input parameters, no tidied output + print "<>\n$stderr_string\n"; + die "Exiting because of serious errors\n"; + } + + if ($dest_string) { print "<>\n$dest_string\n" } + if ($stderr_string) { print "<>\n$stderr_string\n" } + if ($errorfile_string) { print "<<.ERR file>>\n$errorfile_string\n" } + +Additional examples are given in examples section of the perltidy distribution. + +=head1 Using the B Callback Object + +The B parameter is an optional callback object which allows +the calling program to receive tokenized lines directly from perltidy for +further specialized processing. When this parameter is used, the two +formatting options which are built into perltidy (beautification or +html) are ignored. The following diagram illustrates the logical flow: + + |-- (normal route) -> code beautification + caller->perltidy->|-- (-html flag ) -> create html + |-- (formatter given)-> callback to write_line + +This can be useful for processing perl scripts in some way. The +parameter C<$formatter> in the perltidy call, + + formatter => $formatter, + +is an object created by the caller with a C method which +will accept and process tokenized lines, one line per call. Here is +a simple example of a C which merely prints the line number, +the line type (as determined by perltidy), and the text of the line: + + sub write_line { + + # This is called from perltidy line-by-line + my $self = shift; + my $line_of_tokens = shift; + my $line_type = $line_of_tokens->{_line_type}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + print "$input_line_number:$line_type:$input_line"; + } + +The complete program, B, is contained in the examples section of +the source distribution. As this example shows, the callback method +receives a parameter B<$line_of_tokens>, which is a reference to a hash +of other useful information. This example uses these hash entries: + + $line_of_tokens->{_line_number} - the line number (1,2,...) + $line_of_tokens->{_line_text} - the text of the line + $line_of_tokens->{_line_type} - the type of the line, one of: + + SYSTEM - system-specific code before hash-bang line + CODE - line of perl code (including comments) + POD_START - line starting pod, such as '=head' + POD - pod documentation text + POD_END - last line of pod section, '=cut' + HERE - text of here-document + HERE_END - last line of here-doc (target word) + FORMAT - format section + FORMAT_END - last line of format section, '.' + DATA_START - __DATA__ line + DATA - unidentified text following __DATA__ + END_START - __END__ line + END - unidentified text following __END__ + ERROR - we are in big trouble, probably not a perl script + +Most applications will be only interested in lines of type B. For +another example, let's write a program which checks for one of the +so-called I C<&`>, C<$&>, and C<$'>, which +can slow down processing. Here is a B, from the example +program B, which does that: + + sub write_line { + + # This is called back from perltidy line-by-line + # We're looking for $`, $&, and $' + my ( $self, $line_of_tokens ) = @_; + + # pull out some stuff we might need + my $line_type = $line_of_tokens->{_line_type}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + chomp $input_line; + + # skip comments, pod, etc + return if ( $line_type ne 'CODE' ); + + # loop over tokens looking for $`, $&, and $' + for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { + + # we only want to examine token types 'i' (identifier) + next unless $$rtoken_type[$j] eq 'i'; + + # pull out the actual token text + my $token = $$rtokens[$j]; + + # and check it + if ( $token =~ /^\$[\`\&\']$/ ) { + print STDERR + "$input_line_number: $token\n"; + } + } + } + +This example pulls out these tokenization variables from the $line_of_tokens +hash reference: + + $rtoken_type = $line_of_tokens->{_rtoken_type}; + $rtokens = $line_of_tokens->{_rtokens}; + +The variable C<$rtoken_type> is a reference to an array of token type codes, +and C<$rtokens> is a reference to a corresponding array of token text. +These are obviously only defined for lines of type B. +Perltidy classifies tokens into types, and has a brief code for each type. +You can get a complete list at any time by running perltidy from the +command line with + + perltidy --dump-token-types + +In the present example, we are only looking for tokens of type B +(identifiers), so the for loop skips past all other types. When an +identifier is found, its actual text is checked to see if it is one +being sought. If so, the above write_line prints the token and its +line number. + +The B feature is relatively new in perltidy, and further +documentation needs to be written to complete its description. However, +several example programs have been written and can be found in the +B section of the source distribution. Probably the best way +to get started is to find one of the examples which most closely matches +your application and start modifying it. + +For help with perltidy's peculiar way of breaking lines into tokens, you +might run, from the command line, + + perltidy -D filename + +where F is a short script of interest. This will produce +F with interleaved lines of text and their token types. +The B<-D> flag has been in perltidy from the beginning for this purpose. +If you want to see the code which creates this file, it is +C in Tidy.pm. + +=head1 EXPORT + + &perltidy + +=head1 VERSION + +This man page documents Perl::Tidy version 20180220.01 + +=head1 LICENSE + +This package is free software; you can redistribute it and/or modify it +under the terms of the "GNU General Public License". + +Please refer to the file "COPYING" for details. + +=head1 BUG REPORTS + +A list of current bugs and issues can be found at the CPAN site + + https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy + +To report a new bug or problem, use the link on this page . + +=head1 SEE ALSO + +The perltidy(1) man page describes all of the features of perltidy. It +can be found at http://perltidy.sourceforge.net. + +=cut diff --git a/lib/Perl/Tidy/Debugger.pm b/lib/Perl/Tidy/Debugger.pm new file mode 100644 index 00000000..68f52be6 --- /dev/null +++ b/lib/Perl/Tidy/Debugger.pm @@ -0,0 +1,124 @@ +##################################################################### +# +# The Perl::Tidy::Debugger class shows line tokenization +# +##################################################################### + +package Perl::Tidy::Debugger; +use strict; +use warnings; + +sub new { + + my ( $class, $filename ) = @_; + + return bless { + _debug_file => $filename, + _debug_file_opened => 0, + _fh => undef, + }, $class; +} + +sub really_open_debug_file { + + my $self = shift; + my $debug_file = $self->{_debug_file}; + my $fh; + unless ( $fh = IO::File->new("> $debug_file") ) { + Perl::Tidy::Warn("can't open $debug_file: $!\n"); + } + $self->{_debug_file_opened} = 1; + $self->{_fh} = $fh; + print $fh + "Use -dump-token-types (-dtt) to get a list of token type codes\n"; + return; +} + +sub close_debug_file { + + my $self = shift; + my $fh = $self->{_fh}; + if ( $self->{_debug_file_opened} ) { + if ( !eval { $self->{_fh}->close(); 1 } ) { + + # ok, maybe no close function + } + } + return; +} + +sub write_debug_entry { + + # This is a debug dump routine which may be modified as necessary + # to dump tokens on a line-by-line basis. The output will be written + # to the .DEBUG file when the -D flag is entered. + my ( $self, $line_of_tokens ) = @_; + + my $input_line = $line_of_tokens->{_line_text}; + + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + my $rlevels = $line_of_tokens->{_rlevels}; + my $rslevels = $line_of_tokens->{_rslevels}; + my $rblock_type = $line_of_tokens->{_rblock_type}; + + my $input_line_number = $line_of_tokens->{_line_number}; + my $line_type = $line_of_tokens->{_line_type}; + ##my $rtoken_array = $line_of_tokens->{_token_array}; + + my ( $j, $num ); + + my $token_str = "$input_line_number: "; + my $reconstructed_original = "$input_line_number: "; + my $block_str = "$input_line_number: "; + + #$token_str .= "$line_type: "; + #$reconstructed_original .= "$line_type: "; + + my $pattern = ""; + my @next_char = ( '"', '"' ); + my $i_next = 0; + unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() } + my $fh = $self->{_fh}; + + # FIXME: could convert to use of token_array instead + foreach my $j ( 0 .. @{$rtoken_type} - 1 ) { + + # testing patterns + if ( $rtoken_type->[$j] eq 'k' ) { + $pattern .= $rtokens->[$j]; + } + else { + $pattern .= $rtoken_type->[$j]; + } + $reconstructed_original .= $rtokens->[$j]; + $block_str .= "($rblock_type->[$j])"; + $num = length( $rtokens->[$j] ); + my $type_str = $rtoken_type->[$j]; + + # be sure there are no blank tokens (shouldn't happen) + # This can only happen if a programming error has been made + # because all valid tokens are non-blank + if ( $type_str eq ' ' ) { + print $fh "BLANK TOKEN on the next line\n"; + $type_str = $next_char[$i_next]; + $i_next = 1 - $i_next; + } + + if ( length($type_str) == 1 ) { + $type_str = $type_str x $num; + } + $token_str .= $type_str; + } + + # Write what you want here ... + # print $fh "$input_line\n"; + # print $fh "$pattern\n"; + print $fh "$reconstructed_original\n"; + print $fh "$token_str\n"; + + #print $fh "$block_str\n"; + return; +} +1; + diff --git a/lib/Perl/Tidy/DevNull.pm b/lib/Perl/Tidy/DevNull.pm new file mode 100644 index 00000000..35c1253d --- /dev/null +++ b/lib/Perl/Tidy/DevNull.pm @@ -0,0 +1,15 @@ +##################################################################### +# +# The Perl::Tidy::DevNull class supplies a dummy print method +# +##################################################################### + +package Perl::Tidy::DevNull; +use strict; +use warnings; +sub new { my $self = shift; return bless {}, $self } +sub print { return } +sub close { return } + +1; + diff --git a/lib/Perl/Tidy/Diagnostics.pm b/lib/Perl/Tidy/Diagnostics.pm new file mode 100644 index 00000000..89a5d6e6 --- /dev/null +++ b/lib/Perl/Tidy/Diagnostics.pm @@ -0,0 +1,63 @@ +##################################################################### +# +# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is +# useful for program development. +# +# Only one such file is created regardless of the number of input +# files processed. This allows the results of processing many files +# to be summarized in a single file. + +# Output messages go to a file named DIAGNOSTICS, where +# they are labeled by file and line. This allows many files to be +# scanned at once for some particular condition of interest. It was +# particularly useful for developing guessing strategies. +# +# NOTE: This feature is deactivated in final releases but can be +# reactivated for debugging by un-commenting the 'I' options flag +# +##################################################################### + +package Perl::Tidy::Diagnostics; +use strict; +use warnings; + +sub new { + + my $class = shift; + return bless { + _write_diagnostics_count => 0, + _last_diagnostic_file => "", + _input_file => "", + _fh => undef, + }, $class; +} + +sub set_input_file { + my ( $self, $input_file ) = @_; + $self->{_input_file} = $input_file; + return; +} + +sub write_diagnostics { + my ( $self, $msg ) = @_; + + unless ( $self->{_write_diagnostics_count} ) { + open( $self->{_fh}, ">", "DIAGNOSTICS" ) + or Perl::Tidy::Die("couldn't open DIAGNOSTICS: $!\n"); + } + + my $fh = $self->{_fh}; + my $last_diagnostic_file = $self->{_last_diagnostic_file}; + my $input_file = $self->{_input_file}; + if ( $last_diagnostic_file ne $input_file ) { + $fh->print("\nFILE:$input_file\n"); + } + $self->{_last_diagnostic_file} = $input_file; + my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); + $fh->print("$input_line_number:\t$msg"); + $self->{_write_diagnostics_count}++; + return; +} + +1; + diff --git a/lib/Perl/Tidy/FileWriter.pm b/lib/Perl/Tidy/FileWriter.pm new file mode 100644 index 00000000..ce2a3a3a --- /dev/null +++ b/lib/Perl/Tidy/FileWriter.pm @@ -0,0 +1,227 @@ +##################################################################### +# +# the Perl::Tidy::FileWriter class writes the output file +# +##################################################################### + +package Perl::Tidy::FileWriter; +use strict; +use warnings; + +# Maximum number of little messages; probably need not be changed. +my $MAX_NAG_MESSAGES = 6; + +sub write_logfile_entry { + my ( $self, $msg ) = @_; + my $logger_object = $self->{_logger_object}; + if ($logger_object) { + $logger_object->write_logfile_entry($msg); + } + return; +} + +sub new { + my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_; + + return bless { + _line_sink_object => $line_sink_object, + _logger_object => $logger_object, + _rOpts => $rOpts, + _output_line_number => 1, + _consecutive_blank_lines => 0, + _consecutive_nonblank_lines => 0, + _first_line_length_error => 0, + _max_line_length_error => 0, + _last_line_length_error => 0, + _first_line_length_error_at => 0, + _max_line_length_error_at => 0, + _last_line_length_error_at => 0, + _line_length_error_count => 0, + _max_output_line_length => 0, + _max_output_line_length_at => 0, + }, $class; +} + +sub tee_on { + my $self = shift; + $self->{_line_sink_object}->tee_on(); + return; +} + +sub tee_off { + my $self = shift; + $self->{_line_sink_object}->tee_off(); + return; +} + +sub get_output_line_number { + my $self = shift; + return $self->{_output_line_number}; +} + +sub decrement_output_line_number { + my $self = shift; + $self->{_output_line_number}--; + return; +} + +sub get_consecutive_nonblank_lines { + my $self = shift; + return $self->{_consecutive_nonblank_lines}; +} + +sub reset_consecutive_blank_lines { + my $self = shift; + $self->{_consecutive_blank_lines} = 0; + return; +} + +sub want_blank_line { + my $self = shift; + unless ( $self->{_consecutive_blank_lines} ) { + $self->write_blank_code_line(); + } + return; +} + +sub require_blank_code_lines { + + # write out the requested number of blanks regardless of the value of -mbl + # unless -mbl=0. This allows extra blank lines to be written for subs and + # packages even with the default -mbl=1 + my ( $self, $count ) = @_; + my $need = $count - $self->{_consecutive_blank_lines}; + my $rOpts = $self->{_rOpts}; + my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0; + foreach my $i ( 0 .. $need - 1 ) { + $self->write_blank_code_line($forced); + } + return; +} + +sub write_blank_code_line { + my $self = shift; + my $forced = shift; + my $rOpts = $self->{_rOpts}; + return + if (!$forced + && $self->{_consecutive_blank_lines} >= + $rOpts->{'maximum-consecutive-blank-lines'} ); + $self->{_consecutive_blank_lines}++; + $self->{_consecutive_nonblank_lines} = 0; + $self->write_line("\n"); + return; +} + +sub write_code_line { + my $self = shift; + my $a = shift; + + if ( $a =~ /^\s*$/ ) { + my $rOpts = $self->{_rOpts}; + return + if ( $self->{_consecutive_blank_lines} >= + $rOpts->{'maximum-consecutive-blank-lines'} ); + $self->{_consecutive_blank_lines}++; + $self->{_consecutive_nonblank_lines} = 0; + } + else { + $self->{_consecutive_blank_lines} = 0; + $self->{_consecutive_nonblank_lines}++; + } + $self->write_line($a); + return; +} + +sub write_line { + my ( $self, $a ) = @_; + + # TODO: go through and see if the test is necessary here + if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; } + + $self->{_line_sink_object}->write_line($a); + + # This calculation of excess line length ignores any internal tabs + my $rOpts = $self->{_rOpts}; + my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1; + if ( $a =~ /^\t+/g ) { + $exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 ); + } + + # Note that we just incremented output line number to future value + # so we must subtract 1 for current line number + if ( length($a) > 1 + $self->{_max_output_line_length} ) { + $self->{_max_output_line_length} = length($a) - 1; + $self->{_max_output_line_length_at} = $self->{_output_line_number} - 1; + } + + if ( $exceed > 0 ) { + my $output_line_number = $self->{_output_line_number}; + $self->{_last_line_length_error} = $exceed; + $self->{_last_line_length_error_at} = $output_line_number - 1; + if ( $self->{_line_length_error_count} == 0 ) { + $self->{_first_line_length_error} = $exceed; + $self->{_first_line_length_error_at} = $output_line_number - 1; + } + + if ( + $self->{_last_line_length_error} > $self->{_max_line_length_error} ) + { + $self->{_max_line_length_error} = $exceed; + $self->{_max_line_length_error_at} = $output_line_number - 1; + } + + if ( $self->{_line_length_error_count} < $MAX_NAG_MESSAGES ) { + $self->write_logfile_entry( + "Line length exceeded by $exceed characters\n"); + } + $self->{_line_length_error_count}++; + } + return; +} + +sub report_line_length_errors { + my $self = shift; + my $rOpts = $self->{_rOpts}; + my $line_length_error_count = $self->{_line_length_error_count}; + if ( $line_length_error_count == 0 ) { + $self->write_logfile_entry( + "No lines exceeded $rOpts->{'maximum-line-length'} characters\n"); + my $max_output_line_length = $self->{_max_output_line_length}; + my $max_output_line_length_at = $self->{_max_output_line_length_at}; + $self->write_logfile_entry( +" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n" + ); + + } + else { + + my $word = ( $line_length_error_count > 1 ) ? "s" : ""; + $self->write_logfile_entry( +"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n" + ); + + $word = ( $line_length_error_count > 1 ) ? "First" : ""; + my $first_line_length_error = $self->{_first_line_length_error}; + my $first_line_length_error_at = $self->{_first_line_length_error_at}; + $self->write_logfile_entry( +" $word at line $first_line_length_error_at by $first_line_length_error characters\n" + ); + + if ( $line_length_error_count > 1 ) { + my $max_line_length_error = $self->{_max_line_length_error}; + my $max_line_length_error_at = $self->{_max_line_length_error_at}; + my $last_line_length_error = $self->{_last_line_length_error}; + my $last_line_length_error_at = $self->{_last_line_length_error_at}; + $self->write_logfile_entry( +" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n" + ); + $self->write_logfile_entry( +" Last at line $last_line_length_error_at by $last_line_length_error characters\n" + ); + } + } + return; +} +1; + diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm new file mode 100644 index 00000000..88ec4010 --- /dev/null +++ b/lib/Perl/Tidy/Formatter.pm @@ -0,0 +1,16252 @@ +##################################################################### +# +# The Perl::Tidy::Formatter package adds indentation, whitespace, and +# line breaks to the token stream +# +# WARNING: This is not a real class for speed reasons. Only one +# Formatter may be used. +# +##################################################################### + +package Perl::Tidy::Formatter; +use strict; +use warnings; +use Carp; + +# The Tokenizer will be loaded with the Formatter +##use Perl::Tidy::Tokenizer; # for is_keyword() + +sub Die { + my ($msg) = @_; + Perl::Tidy::Die($msg); + croak "unexpected return from Perl::Tidy::Die"; +} + +sub Warn { + my ($msg) = @_; + Perl::Tidy::Warn($msg); + return; +} + +sub Exit { + my ($msg) = @_; + Perl::Tidy::Exit($msg); + croak "unexpected return from Perl::Tidy::Exit"; +} + +BEGIN { + + # Caution: these debug flags produce a lot of output + # They should all be 0 except when debugging small scripts + use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0; + use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0; + use constant FORMATTER_DEBUG_FLAG_BOND => 0; + use constant FORMATTER_DEBUG_FLAG_BREAK => 0; + use constant FORMATTER_DEBUG_FLAG_CI => 0; + use constant FORMATTER_DEBUG_FLAG_FLUSH => 0; + use constant FORMATTER_DEBUG_FLAG_FORCE => 0; + use constant FORMATTER_DEBUG_FLAG_LIST => 0; + use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0; + use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0; + use constant FORMATTER_DEBUG_FLAG_SPARSE => 0; + use constant FORMATTER_DEBUG_FLAG_STORE => 0; + use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0; + use constant FORMATTER_DEBUG_FLAG_WHITE => 0; + + my $debug_warning = sub { + print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n"; + }; + + FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE'); + FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES'); + FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND'); + FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK'); + FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI'); + FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH'); + FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE'); + FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST'); + FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK'); + FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT'); + FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE'); + FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE'); + FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP'); + FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE'); +} + +use vars qw{ + + @gnu_stack + $max_gnu_stack_index + $gnu_position_predictor + $line_start_index_to_go + $last_indentation_written + $last_unadjusted_indentation + $last_leading_token + $last_output_short_opening_token + $peak_batch_size + + $saw_VERSION_in_this_file + $saw_END_or_DATA_ + + @gnu_item_list + $max_gnu_item_index + $gnu_sequence_number + $last_output_indentation + %last_gnu_equals + %gnu_comma_count + %gnu_arrow_count + + @block_type_to_go + @type_sequence_to_go + @container_environment_to_go + @bond_strength_to_go + @forced_breakpoint_to_go + @token_lengths_to_go + @summed_lengths_to_go + @levels_to_go + @leading_spaces_to_go + @reduced_spaces_to_go + @matching_token_to_go + @mate_index_to_go + @ci_levels_to_go + @nesting_depth_to_go + @nobreak_to_go + @old_breakpoint_to_go + @tokens_to_go + @K_to_go + @types_to_go + @inext_to_go + @iprev_to_go + + %saved_opening_indentation + + $max_index_to_go + $comma_count_in_batch + $last_nonblank_index_to_go + $last_nonblank_type_to_go + $last_nonblank_token_to_go + $last_last_nonblank_index_to_go + $last_last_nonblank_type_to_go + $last_last_nonblank_token_to_go + @nonblank_lines_at_depth + $starting_in_quote + $ending_in_quote + @whitespace_level_stack + $whitespace_last_level + + $format_skipping_pattern_begin + $format_skipping_pattern_end + + $forced_breakpoint_count + $forced_breakpoint_undo_count + @forced_breakpoint_undo_stack + %postponed_breakpoint + + $tabbing + $embedded_tab_count + $first_embedded_tab_at + $last_embedded_tab_at + $deleted_semicolon_count + $first_deleted_semicolon_at + $last_deleted_semicolon_at + $added_semicolon_count + $first_added_semicolon_at + $last_added_semicolon_at + $first_tabbing_disagreement + $last_tabbing_disagreement + $in_tabbing_disagreement + $tabbing_disagreement_count + $input_line_tabbing + + $last_line_leading_type + $last_line_leading_level + $last_last_line_leading_level + + %block_leading_text + %block_opening_line_number + $csc_new_statement_ok + $csc_last_label + %csc_block_label + $accumulating_text_for_block + $leading_block_text + $rleading_block_if_elsif_text + $leading_block_text_level + $leading_block_text_length_exceeded + $leading_block_text_line_length + $leading_block_text_line_number + $closing_side_comment_prefix_pattern + $closing_side_comment_list_pattern + + $blank_lines_after_opening_block_pattern + $blank_lines_before_closing_block_pattern + + $last_nonblank_token + $last_nonblank_type + $last_last_nonblank_token + $last_last_nonblank_type + $last_nonblank_block_type + $last_output_level + %is_do_follower + %is_if_brace_follower + %space_after_keyword + $rbrace_follower + $looking_for_else + %is_last_next_redo_return + %is_other_brace_follower + %is_else_brace_follower + %is_anon_sub_brace_follower + %is_anon_sub_1_brace_follower + %is_sort_map_grep + %is_sort_map_grep_eval + %is_sort_map_grep_eval_do + %is_block_without_semicolon + %is_if_unless + %is_and_or + %is_assignment + %is_chain_operator + %is_if_unless_and_or_last_next_redo_return + %ok_to_add_semicolon_for_block_type + + @has_broken_sublist + @dont_align + @want_comma_break + + $is_static_block_comment + $index_start_one_line_block + $semicolons_before_block_self_destruct + $index_max_forced_break + $input_line_number + $diagnostics_object + $vertical_aligner_object + $logger_object + $file_writer_object + $formatter_self + @ci_stack + %want_break_before + %outdent_keyword + $static_block_comment_pattern + $static_side_comment_pattern + %opening_vertical_tightness + %closing_vertical_tightness + %closing_token_indentation + $some_closing_token_indentation + + %opening_token_right + %stack_opening_token + %stack_closing_token + + $block_brace_vertical_tightness_pattern + + $rOpts_add_newlines + $rOpts_add_whitespace + $rOpts_block_brace_tightness + $rOpts_block_brace_vertical_tightness + $rOpts_brace_left_and_indent + $rOpts_comma_arrow_breakpoints + $rOpts_break_at_old_keyword_breakpoints + $rOpts_break_at_old_comma_breakpoints + $rOpts_break_at_old_logical_breakpoints + $rOpts_break_at_old_ternary_breakpoints + $rOpts_break_at_old_attribute_breakpoints + $rOpts_closing_side_comment_else_flag + $rOpts_closing_side_comment_maximum_text + $rOpts_continuation_indentation + $rOpts_delete_old_whitespace + $rOpts_fuzzy_line_length + $rOpts_indent_columns + $rOpts_line_up_parentheses + $rOpts_maximum_fields_per_table + $rOpts_maximum_line_length + $rOpts_variable_maximum_line_length + $rOpts_short_concatenation_item_length + $rOpts_keep_old_blank_lines + $rOpts_ignore_old_breakpoints + $rOpts_format_skipping + $rOpts_space_function_paren + $rOpts_space_keyword_paren + $rOpts_keep_interior_semicolons + $rOpts_ignore_side_comment_lengths + $rOpts_stack_closing_block_brace + $rOpts_space_backslash_quote + $rOpts_whitespace_cycle + + %is_opening_type + %is_closing_type + %is_keyword_returning_list + %tightness + %matching_token + $rOpts + %right_bond_strength + %left_bond_strength + %binary_ws_rules + %want_left_space + %want_right_space + %is_digraph + %is_trigraph + $bli_pattern + $bli_list_string + %is_closing_type + %is_opening_type + %is_closing_token + %is_opening_token + + %weld_len_left_closing + %weld_len_right_closing + %weld_len_left_opening + %weld_len_right_opening + + $rcuddled_block_types + + $SUB_PATTERN + $ASUB_PATTERN + + $NVARS + +}; + +BEGIN { + + # Array index names for token variables + my $i = 0; + use constant { + _BLOCK_TYPE_ => $i++, + _CI_LEVEL_ => $i++, + _CONTAINER_ENVIRONMENT_ => $i++, + _CONTAINER_TYPE_ => $i++, + _CUMULATIVE_LENGTH_ => $i++, + _LINE_INDEX_ => $i++, + _KNEXT_SEQ_ITEM_ => $i++, + _LEVEL_ => $i++, + _LEVEL_TRUE_ => $i++, + _SLEVEL_ => $i++, + _TOKEN_ => $i++, + _TYPE_ => $i++, + _TYPE_SEQUENCE_ => $i++, + }; + $NVARS = 1 + _TYPE_SEQUENCE_; + + # default list of block types for which -bli would apply + $bli_list_string = 'if else elsif unless while for foreach do : sub'; + + my @q; + + @q = qw( + .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> + <= >= == =~ !~ != ++ -- /= x= + ); + @is_digraph{@q} = (1) x scalar(@q); + + @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ ); + @is_trigraph{@q} = (1) x scalar(@q); + + @q = qw( + = **= += *= &= <<= &&= + -= /= |= >>= ||= //= + .= %= ^= + x= + ); + @is_assignment{@q} = (1) x scalar(@q); + + @q = qw( + grep + keys + map + reverse + sort + split + ); + @is_keyword_returning_list{@q} = (1) x scalar(@q); + + @q = qw(is if unless and or err last next redo return); + @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q); + + @q = qw(last next redo return); + @is_last_next_redo_return{@q} = (1) x scalar(@q); + + @q = qw(sort map grep); + @is_sort_map_grep{@q} = (1) x scalar(@q); + + @q = qw(sort map grep eval); + @is_sort_map_grep_eval{@q} = (1) x scalar(@q); + + @q = qw(sort map grep eval do); + @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q); + + @q = qw(if unless); + @is_if_unless{@q} = (1) x scalar(@q); + + @q = qw(and or err); + @is_and_or{@q} = (1) x scalar(@q); + + # Identify certain operators which often occur in chains. + # Note: the minus (-) causes a side effect of padding of the first line in + # something like this (by sub set_logical_padding): + # Checkbutton => 'Transmission checked', + # -variable => \$TRANS + # This usually improves appearance so it seems ok. + @q = qw(&& || and or : ? . + - * /); + @is_chain_operator{@q} = (1) x scalar(@q); + + # We can remove semicolons after blocks preceded by these keywords + @q = + qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless while until for foreach given when default); + @is_block_without_semicolon{@q} = (1) x scalar(@q); + + # We will allow semicolons to be added within these block types + # as well as sub and package blocks. + # NOTES: + # 1. Note that these keywords are omitted: + # switch case given when default sort map grep + # 2. It is also ok to add for sub and package blocks and a labeled block + # 3. But not okay for other perltidy types including: + # { } ; G t + # 4. Test files: blktype.t, blktype1.t, semicolon.t + @q = + qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless do while until eval for foreach ); + @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q); + + # 'L' is token for opening { at hash key + @q = qw< L { ( [ >; + @is_opening_type{@q} = (1) x scalar(@q); + + # 'R' is token for closing } at hash key + @q = qw< R } ) ] >; + @is_closing_type{@q} = (1) x scalar(@q); + + @q = qw< { ( [ >; + @is_opening_token{@q} = (1) x scalar(@q); + + @q = qw< } ) ] >; + @is_closing_token{@q} = (1) x scalar(@q); + + # Patterns for standardizing matches to block types for regular subs and + # anonymous subs. Examples + # 'sub process' is a named sub + # 'sub ::m' is a named sub + # 'sub' is an anonymous sub + # 'sub:' is a label, not a sub + # 'substr' is a keyword + $SUB_PATTERN = '^sub\s+(::|\w)'; + $ASUB_PATTERN = '^sub$'; +} + +# whitespace codes +use constant WS_YES => 1; +use constant WS_OPTIONAL => 0; +use constant WS_NO => -1; + +# Token bond strengths. +use constant NO_BREAK => 10000; +use constant VERY_STRONG => 100; +use constant STRONG => 2.1; +use constant NOMINAL => 1.1; +use constant WEAK => 0.8; +use constant VERY_WEAK => 0.55; + +# values for testing indexes in output array +use constant UNDEFINED_INDEX => -1; + +# Maximum number of little messages; probably need not be changed. +use constant MAX_NAG_MESSAGES => 6; + +# increment between sequence numbers for each type +# For example, ?: pairs might have numbers 7,11,15,... +use constant TYPE_SEQUENCE_INCREMENT => 4; + +{ + + # methods to count instances + my $_count = 0; + sub get_count { return $_count; } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } +} + +sub trim { + + # trim leading and trailing whitespace from a string + my $str = shift; + $str =~ s/\s+$//; + $str =~ s/^\s+//; + return $str; +} + +sub max { + my @vals = @_; + my $max = shift @vals; + foreach my $val (@vals) { + $max = ( $max < $val ) ? $val : $max; + } + return $max; +} + +sub min { + my @vals = @_; + my $min = shift @vals; + foreach my $val (@vals) { + $min = ( $min > $val ) ? $val : $min; + } + return $min; +} + +sub split_words { + + # given a string containing words separated by whitespace, + # return the list of words + my ($str) = @_; + return unless $str; + $str =~ s/\s+$//; + $str =~ s/^\s+//; + return split( /\s+/, $str ); +} + +sub check_keys { + my ( $rtest, $rvalid, $msg, $exact_match ) = @_; + + # Check the keys of a hash: + # $rtest = ref to hash to test + # $rvalid = ref to hash with valid keys + + # $msg = a message to write in case of error + # $exact_match defines the type of check: + # = false: test hash must not have unknown key + # = true: test hash must have exactly same keys as known hash + my @unknown_keys = + grep { !exists $rvalid->{$_} } keys %{$rtest}; + my @missing_keys = + grep { !exists $rtest->{$_} } keys %{$rvalid}; + my $error = @unknown_keys; + if ($exact_match) { $error ||= @missing_keys } + if ($error) { + local $" = ')('; + my @expected_keys = sort keys %{$rvalid}; + @unknown_keys = sort @unknown_keys; + Die(<warning($msg); } + return; +} + +sub complain { + my ($msg) = @_; + if ($logger_object) { + $logger_object->complain($msg); + } + return; +} + +sub write_logfile_entry { + my @msg = @_; + if ($logger_object) { + $logger_object->write_logfile_entry(@msg); + } + return; +} + +sub black_box { + my @msg = @_; + if ($logger_object) { $logger_object->black_box(@msg); } + return; +} + +sub report_definite_bug { + if ($logger_object) { + $logger_object->report_definite_bug(); + } + return; +} + +sub get_saw_brace_error { + if ($logger_object) { + return $logger_object->get_saw_brace_error(); + } + return; +} + +sub we_are_at_the_last_line { + if ($logger_object) { + $logger_object->we_are_at_the_last_line(); + } + return; +} + +# interface to Perl::Tidy::Diagnostics routine +sub write_diagnostics { + my $msg = shift; + if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); } + return; +} + +sub get_added_semicolon_count { + my $self = shift; + return $added_semicolon_count; +} + +sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; +} + +sub get_output_line_number { + return $vertical_aligner_object->get_output_line_number(); +} + +sub new { + + my ( $class, @args ) = @_; + + # we are given an object with a write_line() method to take lines + my %defaults = ( + sink_object => undef, + diagnostics_object => undef, + logger_object => undef, + ); + my %args = ( %defaults, @args ); + + $logger_object = $args{logger_object}; + $diagnostics_object = $args{diagnostics_object}; + + # we create another object with a get_line() and peek_ahead() method + my $sink_object = $args{sink_object}; + $file_writer_object = + Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); + + # initialize the leading whitespace stack to negative levels + # so that we can never run off the end of the stack + $peak_batch_size = 0; # flag to determine if we have output code + $gnu_position_predictor = 0; # where the current token is predicted to be + $max_gnu_stack_index = 0; + $max_gnu_item_index = -1; + $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); + @gnu_item_list = (); + $last_output_indentation = 0; + $last_indentation_written = 0; + $last_unadjusted_indentation = 0; + $last_leading_token = ""; + $last_output_short_opening_token = 0; + + $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; + $saw_END_or_DATA_ = 0; + + @block_type_to_go = (); + @type_sequence_to_go = (); + @container_environment_to_go = (); + @bond_strength_to_go = (); + @forced_breakpoint_to_go = (); + @summed_lengths_to_go = (); # line length to start of ith token + @token_lengths_to_go = (); + @levels_to_go = (); + @matching_token_to_go = (); + @mate_index_to_go = (); + @ci_levels_to_go = (); + @nesting_depth_to_go = (0); + @nobreak_to_go = (); + @old_breakpoint_to_go = (); + @tokens_to_go = (); + @K_to_go = (); + @types_to_go = (); + @leading_spaces_to_go = (); + @reduced_spaces_to_go = (); + @inext_to_go = (); + @iprev_to_go = (); + + @whitespace_level_stack = (); + $whitespace_last_level = -1; + + @dont_align = (); + @has_broken_sublist = (); + @want_comma_break = (); + + @ci_stack = (""); + $first_tabbing_disagreement = 0; + $last_tabbing_disagreement = 0; + $tabbing_disagreement_count = 0; + $in_tabbing_disagreement = 0; + $input_line_tabbing = undef; + + $last_last_line_leading_level = 0; + $last_line_leading_level = 0; + $last_line_leading_type = '#'; + + $last_nonblank_token = ';'; + $last_nonblank_type = ';'; + $last_last_nonblank_token = ';'; + $last_last_nonblank_type = ';'; + $last_nonblank_block_type = ""; + $last_output_level = 0; + $looking_for_else = 0; + $embedded_tab_count = 0; + $first_embedded_tab_at = 0; + $last_embedded_tab_at = 0; + $deleted_semicolon_count = 0; + $first_deleted_semicolon_at = 0; + $last_deleted_semicolon_at = 0; + $added_semicolon_count = 0; + $first_added_semicolon_at = 0; + $last_added_semicolon_at = 0; + $is_static_block_comment = 0; + %postponed_breakpoint = (); + + # variables for adding side comments + %block_leading_text = (); + %block_opening_line_number = (); + $csc_new_statement_ok = 1; + %csc_block_label = (); + + %saved_opening_indentation = (); + + reset_block_text_accumulator(); + + prepare_for_new_input_lines(); + + $vertical_aligner_object = + Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, + $logger_object, $diagnostics_object ); + + if ( $rOpts->{'entab-leading-whitespace'} ) { + write_logfile_entry( +"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" + ); + } + elsif ( $rOpts->{'tabs'} ) { + write_logfile_entry("Indentation will be with a tab character\n"); + } + else { + write_logfile_entry( + "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); + } + + # This hash holds the main data structures for formatting + # All hash keys must be defined here. + $formatter_self = { + rlines => [], # = ref to array of lines of the file + rlines_new => [], # = ref to array of output lines + # (FOR FUTURE DEVELOPMENT) + rLL => [], # = ref to array with all tokens + # in the file. LL originally meant + # 'Linked List'. Linked lists were a + # bad idea but LL is easy to type. + Klimit => undef, # = maximum K index for rLL. This is + # needed to catch any autovivification + # problems. + rnested_pairs => [], # for welding decisions + K_opening_container => {}, # for quickly traversing structure + K_closing_container => {}, # for quickly traversing structure + K_opening_ternary => {}, # for quickly traversing structure + K_closing_ternary => {}, # for quickly traversing structure + rK_phantom_semicolons => + undef, # for undoing phantom semicolons if iterating + rpaired_to_inner_container => {}, + rbreak_container => {}, # prevent one-line blocks + rvalid_self_keys => [], # for checking + valign_batch_count => 0, + }; + my @valid_keys = keys %{$formatter_self}; + $formatter_self->{rvalid_self_keys} = \@valid_keys; + + bless $formatter_self, $class; + + # Safety check..this is not a class yet + if ( _increment_count() > 1 ) { + confess +"Attempt to create more than 1 object in $class, which is not a true class yet\n"; + } + return $formatter_self; +} + +# Future routines for storing new lines +sub push_line { + my ( $self, $rline ) = @_; + + # my $rline = $rlines->[$index_old]; + # push @{$rlines_new}, $rline; + return; +} + +sub push_old_line { + my ( $self, $index_old ) = @_; + + # TODO: This will copy line with index $index_old to the new line array + # my $rlines = $self->{rlines}; + # my $rline = $rlines->[$index_old]; + # $self->push_line($rline); + return; +} + +sub push_blank_line { + my ($self) = @_; + + # my $rline = ... + # $self->push_line($rline); + return; +} + +sub push_CODE_line { + my ( $self, $Kmin, $Kmax ) = @_; + + # TODO: This will store the values for one new line of CODE + # CHECK TOKEN RANGE HERE + # $self->push_line($rline); + return; +} + +sub increment_valign_batch_count { + my ($self) = shift; + return ++$self->{valign_batch_count}; +} + +sub get_valign_batch_count { + my ($self) = shift; + return $self->{valign_batch_count}; +} + +sub Fault { + my ($msg) = @_; + + # "I've just picked up a fault in the AE35 unit" - 2001: A Space Odyssey ... + + # This routine is called for errors that really should not occur + # except if there has been a bug introduced by a recent program change + my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); + my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); + my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); + + Die(<{rvalid_self_keys} }; + my %valid_self_hash; + @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys); + check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 ); + return; +} + +sub check_token_array { + my $self = shift; + + # Check for errors in the array of tokens + # Uses package variable $NVARS + $self->check_self_hash(); + my $rLL = $self->{rLL}; + for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { + my $nvars = @{ $rLL->[$KK] }; + if ( $nvars != $NVARS ) { + my $type = $rLL->[$KK]->[_TYPE_]; + $type = '*' unless defined($type); + Fault( +"number of vars for node $KK, type '$type', is $nvars but should be $NVARS" + ); + } + foreach my $var ( _TOKEN_, _TYPE_ ) { + if ( !defined( $rLL->[$KK]->[$var] ) ) { + my $iline = $rLL->[$KK]->[_LINE_INDEX_]; + Fault("Undefined variable $var for K=$KK, line=$iline\n"); + } + } + } + return; +} + +sub set_rLL_max_index { + my $self = shift; + + # Set the limit of the rLL array, assuming that it is correct. + # This should only be called by routines after they make changes + # to tokenization + my $rLL = $self->{rLL}; + if ( !defined($rLL) ) { + + # Shouldn't happen because rLL was initialized to be an array ref + Fault("Undefined Memory rLL"); + } + my $Klimit_old = $self->{Klimit}; + my $num = @{$rLL}; + my $Klimit; + if ( $num > 0 ) { $Klimit = $num - 1 } + $self->{Klimit} = $Klimit; + return ($Klimit); +} + +sub get_rLL_max_index { + my $self = shift; + + # the memory location $rLL and number of tokens should be obtained + # from this routine so that any autovivication can be immediately caught. + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + if ( !defined($rLL) ) { + + # Shouldn't happen because rLL was initialized to be an array ref + Fault("Undefined Memory rLL"); + } + my $num = @{$rLL}; + if ( $num == 0 && defined($Klimit) + || $num > 0 && !defined($Klimit) + || $num > 0 && $Klimit != $num - 1 ) + { + + # Possible autovivification problem... + if ( !defined($Klimit) ) { $Klimit = '*' } + Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit"); + } + return ($Klimit); +} + +sub prepare_for_new_input_lines { + + # Remember the largest batch size processed. This is needed + # by the pad routine to avoid padding the first nonblank token + if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) { + $peak_batch_size = $max_index_to_go; + } + + $gnu_sequence_number++; # increment output batch counter + %last_gnu_equals = (); + %gnu_comma_count = (); + %gnu_arrow_count = (); + $line_start_index_to_go = 0; + $max_gnu_item_index = UNDEFINED_INDEX; + $index_max_forced_break = UNDEFINED_INDEX; + $max_index_to_go = UNDEFINED_INDEX; + $last_nonblank_index_to_go = UNDEFINED_INDEX; + $last_nonblank_type_to_go = ''; + $last_nonblank_token_to_go = ''; + $last_last_nonblank_index_to_go = UNDEFINED_INDEX; + $last_last_nonblank_type_to_go = ''; + $last_last_nonblank_token_to_go = ''; + $forced_breakpoint_count = 0; + $forced_breakpoint_undo_count = 0; + $rbrace_follower = undef; + $summed_lengths_to_go[0] = 0; + $comma_count_in_batch = 0; + $starting_in_quote = 0; + + destroy_one_line_block(); + return; +} + +sub break_lines { + + # Loop over old lines to set new line break points + + my $self = shift; + my $rlines = $self->{rlines}; + + # Flag to prevent blank lines when POD occurs in a format skipping sect. + my $in_format_skipping_section; + + my $line_type = ""; + foreach my $line_of_tokens ( @{$rlines} ) { + + my $last_line_type = $line_type; + $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; + + # _line_type codes are: + # SYSTEM - system-specific code before hash-bang line + # CODE - line of perl code (including comments) + # POD_START - line starting pod, such as '=head' + # POD - pod documentation text + # POD_END - last line of pod section, '=cut' + # HERE - text of here-document + # HERE_END - last line of here-doc (target word) + # FORMAT - format section + # FORMAT_END - last line of format section, '.' + # DATA_START - __DATA__ line + # DATA - unidentified text following __DATA__ + # END_START - __END__ line + # END - unidentified text following __END__ + # ERROR - we are in big trouble, probably not a perl script + + # put a blank line after an =cut which comes before __END__ and __DATA__ + # (required by podchecker) + if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { + $file_writer_object->reset_consecutive_blank_lines(); + if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) { + $self->want_blank_line(); + } + } + + # handle line of code.. + if ( $line_type eq 'CODE' ) { + + my $CODE_type = $line_of_tokens->{_code_type}; + $in_format_skipping_section = $CODE_type eq 'FS'; + + # Handle blank lines + if ( $CODE_type eq 'BL' ) { + + # If keep-old-blank-lines is zero, we delete all + # old blank lines and let the blank line rules generate any + # needed blanks. + if ($rOpts_keep_old_blank_lines) { + $self->flush(); + $file_writer_object->write_blank_code_line( + $rOpts_keep_old_blank_lines == 2 ); + $last_line_leading_type = 'b'; + } + next; + } + else { + + # let logger see all non-blank lines of code + my $output_line_number = get_output_line_number(); + ##$vertical_aligner_object->get_output_line_number(); + black_box( $line_of_tokens, $output_line_number ); + } + + # Handle Format Skipping (FS) and Verbatim (VB) Lines + if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) { + $self->write_unindented_line("$input_line"); + $file_writer_object->reset_consecutive_blank_lines(); + next; + } + + # Handle all other lines of code + $self->print_line_of_tokens($line_of_tokens); + } + + # handle line of non-code.. + else { + + # set special flags + my $skip_line = 0; + my $tee_line = 0; + if ( $line_type =~ /^POD/ ) { + + # Pod docs should have a preceding blank line. But stay + # out of __END__ and __DATA__ sections, because + # the user may be using this section for any purpose whatsoever + if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } + if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } + if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } + if ( !$skip_line + && !$in_format_skipping_section + && $line_type eq 'POD_START' + && !$saw_END_or_DATA_ ) + { + $self->want_blank_line(); + } + } + + # leave the blank counters in a predictable state + # after __END__ or __DATA__ + elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { + $file_writer_object->reset_consecutive_blank_lines(); + $saw_END_or_DATA_ = 1; + } + + # write unindented non-code line + if ( !$skip_line ) { + if ($tee_line) { $file_writer_object->tee_on() } + $self->write_unindented_line($input_line); + if ($tee_line) { $file_writer_object->tee_off() } + } + } + } + return; +} + +{ ## Beginning of routine to check line hashes + + my %valid_line_hash; + + BEGIN { + + # These keys are defined for each line in the formatter + # Each line must have exactly these quantities + my @valid_line_keys = qw( + _curly_brace_depth + _ending_in_quote + _guessed_indentation_level + _line_number + _line_text + _line_type + _paren_depth + _quote_character + _rK_range + _square_bracket_depth + _starting_in_quote + _ended_in_blank_token + _code_type + + _ci_level_0 + _level_0 + _nesting_blocks_0 + _nesting_tokens_0 + ); + + @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys); + } + + sub check_line_hashes { + my $self = shift; + $self->check_self_hash(); + my $rlines = $self->{rlines}; + foreach my $rline ( @{$rlines} ) { + my $iline = $rline->{_line_number}; + my $line_type = $rline->{_line_type}; + check_keys( $rline, \%valid_line_hash, + "Checkpoint: line number =$iline, line_type=$line_type", 1 ); + } + return; + } + +} ## End check line hashes + +sub write_line { + + # We are caching tokenized lines as they arrive and converting them to the + # format needed for the final formatting. + my ( $self, $line_of_tokens_old ) = @_; + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + my $rlines_new = $self->{rlines}; + + my $Kfirst; + my $line_of_tokens = {}; + foreach my $key ( + qw( + _curly_brace_depth + _ending_in_quote + _guessed_indentation_level + _line_number + _line_text + _line_type + _paren_depth + _quote_character + _square_bracket_depth + _starting_in_quote + ) + ) + { + $line_of_tokens->{$key} = $line_of_tokens_old->{$key}; + } + + # Data needed by Logger + $line_of_tokens->{_level_0} = 0; + $line_of_tokens->{_ci_level_0} = 0; + $line_of_tokens->{_nesting_blocks_0} = ""; + $line_of_tokens->{_nesting_tokens_0} = ""; + + # Needed to avoid trimming quotes + $line_of_tokens->{_ended_in_blank_token} = undef; + + my $line_type = $line_of_tokens_old->{_line_type}; + my $input_line_no = $line_of_tokens_old->{_line_number} - 1; + if ( $line_type eq 'CODE' ) { + + my $rtokens = $line_of_tokens_old->{_rtokens}; + my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; + my $rblock_type = $line_of_tokens_old->{_rblock_type}; + my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type}; + my $rcontainer_environment = + $line_of_tokens_old->{_rcontainer_environment}; + my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; + my $rlevels = $line_of_tokens_old->{_rlevels}; + my $rslevels = $line_of_tokens_old->{_rslevels}; + my $rci_levels = $line_of_tokens_old->{_rci_levels}; + my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks}; + my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens}; + + my $jmax = @{$rtokens} - 1; + if ( $jmax >= 0 ) { + $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; + foreach my $j ( 0 .. $jmax ) { + my @tokary; + @tokary[ + _TOKEN_, _TYPE_, + _BLOCK_TYPE_, _CONTAINER_TYPE_, + _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_, + _LEVEL_, _LEVEL_TRUE_, + _SLEVEL_, _CI_LEVEL_, + _LINE_INDEX_, + ] + = ( + $rtokens->[$j], $rtoken_type->[$j], + $rblock_type->[$j], $rcontainer_type->[$j], + $rcontainer_environment->[$j], $rtype_sequence->[$j], + $rlevels->[$j], $rlevels->[$j], + $rslevels->[$j], $rci_levels->[$j], + $input_line_no, + ); + push @{$rLL}, \@tokary; + } + + $Klimit = @{$rLL} - 1; + + # Need to remember if we can trim the input line + $line_of_tokens->{_ended_in_blank_token} = + $rtoken_type->[$jmax] eq 'b'; + + $line_of_tokens->{_level_0} = $rlevels->[0]; + $line_of_tokens->{_ci_level_0} = $rci_levels->[0]; + $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0]; + $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0]; + } + } + + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; + $line_of_tokens->{_code_type} = ""; + $self->{Klimit} = $Klimit; + + push @{$rlines_new}, $line_of_tokens; + return; +} + +sub initialize_whitespace_hashes { + + # initialize these global hashes, which control the use of + # whitespace around tokens: + # + # %binary_ws_rules + # %want_left_space + # %want_right_space + # %space_after_keyword + # + # Many token types are identical to the tokens themselves. + # See the tokenizer for a complete list. Here are some special types: + # k = perl keyword + # f = semicolon in for statement + # m = unary minus + # p = unary plus + # Note that :: is excluded since it should be contained in an identifier + # Note that '->' is excluded because it never gets space + # parentheses and brackets are excluded since they are handled specially + # curly braces are included but may be overridden by logic, such as + # newline logic. + + # NEW_TOKENS: create a whitespace rule here. This can be as + # simple as adding your new letter to @spaces_both_sides, for + # example. + + my @opening_type = qw< L { ( [ >; + @is_opening_type{@opening_type} = (1) x scalar(@opening_type); + + my @closing_type = qw< R } ) ] >; + @is_closing_type{@closing_type} = (1) x scalar(@closing_type); + + my @spaces_both_sides = qw# + + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= + .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ + &&= ||= //= <=> A k f w F n C Y U G v + #; + + my @spaces_left_side = qw< + t ! ~ m p { \ h pp mm Z j + >; + push( @spaces_left_side, '#' ); # avoids warning message + + my @spaces_right_side = qw< + ; } ) ] R J ++ -- **= + >; + push( @spaces_right_side, ',' ); # avoids warning message + + # Note that we are in a BEGIN block here. Later in processing + # the values of %want_left_space and %want_right_space + # may be overridden by any user settings specified by the + # -wls and -wrs parameters. However the binary_whitespace_rules + # are hardwired and have priority. + @want_left_space{@spaces_both_sides} = + (1) x scalar(@spaces_both_sides); + @want_right_space{@spaces_both_sides} = + (1) x scalar(@spaces_both_sides); + @want_left_space{@spaces_left_side} = + (1) x scalar(@spaces_left_side); + @want_right_space{@spaces_left_side} = + (-1) x scalar(@spaces_left_side); + @want_left_space{@spaces_right_side} = + (-1) x scalar(@spaces_right_side); + @want_right_space{@spaces_right_side} = + (1) x scalar(@spaces_right_side); + $want_left_space{'->'} = WS_NO; + $want_right_space{'->'} = WS_NO; + $want_left_space{'**'} = WS_NO; + $want_right_space{'**'} = WS_NO; + $want_right_space{'CORE::'} = WS_NO; + + # These binary_ws_rules are hardwired and have priority over the above + # settings. It would be nice to allow adjustment by the user, + # but it would be complicated to specify. + # + # hash type information must stay tightly bound + # as in : ${xxxx} + $binary_ws_rules{'i'}{'L'} = WS_NO; + $binary_ws_rules{'i'}{'{'} = WS_YES; + $binary_ws_rules{'k'}{'{'} = WS_YES; + $binary_ws_rules{'U'}{'{'} = WS_YES; + $binary_ws_rules{'i'}{'['} = WS_NO; + $binary_ws_rules{'R'}{'L'} = WS_NO; + $binary_ws_rules{'R'}{'{'} = WS_NO; + $binary_ws_rules{'t'}{'L'} = WS_NO; + $binary_ws_rules{'t'}{'{'} = WS_NO; + $binary_ws_rules{'}'}{'L'} = WS_NO; + $binary_ws_rules{'}'}{'{'} = WS_NO; + $binary_ws_rules{'$'}{'L'} = WS_NO; + $binary_ws_rules{'$'}{'{'} = WS_NO; + $binary_ws_rules{'@'}{'L'} = WS_NO; + $binary_ws_rules{'@'}{'{'} = WS_NO; + $binary_ws_rules{'='}{'L'} = WS_YES; + $binary_ws_rules{'J'}{'J'} = WS_YES; + + # the following includes ') {' + # as in : if ( xxx ) { yyy } + $binary_ws_rules{']'}{'L'} = WS_NO; + $binary_ws_rules{']'}{'{'} = WS_NO; + $binary_ws_rules{')'}{'{'} = WS_YES; + $binary_ws_rules{')'}{'['} = WS_NO; + $binary_ws_rules{']'}{'['} = WS_NO; + $binary_ws_rules{']'}{'{'} = WS_NO; + $binary_ws_rules{'}'}{'['} = WS_NO; + $binary_ws_rules{'R'}{'['} = WS_NO; + + $binary_ws_rules{']'}{'++'} = WS_NO; + $binary_ws_rules{']'}{'--'} = WS_NO; + $binary_ws_rules{')'}{'++'} = WS_NO; + $binary_ws_rules{')'}{'--'} = WS_NO; + + $binary_ws_rules{'R'}{'++'} = WS_NO; + $binary_ws_rules{'R'}{'--'} = WS_NO; + + $binary_ws_rules{'i'}{'Q'} = WS_YES; + $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' + + # FIXME: we could to split 'i' into variables and functions + # and have no space for functions but space for variables. For now, + # I have a special patch in the special rules below + $binary_ws_rules{'i'}{'('} = WS_NO; + + $binary_ws_rules{'w'}{'('} = WS_NO; + $binary_ws_rules{'w'}{'{'} = WS_YES; + return; + +} ## end initialize_whitespace_hashes + +sub set_whitespace_flags { + + # This routine examines each pair of nonblank tokens and + # sets a flag indicating if white space is needed. + # + # $rwhitespace_flags->[$j] is a flag indicating whether a white space + # BEFORE token $j is needed, with the following values: + # + # WS_NO = -1 do not want a space before token $j + # WS_OPTIONAL= 0 optional space or $j is a whitespace + # WS_YES = 1 want a space before token $j + # + + my $self = shift; + my $rLL = $self->{rLL}; + + my $rwhitespace_flags = []; + + my ( $last_token, $last_type, $last_block_type, $last_input_line_no, + $token, $type, $block_type, $input_line_no ); + my $j_tight_closing_paren = -1; + + $token = ' '; + $type = 'b'; + $block_type = ''; + $input_line_no = 0; + $last_token = ' '; + $last_type = 'b'; + $last_block_type = ''; + $last_input_line_no = 0; + + my $jmax = @{$rLL} - 1; + + my ($ws); + + # This is some logic moved to a sub to avoid deep nesting of if stmts + my $ws_in_container = sub { + + my ($j) = @_; + my $ws = WS_YES; + if ( $j + 1 > $jmax ) { return (WS_NO) } + + # Patch to count '-foo' as single token so that + # each of $a{-foo} and $a{foo} and $a{'foo'} do + # not get spaces with default formatting. + my $j_here = $j; + ++$j_here + if ( $token eq '-' + && $last_token eq '{' + && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); + + # $j_next is where a closing token should be if + # the container has a single token + if ( $j_here + 1 > $jmax ) { return (WS_NO) } + my $j_next = + ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) + ? $j_here + 2 + : $j_here + 1; + + if ( $j_next > $jmax ) { return WS_NO } + my $tok_next = $rLL->[$j_next]->[_TOKEN_]; + my $type_next = $rLL->[$j_next]->[_TYPE_]; + + # for tightness = 1, if there is just one token + # within the matching pair, we will keep it tight + if ( + $tok_next eq $matching_token{$last_token} + + # but watch out for this: [ [ ] (misc.t) + && $last_token ne $token + + # double diamond is usually spaced + && $token ne '<<>>' + + ) + { + + # remember where to put the space for the closing paren + $j_tight_closing_paren = $j_next; + return (WS_NO); + } + return (WS_YES); + }; + + # main loop over all tokens to define the whitespace flags + for ( my $j = 0 ; $j <= $jmax ; $j++ ) { + + my $rtokh = $rLL->[$j]; + + # Set a default + $rwhitespace_flags->[$j] = WS_OPTIONAL; + + if ( $rtokh->[_TYPE_] eq 'b' ) { + next; + } + + # set a default value, to be changed as needed + $ws = undef; + $last_token = $token; + $last_type = $type; + $last_block_type = $block_type; + $last_input_line_no = $input_line_no; + $token = $rtokh->[_TOKEN_]; + $type = $rtokh->[_TYPE_]; + $block_type = $rtokh->[_BLOCK_TYPE_]; + $input_line_no = $rtokh->[_LINE_INDEX_]; + + #--------------------------------------------------------------- + # Whitespace Rules Section 1: + # Handle space on the inside of opening braces. + #--------------------------------------------------------------- + + # /^[L\{\(\[]$/ + if ( $is_opening_type{$last_type} ) { + + $j_tight_closing_paren = -1; + + # let us keep empty matched braces together: () {} [] + # except for BLOCKS + if ( $token eq $matching_token{$last_token} ) { + if ($block_type) { + $ws = WS_YES; + } + else { + $ws = WS_NO; + } + } + else { + + # we're considering the right of an opening brace + # tightness = 0 means always pad inside with space + # tightness = 1 means pad inside if "complex" + # tightness = 2 means never pad inside with space + + my $tightness; + if ( $last_type eq '{' + && $last_token eq '{' + && $last_block_type ) + { + $tightness = $rOpts_block_brace_tightness; + } + else { $tightness = $tightness{$last_token} } + + #============================================================= + # Patch for test problem <> + # We must always avoid spaces around a bare word beginning + # with ^ as in: + # my $before = ${^PREMATCH}; + # Because all of the following cause an error in perl: + # my $before = ${ ^PREMATCH }; + # my $before = ${ ^PREMATCH}; + # my $before = ${^PREMATCH }; + # So if brace tightness flag is -bt=0 we must temporarily reset + # to bt=1. Note that here we must set tightness=1 and not 2 so + # that the closing space + # is also avoided (via the $j_tight_closing_paren flag in coding) + if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } + + #============================================================= + + if ( $tightness <= 0 ) { + $ws = WS_YES; + } + elsif ( $tightness > 1 ) { + $ws = WS_NO; + } + else { + $ws = $ws_in_container->($j); + } + } + } # end setting space flag inside opening tokens + my $ws_1; + $ws_1 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 2: + # Handle space on inside of closing brace pairs. + #--------------------------------------------------------------- + + # /[\}\)\]R]/ + if ( $is_closing_type{$type} ) { + + if ( $j == $j_tight_closing_paren ) { + + $j_tight_closing_paren = -1; + $ws = WS_NO; + } + else { + + if ( !defined($ws) ) { + + my $tightness; + if ( $type eq '}' && $token eq '}' && $block_type ) { + $tightness = $rOpts_block_brace_tightness; + } + else { $tightness = $tightness{$token} } + + $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; + } + } + } # end setting space flag inside closing tokens + + my $ws_2; + $ws_2 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 3: + # Use the binary rule table. + #--------------------------------------------------------------- + if ( !defined($ws) ) { + $ws = $binary_ws_rules{$last_type}{$type}; + } + my $ws_3; + $ws_3 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 4: + # Handle some special cases. + #--------------------------------------------------------------- + if ( $token eq '(' ) { + + # This will have to be tweaked as tokenization changes. + # We usually want a space at '} (', for example: + # <> + # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); + # + # But not others: + # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); + # At present, the above & block is marked as type L/R so this case + # won't go through here. + if ( $last_type eq '}' ) { $ws = WS_YES } + + # NOTE: some older versions of Perl had occasional problems if + # spaces are introduced between keywords or functions and opening + # parens. So the default is not to do this except is certain + # cases. The current Perl seems to tolerate spaces. + + # Space between keyword and '(' + elsif ( $last_type eq 'k' ) { + $ws = WS_NO + unless ( $rOpts_space_keyword_paren + || $space_after_keyword{$last_token} ); + } + + # Space between function and '(' + # ----------------------------------------------------- + # 'w' and 'i' checks for something like: + # myfun( &myfun( ->myfun( + # ----------------------------------------------------- + elsif (( $last_type =~ /^[wUG]$/ ) + || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) + { + $ws = WS_NO unless ($rOpts_space_function_paren); + } + + # space between something like $i and ( in <> + # for $i ( 0 .. 20 ) { + # FIXME: eventually, type 'i' needs to be split into multiple + # token types so this can be a hardwired rule. + elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { + $ws = WS_YES; + } + + # allow constant function followed by '()' to retain no space + elsif ($last_type eq 'C' + && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' ) + { + $ws = WS_NO; + } + } + + # patch for SWITCH/CASE: make space at ']{' optional + # since the '{' might begin a case or when block + elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { + $ws = WS_OPTIONAL; + } + + # keep space between 'sub' and '{' for anonymous sub definition + if ( $type eq '{' ) { + if ( $last_token eq 'sub' ) { + $ws = WS_YES; + } + + # this is needed to avoid no space in '){' + if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } + + # avoid any space before the brace or bracket in something like + # @opts{'a','b',...} + if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { + $ws = WS_NO; + } + } + + elsif ( $type eq 'i' ) { + + # never a space before -> + if ( $token =~ /^\-\>/ ) { + $ws = WS_NO; + } + } + + # retain any space between '-' and bare word + elsif ( $type eq 'w' || $type eq 'C' ) { + $ws = WS_OPTIONAL if $last_type eq '-'; + + # never a space before -> + if ( $token =~ /^\-\>/ ) { + $ws = WS_NO; + } + } + + # retain any space between '-' and bare word; for example + # avoid space between 'USER' and '-' here: <> + # $myhash{USER-NAME}='steve'; + elsif ( $type eq 'm' || $type eq '-' ) { + $ws = WS_OPTIONAL if ( $last_type eq 'w' ); + } + + # always space before side comment + elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } + + # always preserver whatever space was used after a possible + # filehandle (except _) or here doc operator + if ( + $type ne '#' + && ( ( $last_type eq 'Z' && $last_token ne '_' ) + || $last_type eq 'h' ) + ) + { + $ws = WS_OPTIONAL; + } + + # space_backslash_quote; RT #123774 <> + # allow a space between a backslash and single or double quote + # to avoid fooling html formatters + elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) { + if ($rOpts_space_backslash_quote) { + if ( $rOpts_space_backslash_quote == 1 ) { + $ws = WS_OPTIONAL; + } + elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES } + else { } # shouldnt happen + } + else { + $ws = WS_NO; + } + } + + my $ws_4; + $ws_4 = $ws + if FORMATTER_DEBUG_FLAG_WHITE; + + #--------------------------------------------------------------- + # Whitespace Rules Section 5: + # Apply default rules not covered above. + #--------------------------------------------------------------- + + # If we fall through to here, look at the pre-defined hash tables for + # the two tokens, and: + # if (they are equal) use the common value + # if (either is zero or undef) use the other + # if (either is -1) use it + # That is, + # left vs right + # 1 vs 1 --> 1 + # 0 vs 0 --> 0 + # -1 vs -1 --> -1 + # + # 0 vs -1 --> -1 + # 0 vs 1 --> 1 + # 1 vs 0 --> 1 + # -1 vs 0 --> -1 + # + # -1 vs 1 --> -1 + # 1 vs -1 --> -1 + if ( !defined($ws) ) { + my $wl = $want_left_space{$type}; + my $wr = $want_right_space{$last_type}; + if ( !defined($wl) ) { $wl = 0 } + if ( !defined($wr) ) { $wr = 0 } + $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; + } + + if ( !defined($ws) ) { + $ws = 0; + write_diagnostics( + "WS flag is undefined for tokens $last_token $token\n"); + } + + # Treat newline as a whitespace. Otherwise, we might combine + # 'Send' and '-recipients' here according to the above rules: + # <> + # my $msg = new Fax::Send + # -recipients => $to, + # -data => $data; + if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 } + + if ( ( $ws == 0 ) + && $j > 0 + && $j < $jmax + && ( $last_type !~ /^[Zh]$/ ) ) + { + + # If this happens, we have a non-fatal but undesirable + # hole in the above rules which should be patched. + write_diagnostics( + "WS flag is zero for tokens $last_token $token\n"); + } + + $rwhitespace_flags->[$j] = $ws; + + FORMATTER_DEBUG_FLAG_WHITE && do { + my $str = substr( $last_token, 0, 15 ); + $str .= ' ' x ( 16 - length($str) ); + if ( !defined($ws_1) ) { $ws_1 = "*" } + if ( !defined($ws_2) ) { $ws_2 = "*" } + if ( !defined($ws_3) ) { $ws_3 = "*" } + if ( !defined($ws_4) ) { $ws_4 = "*" } + print STDOUT +"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; + }; + } ## end main loop + + if ( $rOpts->{'tight-secret-operators'} ) { + new_secret_operator_whitespace( $rLL, $rwhitespace_flags ); + } + return $rwhitespace_flags; +} ## end sub set_whitespace_flags + +sub respace_tokens { + + my $self = shift; + return if $rOpts->{'indent-only'}; + + # This routine makes all necessary changes to the tokenization after the + # file has been read. This consists mostly of inserting and deleting spaces + # according to the selected parameters. In a few cases non-space characters + # are added, deleted or modified. + + # The old tokens are copied one-by-one, with changes, from the old + # linear storage array to a new array. + + my $rLL = $self->{rLL}; + my $Klimit_old = $self->{Klimit}; + my $rlines = $self->{rlines}; + my $rpaired_to_inner_container = $self->{rpaired_to_inner_container}; + + my $rLL_new = []; # This is the new array + my $KK = 0; + my $rtoken_vars; + my $Kmax = @{$rLL} - 1; + + # Set the whitespace flags, which indicate the token spacing preference. + my $rwhitespace_flags = $self->set_whitespace_flags(); + + # we will be setting token lengths as we go + my $cumulative_length = 0; + + # We also define these hash indexes giving container token array indexes + # as a function of the container sequence numbers. For example, + my $K_opening_container = {}; # opening [ { or ( + my $K_closing_container = {}; # closing ] } or ) + my $K_opening_ternary = {}; # opening ? of ternary + my $K_closing_ternary = {}; # closing : of ternary + + # List of new K indexes of phantom semicolons + # This will be needed if we want to undo them for iterations + my $rK_phantom_semicolons = []; + + # Temporary hashes for adding semicolons + ##my $rKfirst_new = {}; + + # a sub to link preceding nodes forward to a new node type + my $link_back = sub { + my ( $Ktop, $key ) = @_; + + my $Kprev = $Ktop - 1; + while ( $Kprev >= 0 + && !defined( $rLL_new->[$Kprev]->[$key] ) ) + { + $rLL_new->[$Kprev]->[$key] = $Ktop; + $Kprev -= 1; + } + }; + + # A sub to store one token in the new array + # All new tokens must be stored by this sub so that it can update + # all data structures on the fly. + my $last_nonblank_type = ';'; + my $store_token = sub { + my ($item) = @_; + + # This will be the index of this item in the new array + my $KK_new = @{$rLL_new}; + + # check for a sequenced item (i.e., container or ?/:) + my $type_sequence = $item->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + + $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ ); + + my $token = $item->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + + $K_opening_container->{$type_sequence} = $KK_new; + } + elsif ( $is_closing_token{$token} ) { + + $K_closing_container->{$type_sequence} = $KK_new; + } + + # These are not yet used but could be useful + else { + if ( $token eq '?' ) { + $K_opening_ternary->{$type_sequence} = $KK; + } + elsif ( $token eq ':' ) { + $K_closing_ternary->{$type_sequence} = $KK; + } + else { + # shouldn't happen + print STDERR "Ugh: shouldn't happen\n"; + } + } + } + + # find the length of this token + my $token_length = length( $item->[_TOKEN_] ); + + # and update the cumulative length + $cumulative_length += $token_length; + + # Save the length sum to just AFTER this token + $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; + + my $type = $item->[_TYPE_]; + if ( $type ne 'b' ) { $last_nonblank_type = $type } + + # and finally, add this item to the new array + push @{$rLL_new}, $item; + }; + + my $store_token_and_space = sub { + my ( $item, $want_space ) = @_; + + # store a token with preceding space if requested and needed + + # First store the space + if ( $want_space + && @{$rLL_new} + && $rLL_new->[-1]->[_TYPE_] ne 'b' + && $rOpts_add_whitespace ) + { + my $rcopy = copy_token_as_type( $item, 'b', ' ' ); + $rcopy->[_LINE_INDEX_] = + $rLL_new->[-1]->[_LINE_INDEX_]; + $store_token->($rcopy); + } + + # then the token + $store_token->($item); + }; + + my $K_end_q = sub { + my ($KK) = @_; + my $K_end = $KK; + my $Kn = $self->K_next_nonblank($KK); + while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) { + $K_end = $Kn; + $Kn = $self->K_next_nonblank($Kn); + } + return $K_end; + }; + + my $add_phantom_semicolon = sub { + + my ($KK) = @_; + + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + + # we are only adding semicolons for certain block types + my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_]; + return + unless ( $ok_to_add_semicolon_for_block_type{$block_type} + || $block_type =~ /^(sub|package)/ + || $block_type =~ /^\w+\:$/ ); + + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + + my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; + my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; + + # Do not add a semicolon if... + return + if ( + + # it would follow a comment (and be isolated) + $previous_nonblank_type eq '#' + + # it follows a code block ( because they are not always wanted + # there and may add clutter) + || $rLL_new->[$Kp]->[_BLOCK_TYPE_] + + # it would follow a label + || $previous_nonblank_type eq 'J' + + # it would be inside a 'format' statement (and cause syntax error) + || ( $previous_nonblank_type eq 'k' + && $previous_nonblank_token =~ /format/ ) + + # if it would prevent welding two containers + || $rpaired_to_inner_container->{$type_sequence} + + ); + + # We will insert an empty semicolon here as a placeholder. Later, if + # it becomes the last token on a line, we will bring it to life. The + # advantage of doing this is that (1) we just have to check line + # endings, and (2) the phantom semicolon has zero width and therefore + # won't cause needless breaks of one-line blocks. + my $Ktop = -1; + if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' + && $want_left_space{';'} == WS_NO ) + { + + # convert the blank into a semicolon.. + # be careful: we are working on the new stack top + # on a token which has been stored. + my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' ); + + # Convert the existing blank to a semicolon + $rLL_new->[$Ktop]->[_TOKEN_] = ''; # zero length + $rLL_new->[$Ktop]->[_TYPE_] = ';'; + $rLL_new->[$Ktop]->[_SLEVEL_] = + $rLL->[$KK]->[_SLEVEL_]; + + push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; + + # Then store a new blank + $store_token->($rcopy); + } + else { + + # insert a new token + my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' ); + $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_]; + $store_token->($rcopy); + push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; + } + }; + + my $check_Q = sub { + + # Check that a quote looks okay + # This sub works but needs to by sync'd with the log file output + # before it can be used. + my ( $KK, $Kfirst ) = @_; + my $token = $rLL->[$KK]->[_TOKEN_]; + note_embedded_tab() if ( $token =~ "\t" ); + + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; + my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; + + my $previous_nonblank_type_2 = 'b'; + my $previous_nonblank_token_2 = ""; + my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); + if ( defined($Kpp) ) { + $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; + $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; + } + + my $Kn = $self->K_next_nonblank($KK); + my $next_nonblank_token = ""; + if ( defined($Kn) ) { + $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; + } + + my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; + my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; + + # make note of something like '$var = s/xxx/yyy/;' + # in case it should have been '$var =~ s/xxx/yyy/;' + if ( + $token =~ /^(s|tr|y|m|\/)/ + && $previous_nonblank_token =~ /^(=|==|!=)$/ + + # preceded by simple scalar + && $previous_nonblank_type_2 eq 'i' + && $previous_nonblank_token_2 =~ /^\$/ + + # followed by some kind of termination + # (but give complaint if we can not see far enough ahead) + && $next_nonblank_token =~ /^[; \)\}]$/ + + # scalar is not declared + && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ ) + ) + { + my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; + complain( +"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" + ); + } + }; + + # Main loop over all lines of the file + my $last_K_out; + my $CODE_type = ""; + my $line_type = ""; + + # Testing option to break qw. Do not use; it can make a mess. + my $ALLOW_BREAK_MULTILINE_QW = 0; + my $in_multiline_qw; + foreach my $line_of_tokens ( @{$rlines} ) { + + $input_line_number = $line_of_tokens->{_line_number}; + my $last_line_type = $line_type; + $line_type = $line_of_tokens->{_line_type}; + next unless ( $line_type eq 'CODE' ); + my $last_CODE_type = $CODE_type; + $CODE_type = $line_of_tokens->{_code_type}; + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless defined($Kfirst); + + # Check for correct sequence of token indexes... + # An error here means that sub write_line() did not correctly + # package the tokenized lines as it received them. + if ( defined($last_K_out) ) { + if ( $Kfirst != $last_K_out + 1 ) { + Fault( + "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" + ); + } + } + else { + if ( $Kfirst != 0 ) { + Fault("Program Bug: first K is $Kfirst but should be 0"); + } + } + $last_K_out = $Klast; + + # Handle special lines of code + if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { + + # CODE_types are as follows. + # 'BL' = Blank Line + # 'VB' = Verbatim - line goes out verbatim + # 'FS' = Format Skipping - line goes out verbatim, no blanks + # 'IO' = Indent Only - only indentation may be changed + # 'NIN' = No Internal Newlines - line does not get broken + # 'HSC'=Hanging Side Comment - fix this hanging side comment + # 'BC'=Block Comment - an ordinary full line comment + # 'SBC'=Static Block Comment - a block comment which does not get + # indented + # 'SBCX'=Static Block Comment Without Leading Space + # 'DEL'=Delete this line + # 'VER'=VERSION statement + # '' or (undefined) - no restructions + + # For a hanging side comment we insert an empty quote before + # the comment so that it becomes a normal side comment and + # will be aligned by the vertical aligner + if ( $CODE_type eq 'HSC' ) { + + # Safety Check: This must be a line with one token (a comment) + my $rtoken_vars = $rLL->[$Kfirst]; + if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) { + + # Note that even if the flag 'noadd-whitespace' is set, we + # will make an exception here and allow a blank to be + # inserted to push the comment to the right. We can think + # of this as an adjustment of indentation rather than + # whitespace between tokens. This will also prevent the + # hanging side comment from getting converted to a block + # comment if whitespace gets deleted, as for example with + # the -extrude and -mangle options. + my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' ); + $store_token->($rcopy); + $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' ); + $store_token->($rcopy); + $store_token->($rtoken_vars); + next; + } + else { + + # This line was mis-marked by sub scan_comment + Fault( + "Program bug. A hanging side comment has been mismarked" + ); + } + } + + # Copy tokens unchanged + foreach my $KK ( $Kfirst .. $Klast ) { + $store_token->( $rLL->[$KK] ); + } + next; + } + + # Handle normal line.. + + # Insert any essential whitespace between lines + # if last line was normal CODE. + # Patch for rt #125012: use K_previous_code rather than '_nonblank' + # because comments may disappear. + my $type_next = $rLL->[$Kfirst]->[_TYPE_]; + my $token_next = $rLL->[$Kfirst]->[_TOKEN_]; + my $Kp = $self->K_previous_code( undef, $rLL_new ); + if ( $last_line_type eq 'CODE' + && $type_next ne 'b' + && defined($Kp) ) + { + my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + + my ( $token_pp, $type_pp ); + my $Kpp = $self->K_previous_code( $Kp, $rLL_new ); + if ( defined($Kpp) ) { + $token_pp = $rLL_new->[$Kpp]->[_TOKEN_]; + $type_pp = $rLL_new->[$Kpp]->[_TYPE_]; + } + else { + $token_pp = ";"; + $type_pp = ';'; + } + + if ( + is_essential_whitespace( + $token_pp, $type_pp, $token_p, + $type_p, $token_next, $type_next, + ) + ) + { + + # Copy this first token as blank, but use previous line number + my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' ); + $rcopy->[_LINE_INDEX_] = + $rLL_new->[-1]->[_LINE_INDEX_]; + $store_token->($rcopy); + } + } + + # loop to copy all tokens on this line, with any changes + my $type_sequence; + for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) { + $rtoken_vars = $rLL->[$KK]; + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + my $last_type_sequence = $type_sequence; + $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + + # Handle a blank space ... + if ( $type eq 'b' ) { + + # Delete it if not wanted by whitespace rules + # or we are deleting all whitespace + # Note that whitespace flag is a flag indicating whether a + # white space BEFORE the token is needed + next if ( $KK >= $Kmax ); # skip terminal blank + my $Knext = $KK + 1; + my $ws = $rwhitespace_flags->[$Knext]; + if ( $ws == -1 + || $rOpts_delete_old_whitespace ) + { + + # FIXME: maybe switch to using _new + my $Kp = $self->K_previous_nonblank($KK); + next unless defined($Kp); + my $token_p = $rLL->[$Kp]->[_TOKEN_]; + my $type_p = $rLL->[$Kp]->[_TYPE_]; + + my ( $token_pp, $type_pp ); + + #my $Kpp = $K_previous_nonblank->($Kp); + my $Kpp = $self->K_previous_nonblank($Kp); + if ( defined($Kpp) ) { + $token_pp = $rLL->[$Kpp]->[_TOKEN_]; + $type_pp = $rLL->[$Kpp]->[_TYPE_]; + } + else { + $token_pp = ";"; + $type_pp = ';'; + } + my $token_next = $rLL->[$Knext]->[_TOKEN_]; + my $type_next = $rLL->[$Knext]->[_TYPE_]; + + my $do_not_delete = is_essential_whitespace( + $token_pp, $type_pp, $token_p, + $type_p, $token_next, $type_next, + ); + + next unless ($do_not_delete); + } + + # make it just one character if allowed + if ($rOpts_add_whitespace) { + $rtoken_vars->[_TOKEN_] = ' '; + } + $store_token->($rtoken_vars); + next; + } + + # Handle a nonblank token... + + # check for a qw quote + if ( $type eq 'q' ) { + + # trim blanks from right of qw quotes + # (To avoid trimming qw quotes use -ntqw; the tokenizer handles + # this) + $token =~ s/\s*$//; + $rtoken_vars->[_TOKEN_] = $token; + note_embedded_tab() if ( $token =~ "\t" ); + + if ($in_multiline_qw) { + + # If we are at the end of a multiline qw .. + if ( $in_multiline_qw == $KK ) { + + # Split off the closing delimiter character + # so that the formatter can put a line break there if necessary + my $part1 = $token; + my $part2 = substr( $part1, -1, 1, "" ); + + if ($part1) { + my $rcopy = + copy_token_as_type( $rtoken_vars, 'q', $part1 ); + $store_token->($rcopy); + $token = $part2; + $rtoken_vars->[_TOKEN_] = $token; + + } + $in_multiline_qw = undef; + + # store without preceding blank + $store_token->($rtoken_vars); + next; + } + else { + # continuing a multiline qw + $store_token->($rtoken_vars); + next; + } + } + + else { + + # we are encountered new qw token...see if multiline + my $K_end = $K_end_q->($KK); + if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) { + + # Starting multiline qw... + # set flag equal to the ending K + $in_multiline_qw = $K_end; + + # Split off the leading part + # so that the formatter can put a line break there if necessary + if ( $token =~ /^(qw\s*.)(.*)$/ ) { + my $part1 = $1; + my $part2 = $2; + if ($part2) { + my $rcopy = + copy_token_as_type( $rtoken_vars, 'q', + $part1 ); + $store_token_and_space->( + $rcopy, $rwhitespace_flags->[$KK] == WS_YES + ); + $token = $part2; + $rtoken_vars->[_TOKEN_] = $token; + + # Second part goes without intermediate blank + $store_token->($rtoken_vars); + next; + } + } + } + else { + + # this is a new single token qw - + # store with possible preceding blank + $store_token_and_space->( + $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES + ); + next; + } + } + } ## end if ( $type eq 'q' ) + + # Modify certain tokens here for whitespace + # The following is not yet done, but could be: + # sub (x x x) + elsif ( $type =~ /^[wit]$/ ) { + + # Examples: <> + # change '$ var' to '$var' etc + # '-> new' to '->new' + if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { + $token =~ s/\s*//g; + $rtoken_vars->[_TOKEN_] = $token; + } + + # Split identifiers with leading arrows, inserting blanks if + # necessary. It is easier and safer here than in the + # tokenizer. For example '->new' becomes two tokens, '->' and + # 'new' with a possible blank between. + # + # Note: there is a related patch in sub set_whitespace_flags + if ( $token =~ /^\-\>(.*)$/ && $1 ) { + my $token_save = $1; + my $type_save = $type; + + # store a blank to left of arrow if necessary + my $Kprev = $self->K_previous_nonblank($KK); + if ( defined($Kprev) + && $rLL->[$Kprev]->[_TYPE_] ne 'b' + && $rOpts_add_whitespace + && $want_left_space{'->'} == WS_YES ) + { + my $rcopy = + copy_token_as_type( $rtoken_vars, 'b', ' ' ); + $store_token->($rcopy); + } + + # then store the arrow + my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' ); + $store_token->($rcopy); + + # then reset the current token to be the remainder, + # and reset the whitespace flag according to the arrow + $token = $rtoken_vars->[_TOKEN_] = $token_save; + $type = $rtoken_vars->[_TYPE_] = $type_save; + $store_token->($rtoken_vars); + next; + } + + if ( $token =~ /$SUB_PATTERN/ ) { + $token =~ s/\s+/ /g; + $rtoken_vars->[_TOKEN_] = $token; + } + + # trim identifiers of trailing blanks which can occur + # under some unusual circumstances, such as if the + # identifier 'witch' has trailing blanks on input here: + # + # sub + # witch + # () # prototype may be on new line ... + # ... + if ( $type eq 'i' ) { + $token =~ s/\s+$//g; + $rtoken_vars->[_TOKEN_] = $token; + } + } + + # change 'LABEL :' to 'LABEL:' + elsif ( $type eq 'J' ) { + $token =~ s/\s+//g; + $rtoken_vars->[_TOKEN_] = $token; + } + + # patch to add space to something like "x10" + # This avoids having to split this token in the pre-tokenizer + elsif ( $type eq 'n' ) { + if ( $token =~ /^x\d+/ ) { + $token =~ s/x/x /; + $rtoken_vars->[_TOKEN_] = $token; + } + } + + # check a quote for problems + elsif ( $type eq 'Q' ) { + + # This is ready to go but is commented out because there is + # still identical logic in sub break_lines. + # $check_Q->($KK, $Kfirst); + } + + elsif ($type_sequence) { + + # if ( $is_opening_token{$token} ) { + # } + + if ( $is_closing_token{$token} ) { + + # Insert a tentative missing semicolon if the next token is + # a closing block brace + if ( + $type eq '}' + && $token eq '}' + + # not preceded by a ';' + && $last_nonblank_type ne ';' + + # and this is not a VERSION stmt (is all one line, we are not + # inserting semicolons on one-line blocks) + && $CODE_type ne 'VER' + + # and we are allowed to add semicolons + && $rOpts->{'add-semicolons'} + ) + { + $add_phantom_semicolon->($KK); + } + } + } + + # Store this token with possible previous blank + $store_token_and_space->( + $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES + ); + + } # End token loop + } # End line loop + + # Reset memory to be the new array + $self->{rLL} = $rLL_new; + $self->set_rLL_max_index(); + $self->{K_opening_container} = $K_opening_container; + $self->{K_closing_container} = $K_closing_container; + $self->{K_opening_ternary} = $K_opening_ternary; + $self->{K_closing_ternary} = $K_closing_ternary; + $self->{rK_phantom_semicolons} = $rK_phantom_semicolons; + + # make sure the new array looks okay + $self->check_token_array(); + + # reset the token limits of each line + $self->resync_lines_and_tokens(); + + return; +} + +{ # scan_comments + + my $Last_line_had_side_comment; + my $In_format_skipping_section; + my $Saw_VERSION_in_this_file; + + sub scan_comments { + my $self = shift; + my $rlines = $self->{rlines}; + + $Last_line_had_side_comment = undef; + $In_format_skipping_section = undef; + $Saw_VERSION_in_this_file = undef; + + # Loop over all lines + foreach my $line_of_tokens ( @{$rlines} ) { + my $line_type = $line_of_tokens->{_line_type}; + next unless ( $line_type eq 'CODE' ); + my $CODE_type = $self->get_CODE_type($line_of_tokens); + $line_of_tokens->{_code_type} = $CODE_type; + } + return; + } + + sub get_CODE_type { + my ( $self, $line_of_tokens ) = @_; + + # We are looking at a line of code and setting a flag to + # describe any special processing that it requires + + # Possible CODE_types are as follows. + # 'BL' = Blank Line + # 'VB' = Verbatim - line goes out verbatim + # 'IO' = Indent Only - line goes out unchanged except for indentation + # 'NIN' = No Internal Newlines - line does not get broken + # 'HSC'=Hanging Side Comment - fix this hanging side comment + # 'BC'=Block Comment - an ordinary full line comment + # 'SBC'=Static Block Comment - a block comment which does not get + # indented + # 'SBCX'=Static Block Comment Without Leading Space + # 'DEL'=Delete this line + # 'VER'=VERSION statement + # '' or (undefined) - no restructions + + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + + my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : ""; + my $no_internal_newlines = 1 - $rOpts_add_newlines; + if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' } + + # extract what we need for this line.. + + # Global value for error messages: + $input_line_number = $line_of_tokens->{_line_number}; + + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + my $jmax = -1; + if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst } + my $input_line = $line_of_tokens->{_line_text}; + my $in_continued_quote = my $starting_in_quote = + $line_of_tokens->{_starting_in_quote}; + my $in_quote = $line_of_tokens->{_ending_in_quote}; + my $ending_in_quote = $in_quote; + my $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + + my $is_static_block_comment = 0; + + # Handle a continued quote.. + if ($in_continued_quote) { + + # A line which is entirely a quote or pattern must go out + # verbatim. Note: the \n is contained in $input_line. + if ( $jmax <= 0 ) { + if ( ( $input_line =~ "\t" ) ) { + note_embedded_tab(); + } + $Last_line_had_side_comment = 0; + return 'VB'; + } + } + + my $is_block_comment = + ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' ); + + # Write line verbatim if we are in a formatting skip section + if ($In_format_skipping_section) { + $Last_line_had_side_comment = 0; + + # Note: extra space appended to comment simplifies pattern matching + if ( $is_block_comment + && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~ + /$format_skipping_pattern_end/o ) + { + $In_format_skipping_section = 0; + write_logfile_entry("Exiting formatting skip section\n"); + } + return 'FS'; + } + + # See if we are entering a formatting skip section + if ( $rOpts_format_skipping + && $is_block_comment + && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~ + /$format_skipping_pattern_begin/o ) + { + $In_format_skipping_section = 1; + write_logfile_entry("Entering formatting skip section\n"); + $Last_line_had_side_comment = 0; + return 'FS'; + } + + # ignore trailing blank tokens (they will get deleted later) + if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) { + $jmax--; + } + + # Handle a blank line.. + if ( $jmax < 0 ) { + $Last_line_had_side_comment = 0; + return 'BL'; + } + + # see if this is a static block comment (starts with ## by default) + my $is_static_block_comment_without_leading_space = 0; + if ( $is_block_comment + && $rOpts->{'static-block-comments'} + && $input_line =~ /$static_block_comment_pattern/o ) + { + $is_static_block_comment = 1; + $is_static_block_comment_without_leading_space = + substr( $input_line, 0, 1 ) eq '#'; + } + + # Check for comments which are line directives + # Treat exactly as static block comments without leading space + # reference: perlsyn, near end, section Plain Old Comments (Not!) + # example: '# line 42 "new_filename.plx"' + if ( + $is_block_comment + && $input_line =~ /^\# \s* + line \s+ (\d+) \s* + (?:\s("?)([^"]+)\2)? \s* + $/x + ) + { + $is_static_block_comment = 1; + $is_static_block_comment_without_leading_space = 1; + } + + # look for hanging side comment + if ( + $is_block_comment + && $Last_line_had_side_comment # last line had side comment + && $input_line =~ /^\s/ # there is some leading space + && !$is_static_block_comment # do not make static comment hanging + && $rOpts->{'hanging-side-comments'} # user is allowing + # hanging side comments + # like this + ) + { + $Last_line_had_side_comment = 1; + return 'HSC'; + } + + # remember if this line has a side comment + $Last_line_had_side_comment = + ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ); + + # Handle a block (full-line) comment.. + if ($is_block_comment) { + + if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' } + + # TRIM COMMENTS -- This could be turned off as a option + $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end + + if ($is_static_block_comment_without_leading_space) { + return 'SBCX'; + } + elsif ($is_static_block_comment) { + return 'SBC'; + } + else { + return 'BC'; + } + } + +=pod + # NOTE: This does not work yet. Version in print-line-of-tokens + # is Still used until fixed + + # compare input/output indentation except for continuation lines + # (because they have an unknown amount of initial blank space) + # and lines which are quotes (because they may have been outdented) + # Note: this test is placed here because we know the continuation flag + # at this point, which allows us to avoid non-meaningful checks. + my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_]; + compare_indentation_levels( $guessed_indentation_level, + $structural_indentation_level ) + unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0 + || $guessed_indentation_level == 0 + && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' ); +=cut + + # Patch needed for MakeMaker. Do not break a statement + # in which $VERSION may be calculated. See MakeMaker.pm; + # this is based on the coding in it. + # The first line of a file that matches this will be eval'd: + # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ + # Examples: + # *VERSION = \'1.01'; + # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; + # We will pass such a line straight through without breaking + # it unless -npvl is used. + + # Patch for problem reported in RT #81866, where files + # had been flattened into a single line and couldn't be + # tidied without -npvl. There are two parts to this patch: + # First, it is not done for a really long line (80 tokens for now). + # Second, we will only allow up to one semicolon + # before the VERSION. We need to allow at least one semicolon + # for statements like this: + # require Exporter; our $VERSION = $Exporter::VERSION; + # where both statements must be on a single line for MakeMaker + + my $is_VERSION_statement = 0; + if ( !$Saw_VERSION_in_this_file + && $jmax < 80 + && $input_line =~ + /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) + { + $Saw_VERSION_in_this_file = 1; + write_logfile_entry("passing VERSION line; -npvl deactivates\n"); + $CODE_type = 'VER'; + } + return $CODE_type; + } +} + +sub find_nested_pairs { + my $self = shift; + + my $rLL = $self->{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + + # We define an array of pairs of nested containers + my @nested_pairs; + + # We also set the following hash values to identify container pairs for + # which the opening and closing tokens are adjacent in the token stream: + # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and + # $seqno_in are the seqence numbers of the outer and inner containers of + # the pair We need these later to decide if we can insert a missing + # semicolon + my $rpaired_to_inner_container = {}; + + # This local hash remembers if an outer container has a close following + # inner container; + # The key is the outer sequence number + # The value is the token_hash of the inner container + + my %has_close_following_opening; + + # Names of calling routines can either be marked as 'i' or 'w', + # and they may invoke a sub call with an '->'. We will consider + # any consecutive string of such types as a single unit when making + # weld decisions. We also allow a leading ! + my $is_name_type = { + 'i' => 1, + 'w' => 1, + 'U' => 1, + '->' => 1, + '!' => 1, + }; + + my $is_name = sub { + my $type = shift; + return $type && $is_name_type->{$type}; + }; + + my $last_container; + my $last_last_container; + my $last_nonblank_token_vars; + my $last_count; + + my $nonblank_token_count = 0; + + # loop over all tokens + foreach my $rtoken_vars ( @{$rLL} ) { + + my $type = $rtoken_vars->[_TYPE_]; + + next if ( $type eq 'b' ); + + # long identifier-like items are counted as a single item + $nonblank_token_count++ + unless ( $is_name->($type) + && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) ); + + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + + my $token = $rtoken_vars->[_TOKEN_]; + + if ( $is_opening_token{$token} ) { + + # following previous opening token ... + if ( $last_container + && $is_opening_token{ $last_container->[_TOKEN_] } ) + { + + # adjacent to this one + my $tok_diff = $nonblank_token_count - $last_count; + + my $last_tok = $last_nonblank_token_vars->[_TOKEN_]; + + if ( $tok_diff == 1 + || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' ) + { + + # remember this pair... + my $outer_seqno = $last_container->[_TYPE_SEQUENCE_]; + my $inner_seqno = $type_sequence; + $has_close_following_opening{$outer_seqno} = + $rtoken_vars; + } + } + } + + elsif ( $is_closing_token{$token} ) { + + # if the corresponding opening token had an adjacent opening + if ( $has_close_following_opening{$type_sequence} + && $is_closing_token{ $last_container->[_TOKEN_] } + && $has_close_following_opening{$type_sequence} + ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] ) + { + + # The closing weld tokens must be adjacent + # NOTE: so intermediate commas and semicolons + # can currently block a weld. This is something + # that could be fixed in the future by including + # a flag to delete un-necessary commas and semicolons. + my $tok_diff = $nonblank_token_count - $last_count; + + if ( $tok_diff == 1 ) { + + # This is a closely nested pair .. + my $inner_seqno = $last_container->[_TYPE_SEQUENCE_]; + my $outer_seqno = $type_sequence; + $rpaired_to_inner_container->{$outer_seqno} = + $inner_seqno; + + push @nested_pairs, [ $inner_seqno, $outer_seqno ]; + } + } + } + + $last_last_container = $last_container; + $last_container = $rtoken_vars; + $last_count = $nonblank_token_count; + } + $last_nonblank_token_vars = $rtoken_vars; + } + $self->{rnested_pairs} = \@nested_pairs; + $self->{rpaired_to_inner_container} = $rpaired_to_inner_container; + return; +} + +sub dump_tokens { + + # a debug routine, not normally used + my ( $self, $msg ) = @_; + my $rLL = $self->{rLL}; + my $nvars = @{$rLL}; + print STDERR "$msg\n"; + print STDERR "ntokens=$nvars\n"; + print STDERR "K\t_TOKEN_\t_TYPE_\n"; + my $K = 0; + foreach my $item ( @{$rLL} ) { + print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n"; + $K++; + } + return; +} + +sub get_old_line_index { + my ( $self, $K ) = @_; + my $rLL = $self->{rLL}; + return 0 unless defined($K); + return $rLL->[$K]->[_LINE_INDEX_]; +} + +sub get_old_line_count { + my ( $self, $Kbeg, $Kend ) = @_; + my $rLL = $self->{rLL}; + return 0 unless defined($Kbeg); + return 0 unless defined($Kend); + return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1; +} + +sub K_next_code { + my ( $self, $KK, $rLL ) = @_; + + # return the index K of the next nonblank, non-comment token + return unless ( defined($KK) && $KK >= 0 ); + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + while ( $Knnb < $Num ) { + if ( !defined( $rLL->[$Knnb] ) ) { + Fault("Undefined entry for k=$Knnb"); + } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' + && $rLL->[$Knnb]->[_TYPE_] ne '#' ) + { + return $Knnb; + } + $Knnb++; + } + return; +} + +sub K_next_nonblank { + my ( $self, $KK, $rLL ) = @_; + + # return the index K of the next nonblank token + return unless ( defined($KK) && $KK >= 0 ); + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + while ( $Knnb < $Num ) { + if ( !defined( $rLL->[$Knnb] ) ) { + Fault("Undefined entry for k=$Knnb"); + } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } + $Knnb++; + } + return; +} + +sub K_previous_code { + + # return the index K of the previous nonblank, non-comment token + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { + + # The caller should make the first call with KK_new=undef to + # avoid this error + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ); + } + my $Kpnb = $KK - 1; + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' + && $rLL->[$Kpnb]->[_TYPE_] ne '#' ) + { + return $Kpnb; + } + $Kpnb--; + } + return; +} + +sub K_previous_nonblank { + + # return index of previous nonblank token before item K; + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; + + # use the standard array unless given otherwise + $rLL = $self->{rLL} unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { + + # The caller should make the first call with KK_new=undef to + # avoid this error + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ); + } + my $Kpnb = $KK - 1; + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } + $Kpnb--; + } + return; +} + +sub weld_containers { + + # do any welding operations + my $self = shift; + + # initialize weld length hashes needed later for checking line lengths + # TODO: These should eventually be stored in $self rather than be package vars + %weld_len_left_closing = (); + %weld_len_right_closing = (); + %weld_len_left_opening = (); + %weld_len_right_opening = (); + + return if ( $rOpts->{'indent-only'} ); + return unless ($rOpts_add_newlines); + + if ( $rOpts->{'weld-nested-containers'} ) { + + # if called, weld_nested_containers must be called before other weld + # operations. # This is because weld_nested_containers could overwrite + # hash values written by weld_cuddled_blocks and weld_nested_quotes. + $self->weld_nested_containers(); + + $self->weld_nested_quotes(); + } + + # Note that weld_nested_containers() changes the _LEVEL_ values, so + # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead. + + # Here is a good test case to Be sure that both cuddling and welding + # are working and not interfering with each other: <> + + # perltidy -wn -ce + + # if ($BOLD_MATH) { ( + # $labels, $comment, + # join( '', '', &make_math( $mode, '', '', $_ ), '' ) + # ) } else { ( + # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), + # $after + # ) } + + $self->weld_cuddled_blocks(); + + return; +} + +sub cumulative_length_before_K { + my ( $self, $KK ) = @_; + my $rLL = $self->{rLL}; + return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; +} + +sub cumulative_length_after_K { + my ( $self, $KK ) = @_; + my $rLL = $self->{rLL}; + return $rLL->[$KK]->[_CUMULATIVE_LENGTH_]; +} + +sub weld_cuddled_blocks { + my $self = shift; + + # This routine implements the -cb flag by finding the appropriate + # closing and opening block braces and welding them together. + return unless ( %{$rcuddled_block_types} ); + + my $rLL = $self->{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + my $rbreak_container = $self->{rbreak_container}; + + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + + my $length_to_opening_seqno = sub { + my ($seqno) = @_; + my $KK = $K_opening_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + my $length_to_closing_seqno = sub { + my ($seqno) = @_; + my $KK = $K_closing_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + + my $is_broken_block = sub { + + # a block is broken if the input line numbers of the braces differ + # we can only cuddle between broken blocks + my ($seqno) = @_; + my $K_opening = $K_opening_container->{$seqno}; + return unless ( defined($K_opening) ); + my $K_closing = $K_closing_container->{$seqno}; + return unless ( defined($K_closing) ); + return $rbreak_container->{$seqno} + || $rLL->[$K_closing]->[_LINE_INDEX_] != + $rLL->[$K_opening]->[_LINE_INDEX_]; + }; + + # A stack to remember open chains at all levels: + # $in_chain[$level] = [$chain_type, $type_sequence]; + my @in_chain; + my $CBO = $rOpts->{'cuddled-break-option'}; + + # loop over structure items to find cuddled pairs + my $level = 0; + my $KK = 0; + while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) { + my $rtoken_vars = $rLL->[$KK]; + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$type_sequence ) { + Fault("sequence = $type_sequence not defined"); + } + + # We use the original levels because they get changed by sub + # 'weld_nested_containers'. So if this were to be called before that + # routine, the levels would be wrong and things would go bad. + my $last_level = $level; + $level = $rtoken_vars->[_LEVEL_TRUE_]; + + if ( $level < $last_level ) { $in_chain[$last_level] = undef } + elsif ( $level > $last_level ) { $in_chain[$level] = undef } + + # We are only looking at code blocks + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + next unless ( $type eq $token ); + + if ( $token eq '{' ) { + + my $block_type = $rtoken_vars->[_BLOCK_TYPE_]; + if ( !$block_type ) { + + # patch for unrecognized block types which may not be labeled + my $Kp = $self->K_previous_nonblank($KK); + while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { + $Kp = $self->K_previous_nonblank($Kp); + } + next unless $Kp; + $block_type = $rLL->[$Kp]->[_TOKEN_]; + } + if ( $in_chain[$level] ) { + + # we are in a chain and are at an opening block brace. + # See if we are welding this opening brace with the previous + # block brace. Get their identification numbers: + my $closing_seqno = $in_chain[$level]->[1]; + my $opening_seqno = $type_sequence; + + # The preceding block must be on multiple lines so that its + # closing brace will start a new line. + if ( !$is_broken_block->($closing_seqno) ) { + next unless ( $CBO == 2 ); + $rbreak_container->{$closing_seqno} = 1; + } + + # we will let the trailing block be either broken or intact + ## && $is_broken_block->($opening_seqno); + + # We can weld the closing brace to its following word .. + my $Ko = $K_closing_container->{$closing_seqno}; + my $Kon = $self->K_next_nonblank($Ko); + + # ..unless it is a comment + if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) { + my $dlen = + $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] - + $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_]; + $weld_len_right_closing{$closing_seqno} = $dlen; + + # Set flag that we want to break the next container + # so that the cuddled line is balanced. + $rbreak_container->{$opening_seqno} = 1 + if ($CBO); + } + + } + else { + + # We are not in a chain. Start a new chain if we see the + # starting block type. + if ( $rcuddled_block_types->{$block_type} ) { + $in_chain[$level] = [ $block_type, $type_sequence ]; + } + else { + $block_type = '*'; + $in_chain[$level] = [ $block_type, $type_sequence ]; + } + } + } + elsif ( $token eq '}' ) { + if ( $in_chain[$level] ) { + + # We are in a chain at a closing brace. See if this chain + # continues.. + my $Knn = $self->K_next_code($KK); + next unless $Knn; + + my $chain_type = $in_chain[$level]->[0]; + my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; + if ( + $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} + ) + { + + # Note that we do not weld yet because we must wait until + # we we are sure that an opening brace for this follows. + $in_chain[$level]->[1] = $type_sequence; + } + else { $in_chain[$level] = undef } + } + } + } + + return; +} + +sub weld_nested_containers { + my $self = shift; + + # This routine implements the -wn flag by "welding together" + # the nested closing and opening tokens which were previously + # identified by sub 'find_nested_pairs'. "welding" simply + # involves setting certain hash values which will be checked + # later during formatting. + + my $rLL = $self->{rLL}; + my $Klimit = $self->get_rLL_max_index(); + my $rnested_pairs = $self->{rnested_pairs}; + my $rlines = $self->{rlines}; + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + + # Return unless there are nested pairs to weld + return unless defined($rnested_pairs) && @{$rnested_pairs}; + + # This array will hold the sequence numbers of the tokens to be welded. + my @welds; + + # Variables needed for estimating line lengths + my $starting_indent; + my $starting_lentot; + + # A tolerance to the length for length estimates. In some rare cases + # this can avoid problems where a final weld slightly exceeds the + # line length and gets broken in a bad spot. + my $length_tol = 1; + + my $excess_length_to_K = sub { + my ($K) = @_; + + # Estimate the length from the line start to a given token + my $length = $self->cumulative_length_before_K($K) - $starting_lentot; + my $excess_length = + $starting_indent + $length + $length_tol - $rOpts_maximum_line_length; + return ($excess_length); + }; + + my $length_to_opening_seqno = sub { + my ($seqno) = @_; + my $KK = $K_opening_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + + my $length_to_closing_seqno = sub { + my ($seqno) = @_; + my $KK = $K_closing_container->{$seqno}; + my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + ##my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_]; + return $lentot; + }; + + # Abbreviations: + # _oo=outer opening, i.e. first of { { + # _io=inner opening, i.e. second of { { + # _oc=outer closing, i.e. second of } { + # _ic=inner closing, i.e. first of } } + + my $previous_pair; + + # We are working from outermost to innermost pairs so that + # level changes will be complete when we arrive at the inner pairs. + + while ( my $item = pop( @{$rnested_pairs} ) ) { + my ( $inner_seqno, $outer_seqno ) = @{$item}; + + my $Kouter_opening = $K_opening_container->{$outer_seqno}; + my $Kinner_opening = $K_opening_container->{$inner_seqno}; + my $Kouter_closing = $K_closing_container->{$outer_seqno}; + my $Kinner_closing = $K_closing_container->{$inner_seqno}; + + my $outer_opening = $rLL->[$Kouter_opening]; + my $inner_opening = $rLL->[$Kinner_opening]; + my $outer_closing = $rLL->[$Kouter_closing]; + my $inner_closing = $rLL->[$Kinner_closing]; + + my $iline_oo = $outer_opening->[_LINE_INDEX_]; + my $iline_io = $inner_opening->[_LINE_INDEX_]; + + # Set flag saying if this pair starts a new weld + my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] ); + + # Set flag saying if this pair is adjacent to the previous nesting pair + # (even if previous pair was rejected as a weld) + my $touch_previous_pair = + defined($previous_pair) && $outer_seqno == $previous_pair->[0]; + $previous_pair = $item; + + # Set a flag if we should not weld. It sometimes looks best not to weld + # when the opening and closing tokens are very close. However, there + # is a danger that we will create a "blinker", which oscillates between + # two semi-stable states, if we do not weld. So the rules for + # not welding have to be carefully defined and tested. + my $do_not_weld; + if ( !$touch_previous_pair ) { + + # If this pair is not adjacent to the previous pair (skipped or + # not), then measure lengths from the start of line of oo + + my $rK_range = $rlines->[$iline_oo]->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + $starting_lentot = + $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_]; + $starting_indent = 0; + if ( !$rOpts_variable_maximum_line_length ) { + my $level = $rLL->[$Kfirst]->[_LEVEL_]; + $starting_indent = $rOpts_indent_columns * $level; + } + + # DO-NOT-WELD RULE 1: + # Do not weld something that looks like the start of a two-line + # function call, like this: <> + # $trans->add_transformation( + # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) ); + # We will look for a semicolon after the closing paren. + + # We want to weld something complex, like this though + # my $compass = uc( opposite_direction( line_to_canvas_direction( + # @{ $coords[0] }, @{ $coords[1] } ) ) ); + # Otherwise we will get a 'blinker' + + my $iline_oc = $outer_closing->[_LINE_INDEX_]; + if ( $iline_oc <= $iline_oo + 1 ) { + + # Look for following semicolon... + my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing); + my $next_nonblank_type = + defined($Knext_nonblank) + ? $rLL->[$Knext_nonblank]->[_TYPE_] + : 'b'; + if ( $next_nonblank_type eq ';' ) { + + # Then do not weld if no other containers between inner + # opening and closing. + my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_]; + if ( $Knext_seq_item == $Kinner_closing ) { + $do_not_weld ||= 1; + } + } + } + } + + my $iline_ic = $inner_closing->[_LINE_INDEX_]; + + # DO-NOT-WELD RULE 2: + # Do not weld an opening paren to an inner one line brace block + # We will just use old line numbers for this test and require + # iterations if necessary for convergence + + # For example, otherwise we could cause the opening paren + # in the following example to separate from the caller name + # as here: + + # $_[0]->code_handler + # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } ); + + # Here is another example where we do not want to weld: + # $wrapped->add_around_modifier( + # sub { push @tracelog => 'around 1'; $_[0]->(); } ); + + # If the one line sub block gets broken due to length or by the + # user, then we can weld. The result will then be: + # $wrapped->add_around_modifier( sub { + # push @tracelog => 'around 1'; + # $_[0]->(); + # } ); + + if ( $iline_ic == $iline_io ) { + + my $token_oo = $outer_opening->[_TOKEN_]; + my $block_type_io = $inner_opening->[_BLOCK_TYPE_]; + my $token_io = $inner_opening->[_TOKEN_]; + $do_not_weld ||= $token_oo eq '(' && $token_io eq '{'; + } + + # DO-NOT-WELD RULE 3: + # Do not weld if this makes our line too long + $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0; + + if ($do_not_weld) { + + # After neglecting a pair, we start measuring from start of point io + $starting_lentot = + $self->cumulative_length_before_K($Kinner_opening); + $starting_indent = 0; + if ( !$rOpts_variable_maximum_line_length ) { + my $level = $inner_opening->[_LEVEL_]; + $starting_indent = $rOpts_indent_columns * $level; + } + + # Normally, a broken pair should not decrease indentation of + # intermediate tokens: + ## if ( $last_pair_broken ) { next } + # However, for long strings of welded tokens, such as '{{{{{{...' + # we will allow broken pairs to also remove indentation. + # This will keep very long strings of opening and closing + # braces from marching off to the right. We will do this if the + # number of tokens in a weld before the broken weld is 4 or more. + # This rule will mainly be needed for test scripts, since typical + # welds have fewer than about 4 welded tokens. + if ( !@welds || @{ $welds[-1] } < 4 ) { next } + } + + # otherwise start new weld ... + elsif ($starting_new_weld) { + push @welds, $item; + } + + # ... or extend current weld + else { + unshift @{ $welds[-1] }, $inner_seqno; + } + + # After welding, reduce the indentation level if all intermediate tokens + my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_]; + if ( $dlevel != 0 ) { + my $Kstart = $Kinner_opening; + my $Kstop = $Kinner_closing; + for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) { + $rLL->[$KK]->[_LEVEL_] += $dlevel; + } + } + } + + # Define weld lengths needed later to set line breaks + foreach my $item (@welds) { + + # sweep from inner to outer + + my $inner_seqno; + my $len_close = 0; + my $len_open = 0; + foreach my $outer_seqno ( @{$item} ) { + if ($inner_seqno) { + + my $dlen_opening = + $length_to_opening_seqno->($inner_seqno) - + $length_to_opening_seqno->($outer_seqno); + + my $dlen_closing = + $length_to_closing_seqno->($outer_seqno) - + $length_to_closing_seqno->($inner_seqno); + + $len_open += $dlen_opening; + $len_close += $dlen_closing; + + } + + $weld_len_left_closing{$outer_seqno} = $len_close; + $weld_len_right_opening{$outer_seqno} = $len_open; + + $inner_seqno = $outer_seqno; + } + + # sweep from outer to inner + foreach my $seqno ( reverse @{$item} ) { + $weld_len_right_closing{$seqno} = + $len_close - $weld_len_left_closing{$seqno}; + $weld_len_left_opening{$seqno} = + $len_open - $weld_len_right_opening{$seqno}; + } + } + + ##################################### + # DEBUG + ##################################### + if (0) { + my $count = 0; + local $" = ')('; + foreach my $weld (@welds) { + print "\nWeld number $count has seq: (@{$weld})\n"; + foreach my $seq ( @{$weld} ) { + print <{rLL}; + return unless ( defined($rLL) && @{$rLL} ); + + my $K_opening_container = $self->{K_opening_container}; + my $K_closing_container = $self->{K_closing_container}; + my $rlines = $self->{rlines}; + + my $is_single_quote = sub { + my ( $Kbeg, $Kend, $quote_type ) = @_; + foreach my $K ( $Kbeg .. $Kend ) { + my $test_type = $rLL->[$K]->[_TYPE_]; + next if ( $test_type eq 'b' ); + return if ( $test_type ne $quote_type ); + } + return 1; + }; + + my $excess_line_length = sub { + my ( $KK, $Ktest ) = @_; + + # what is the excess length if we add token $Ktest to the line with $KK? + my $iline = $rLL->[$KK]->[_LINE_INDEX_]; + my $rK_range = $rlines->[$iline]->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + my $starting_lentot = + $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_]; + my $starting_indent = 0; + my $length_tol = 1; + if ( !$rOpts_variable_maximum_line_length ) { + my $level = $rLL->[$Kfirst]->[_LEVEL_]; + $starting_indent = $rOpts_indent_columns * $level; + } + + my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot; + my $excess_length = + $starting_indent + $length + $length_tol - $rOpts_maximum_line_length; + return $excess_length; + }; + + # look for single qw quotes nested in containers + my $KK = 0; + while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) { + my $rtoken_vars = $rLL->[$KK]; + my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$outer_seqno ) { + Fault("sequence = $outer_seqno not defined"); + } + + my $token = $rtoken_vars->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + + # see if the next token is a quote of some type + my $Kn = $self->K_next_nonblank($KK); + next unless $Kn; + my $next_token = $rLL->[$Kn]->[_TOKEN_]; + my $next_type = $rLL->[$Kn]->[_TYPE_]; + next + unless ( ( $next_type eq 'q' || $next_type eq 'Q' ) + && $next_token =~ /^q/ ); + + # The token before the closing container must also be a quote + my $K_closing = $K_closing_container->{$outer_seqno}; + my $Kt_end = $self->K_previous_nonblank($K_closing); + next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type; + + # Do not weld to single-line quotes. Nothing is gained, and it may + # look bad. + next if ( $Kt_end == $Kn ); + + # Only weld to quotes delimited with container tokens. This is + # because welding to arbitrary quote delimiters can produce code + # which is less readable than without welding. + my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 ); + next + unless ( $is_closing_token{$closing_delimiter} + || $closing_delimiter eq '>' ); + + # Now make sure that there is just a single quote in the container + next + unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) ); + + # If welded, the line must not exceed allowed line length + # Assume old line breaks for this estimate. + next if ( $excess_line_length->( $KK, $Kn ) > 0 ); + + # OK to weld + # FIXME: Are these always correct? + $weld_len_left_closing{$outer_seqno} = 1; + $weld_len_right_opening{$outer_seqno} = 2; + } + } + return; +} + +sub weld_len_left { + + my ( $seqno, $type_or_tok ) = @_; + + # Given the sequence number of a token, and the token or its type, + # return the length of any weld to its left + + my $weld_len; + if ($seqno) { + if ( $is_closing_type{$type_or_tok} ) { + $weld_len = $weld_len_left_closing{$seqno}; + } + elsif ( $is_opening_type{$type_or_tok} ) { + $weld_len = $weld_len_left_opening{$seqno}; + } + } + if ( !defined($weld_len) ) { $weld_len = 0 } + return $weld_len; +} + +sub weld_len_right { + + my ( $seqno, $type_or_tok ) = @_; + + # Given the sequence number of a token, and the token or its type, + # return the length of any weld to its right + + my $weld_len; + if ($seqno) { + if ( $is_closing_type{$type_or_tok} ) { + $weld_len = $weld_len_right_closing{$seqno}; + } + elsif ( $is_opening_type{$type_or_tok} ) { + $weld_len = $weld_len_right_opening{$seqno}; + } + } + if ( !defined($weld_len) ) { $weld_len = 0 } + return $weld_len; +} + +sub weld_len_left_to_go { + my ($i) = @_; + + # Given the index of a token in the 'to_go' array + # return the length of any weld to its left + return if ( $i < 0 ); + my $weld_len = + weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] ); + return $weld_len; +} + +sub weld_len_right_to_go { + my ($i) = @_; + + # Given the index of a token in the 'to_go' array + # return the length of any weld to its right + return if ( $i < 0 ); + if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- } + my $weld_len = + weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] ); + return $weld_len; +} + +sub link_sequence_items { + + # This has been merged into 'respace_tokens' but retained for reference + my $self = shift; + my $rlines = $self->{rlines}; + my $rLL = $self->{rLL}; + + # We walk the token list and make links to the next sequence item. + # We also define these hashes to container tokens using sequence number as + # the key: + my $K_opening_container = {}; # opening [ { or ( + my $K_closing_container = {}; # closing ] } or ) + my $K_opening_ternary = {}; # opening ? of ternary + my $K_closing_ternary = {}; # closing : of ternary + + # sub to link preceding nodes forward to a new node type + my $link_back = sub { + my ( $Ktop, $key ) = @_; + + my $Kprev = $Ktop - 1; + while ( $Kprev >= 0 + && !defined( $rLL->[$Kprev]->[$key] ) ) + { + $rLL->[$Kprev]->[$key] = $Ktop; + $Kprev -= 1; + } + }; + + for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { + + $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef; + + my $type = $rLL->[$KK]->[_TYPE_]; + + next if ( $type eq 'b' ); + + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + + $link_back->( $KK, _KNEXT_SEQ_ITEM_ ); + + my $token = $rLL->[$KK]->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + + $K_opening_container->{$type_sequence} = $KK; + } + elsif ( $is_closing_token{$token} ) { + + $K_closing_container->{$type_sequence} = $KK; + } + + # These are not yet used but could be useful + else { + if ( $token eq '?' ) { + $K_opening_ternary->{$type_sequence} = $KK; + } + elsif ( $token eq ':' ) { + $K_closing_ternary->{$type_sequence} = $KK; + } + else { + Fault(<{K_opening_container} = $K_opening_container; + $self->{K_closing_container} = $K_closing_container; + $self->{K_opening_ternary} = $K_opening_ternary; + $self->{K_closing_ternary} = $K_closing_ternary; + return; +} + +sub sum_token_lengths { + my $self = shift; + + # This has been merged into 'respace_tokens' but retained for reference + my $rLL = $self->{rLL}; + my $cumulative_length = 0; + for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) { + + # now set the length of this token + my $token_length = length( $rLL->[$KK]->[_TOKEN_] ); + + $cumulative_length += $token_length; + + # Save the length sum to just AFTER this token + $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length; + + } + return; +} + +sub resync_lines_and_tokens { + + my $self = shift; + my $rLL = $self->{rLL}; + my $Klimit = $self->{Klimit}; + my $rlines = $self->{rlines}; + + # Re-construct the arrays of tokens associated with the original input lines + # since they have probably changed due to inserting and deleting blanks + # and a few other tokens. + + my $Kmax = -1; + + # This is the next token and its line index: + my $Knext = 0; + my $inext; + if ( defined($rLL) && @{$rLL} ) { + $Kmax = @{$rLL} - 1; + $inext = $rLL->[$Knext]->[_LINE_INDEX_]; + } + + my $get_inext = sub { + if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef } + else { + $inext = $rLL->[$Knext]->[_LINE_INDEX_]; + } + return $inext; + }; + + # Remember the most recently output token index + my $Klast_out; + + my $iline = -1; + foreach my $line_of_tokens ( @{$rlines} ) { + $iline++; + my $line_type = $line_of_tokens->{_line_type}; + if ( $line_type eq 'CODE' ) { + + my @K_array; + my $rK_range; + $inext = $get_inext->(); + while ( defined($inext) && $inext <= $iline ) { + push @{K_array}, $Knext; + $Knext += 1; + $inext = $get_inext->(); + } + + # Delete any terminal blank token + if (@K_array) { + if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) { + pop @K_array; + } + } + + # Define the range of K indexes for the line: + # $Kfirst = index of first token on line + # $Klast_out = index of last token on line + my ( $Kfirst, $Klast ); + if (@K_array) { + $Kfirst = $K_array[0]; + $Klast = $K_array[-1]; + $Klast_out = $Klast; + } + + # It is only safe to trim the actual line text if the input + # line had a terminal blank token. Otherwise, we may be + # in a quote. + if ( $line_of_tokens->{_ended_in_blank_token} ) { + $line_of_tokens->{_line_text} =~ s/\s+$//; + } + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; + } + } + + # There shouldn't be any nodes beyond the last one unless we start + # allowing 'link_after' calls + if ( defined($inext) ) { + + Fault("unexpected tokens at end of file when reconstructing lines"); + } + + return; +} + +sub dump_verbatim { + my $self = shift; + my $rlines = $self->{rlines}; + foreach my $line ( @{$rlines} ) { + my $input_line = $line->{_line_text}; + $self->write_unindented_line($input_line); + } + return; +} + +sub finish_formatting { + + my ( $self, $severe_error ) = @_; + + # The file has been tokenized and is ready to be formatted. + # All of the relevant data is stored in $self, ready to go. + + # output file verbatim if severe error or no formatting requested + if ( $severe_error || $rOpts->{notidy} ) { + $self->dump_verbatim(); + $self->wrapup(); + return; + } + + # Make a pass through the lines, looking at lines of CODE and identifying + # special processing needs, such format skipping sections marked by + # special comments + $self->scan_comments(); + + # Find nested pairs of container tokens for any welding. This information + # is also needed for adding semicolons, so it is split apart from the + # welding step. + $self->find_nested_pairs(); + + # Make sure everything looks good + $self->check_line_hashes(); + + # Future: Place to Begin future Iteration Loop + # foreach my $it_count(1..$maxit) { + + # Future: We must reset some things after the first iteration. + # This includes: + # - resetting levels if there was any welding + # - resetting any phantom semicolons + # - dealing with any line numbering issues so we can relate final lines + # line numbers with input line numbers. + # + # If ($it_count>1) { + # Copy {level_raw} to [_LEVEL_] if ($it_count>1) + # Renumber lines + # } + + # Make a pass through all tokens, adding or deleting any whitespace as + # required. Also make any other changes, such as adding semicolons. + # All token changes must be made here so that the token data structure + # remains fixed for the rest of this iteration. + $self->respace_tokens(); + + # Implement any welding needed for the -wn or -cb options + $self->weld_containers(); + + # Finishes formatting and write the result to the line sink. + # Eventually this call should just change the 'rlines' data according to the + # new line breaks and then return so that we can do an internal iteration + # before continuing with the next stages of formatting. + $self->break_lines(); + + ############################################################ + # A possible future decomposition of 'break_lines()' follows. + # Benefits: + # - allow perltidy to do an internal iteration which eliminates + # many unnecessary steps, such as re-parsing and vertical alignment. + # This will allow iterations to be automatic. + # - consolidate all length calculations to allow utf8 alignment + ############################################################ + + # Future: Check for convergence of beginning tokens on CODE lines + + # Future: End of Iteration Loop + + # Future: add_padding($rargs); + + # Future: add_closing_side_comments($rargs); + + # Future: vertical_alignment($rargs); + + # Future: output results + + # A final routine to tie up any loose ends + $self->wrapup(); + return; +} + +sub create_one_line_block { + ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) = + @_; + return; +} + +sub destroy_one_line_block { + $index_start_one_line_block = UNDEFINED_INDEX; + $semicolons_before_block_self_destruct = 0; + return; +} + +sub leading_spaces_to_go { + + # return the number of indentation spaces for a token in the output stream; + # these were previously stored by 'set_leading_whitespace'. + + my $ii = shift; + if ( $ii < 0 ) { $ii = 0 } + return get_spaces( $leading_spaces_to_go[$ii] ); + +} + +sub get_spaces { + + # return the number of leading spaces associated with an indentation + # variable $indentation is either a constant number of spaces or an object + # with a get_spaces method. + my $indentation = shift; + return ref($indentation) ? $indentation->get_spaces() : $indentation; +} + +sub get_recoverable_spaces { + + # return the number of spaces (+ means shift right, - means shift left) + # that we would like to shift a group of lines with the same indentation + # to get them to line up with their opening parens + my $indentation = shift; + return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; +} + +sub get_available_spaces_to_go { + + my $ii = shift; + my $item = $leading_spaces_to_go[$ii]; + + # return the number of available leading spaces associated with an + # indentation variable. $indentation is either a constant number of + # spaces or an object with a get_available_spaces method. + return ref($item) ? $item->get_available_spaces() : 0; +} + +sub new_lp_indentation_item { + + # this is an interface to the IndentationItem class + my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; + + # A negative level implies not to store the item in the item_list + my $index = 0; + if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } + + my $item = Perl::Tidy::IndentationItem->new( + $spaces, $level, + $ci_level, $available_spaces, + $index, $gnu_sequence_number, + $align_paren, $max_gnu_stack_index, + $line_start_index_to_go, + ); + + if ( $level >= 0 ) { + $gnu_item_list[$max_gnu_item_index] = $item; + } + + return $item; +} + +sub set_leading_whitespace { + + # This routine defines leading whitespace + # given: the level and continuation_level of a token, + # define: space count of leading string which would apply if it + # were the first token of a new line. + + my ( $level_abs, $ci_level, $in_continued_quote ) = @_; + + # Adjust levels if necessary to recycle whitespace: + # given $level_abs, the absolute level + # define $level, a possibly reduced level for whitespace + my $level = $level_abs; + if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) { + if ( $level_abs < $whitespace_last_level ) { + pop(@whitespace_level_stack); + } + if ( !@whitespace_level_stack ) { + push @whitespace_level_stack, $level_abs; + } + elsif ( $level_abs > $whitespace_last_level ) { + $level = $whitespace_level_stack[-1] + + ( $level_abs - $whitespace_last_level ); + + if ( + # 1 Try to break at a block brace + ( + $level > $rOpts_whitespace_cycle + && $last_nonblank_type eq '{' + && $last_nonblank_token eq '{' + ) + + # 2 Then either a brace or bracket + || ( $level > $rOpts_whitespace_cycle + 1 + && $last_nonblank_token =~ /^[\{\[]$/ ) + + # 3 Then a paren too + || $level > $rOpts_whitespace_cycle + 2 + ) + { + $level = 1; + } + push @whitespace_level_stack, $level; + } + $level = $whitespace_level_stack[-1]; + } + $whitespace_last_level = $level_abs; + + # modify for -bli, which adds one continuation indentation for + # opening braces + if ( $rOpts_brace_left_and_indent + && $max_index_to_go == 0 + && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) + { + $ci_level++; + } + + # patch to avoid trouble when input file has negative indentation. + # other logic should catch this error. + if ( $level < 0 ) { $level = 0 } + + #------------------------------------------- + # handle the standard indentation scheme + #------------------------------------------- + unless ($rOpts_line_up_parentheses) { + my $space_count = + $ci_level * $rOpts_continuation_indentation + + $level * $rOpts_indent_columns; + my $ci_spaces = + ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; + + if ($in_continued_quote) { + $space_count = 0; + $ci_spaces = 0; + } + $leading_spaces_to_go[$max_index_to_go] = $space_count; + $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; + return; + } + + #------------------------------------------------------------- + # handle case of -lp indentation.. + #------------------------------------------------------------- + + # The continued_quote flag means that this is the first token of a + # line, and it is the continuation of some kind of multi-line quote + # or pattern. It requires special treatment because it must have no + # added leading whitespace. So we create a special indentation item + # which is not in the stack. + if ($in_continued_quote) { + my $space_count = 0; + my $available_space = 0; + $level = -1; # flag to prevent storing in item_list + $leading_spaces_to_go[$max_index_to_go] = + $reduced_spaces_to_go[$max_index_to_go] = + new_lp_indentation_item( $space_count, $level, $ci_level, + $available_space, 0 ); + return; + } + + # get the top state from the stack + my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces(); + my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level(); + my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level(); + + my $type = $types_to_go[$max_index_to_go]; + my $token = $tokens_to_go[$max_index_to_go]; + my $total_depth = $nesting_depth_to_go[$max_index_to_go]; + + if ( $type eq '{' || $type eq '(' ) { + + $gnu_comma_count{ $total_depth + 1 } = 0; + $gnu_arrow_count{ $total_depth + 1 } = 0; + + # If we come to an opening token after an '=' token of some type, + # see if it would be helpful to 'break' after the '=' to save space + my $last_equals = $last_gnu_equals{$total_depth}; + if ( $last_equals && $last_equals > $line_start_index_to_go ) { + + # find the position if we break at the '=' + my $i_test = $last_equals; + if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } + + # TESTING + ##my $too_close = ($i_test==$max_index_to_go-1); + + my $test_position = total_line_length( $i_test, $max_index_to_go ); + my $mll = maximum_line_length($i_test); + + if ( + + # the equals is not just before an open paren (testing) + ##!$too_close && + + # if we are beyond the midpoint + $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2 + + # or we are beyond the 1/4 point and there was an old + # break at the equals + || ( + $gnu_position_predictor > + $mll - $rOpts_maximum_line_length * 3 / 4 + && ( + $old_breakpoint_to_go[$last_equals] + || ( $last_equals > 0 + && $old_breakpoint_to_go[ $last_equals - 1 ] ) + || ( $last_equals > 1 + && $types_to_go[ $last_equals - 1 ] eq 'b' + && $old_breakpoint_to_go[ $last_equals - 2 ] ) + ) + ) + ) + { + + # then make the switch -- note that we do not set a real + # breakpoint here because we may not really need one; sub + # scan_list will do that if necessary + $line_start_index_to_go = $i_test + 1; + $gnu_position_predictor = $test_position; + } + } + } + + my $halfway = + maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2; + + # Check for decreasing depth .. + # Note that one token may have both decreasing and then increasing + # depth. For example, (level, ci) can go from (1,1) to (2,0). So, + # in this example we would first go back to (1,0) then up to (2,0) + # in a single call. + if ( $level < $current_level || $ci_level < $current_ci_level ) { + + # loop to find the first entry at or completely below this level + my ( $lev, $ci_lev ); + while (1) { + if ($max_gnu_stack_index) { + + # save index of token which closes this level + $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go); + + # Undo any extra indentation if we saw no commas + my $available_spaces = + $gnu_stack[$max_gnu_stack_index]->get_available_spaces(); + + my $comma_count = 0; + my $arrow_count = 0; + if ( $type eq '}' || $type eq ')' ) { + $comma_count = $gnu_comma_count{$total_depth}; + $arrow_count = $gnu_arrow_count{$total_depth}; + $comma_count = 0 unless $comma_count; + $arrow_count = 0 unless $arrow_count; + } + $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count); + $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count); + + if ( $available_spaces > 0 ) { + + if ( $comma_count <= 0 || $arrow_count > 0 ) { + + my $i = $gnu_stack[$max_gnu_stack_index]->get_index(); + my $seqno = + $gnu_stack[$max_gnu_stack_index] + ->get_sequence_number(); + + # Be sure this item was created in this batch. This + # should be true because we delete any available + # space from open items at the end of each batch. + if ( $gnu_sequence_number != $seqno + || $i > $max_gnu_item_index ) + { + warning( +"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" + ); + report_definite_bug(); + } + + else { + if ( $arrow_count == 0 ) { + $gnu_item_list[$i] + ->permanently_decrease_available_spaces( + $available_spaces); + } + else { + $gnu_item_list[$i] + ->tentatively_decrease_available_spaces( + $available_spaces); + } + foreach my $j ( $i + 1 .. $max_gnu_item_index ) { + $gnu_item_list[$j] + ->decrease_SPACES($available_spaces); + } + } + } + } + + # go down one level + --$max_gnu_stack_index; + $lev = $gnu_stack[$max_gnu_stack_index]->get_level(); + $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level(); + + # stop when we reach a level at or below the current level + if ( $lev <= $level && $ci_lev <= $ci_level ) { + $space_count = + $gnu_stack[$max_gnu_stack_index]->get_spaces(); + $current_level = $lev; + $current_ci_level = $ci_lev; + last; + } + } + + # reached bottom of stack .. should never happen because + # only negative levels can get here, and $level was forced + # to be positive above. + else { + warning( +"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" + ); + report_definite_bug(); + last; + } + } + } + + # handle increasing depth + if ( $level > $current_level || $ci_level > $current_ci_level ) { + + # Compute the standard incremental whitespace. This will be + # the minimum incremental whitespace that will be used. This + # choice results in a smooth transition between the gnu-style + # and the standard style. + my $standard_increment = + ( $level - $current_level ) * $rOpts_indent_columns + + ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; + + # Now we have to define how much extra incremental space + # ("$available_space") we want. This extra space will be + # reduced as necessary when long lines are encountered or when + # it becomes clear that we do not have a good list. + my $available_space = 0; + my $align_paren = 0; + my $excess = 0; + + # initialization on empty stack.. + if ( $max_gnu_stack_index == 0 ) { + $space_count = $level * $rOpts_indent_columns; + } + + # if this is a BLOCK, add the standard increment + elsif ($last_nonblank_block_type) { + $space_count += $standard_increment; + } + + # if last nonblank token was not structural indentation, + # just use standard increment + elsif ( $last_nonblank_type ne '{' ) { + $space_count += $standard_increment; + } + + # otherwise use the space to the first non-blank level change token + else { + + $space_count = $gnu_position_predictor; + + my $min_gnu_indentation = + $gnu_stack[$max_gnu_stack_index]->get_spaces(); + + $available_space = $space_count - $min_gnu_indentation; + if ( $available_space >= $standard_increment ) { + $min_gnu_indentation += $standard_increment; + } + elsif ( $available_space > 1 ) { + $min_gnu_indentation += $available_space + 1; + } + elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { + if ( ( $tightness{$last_nonblank_token} < 2 ) ) { + $min_gnu_indentation += 2; + } + else { + $min_gnu_indentation += 1; + } + } + else { + $min_gnu_indentation += $standard_increment; + } + $available_space = $space_count - $min_gnu_indentation; + + if ( $available_space < 0 ) { + $space_count = $min_gnu_indentation; + $available_space = 0; + } + $align_paren = 1; + } + + # update state, but not on a blank token + if ( $types_to_go[$max_index_to_go] ne 'b' ) { + + $gnu_stack[$max_gnu_stack_index]->set_have_child(1); + + ++$max_gnu_stack_index; + $gnu_stack[$max_gnu_stack_index] = + new_lp_indentation_item( $space_count, $level, $ci_level, + $available_space, $align_paren ); + + # If the opening paren is beyond the half-line length, then + # we will use the minimum (standard) indentation. This will + # help avoid problems associated with running out of space + # near the end of a line. As a result, in deeply nested + # lists, there will be some indentations which are limited + # to this minimum standard indentation. But the most deeply + # nested container will still probably be able to shift its + # parameters to the right for proper alignment, so in most + # cases this will not be noticeable. + if ( $available_space > 0 && $space_count > $halfway ) { + $gnu_stack[$max_gnu_stack_index] + ->tentatively_decrease_available_spaces($available_space); + } + } + } + + # Count commas and look for non-list characters. Once we see a + # non-list character, we give up and don't look for any more commas. + if ( $type eq '=>' ) { + $gnu_arrow_count{$total_depth}++; + + # tentatively treating '=>' like '=' for estimating breaks + # TODO: this could use some experimentation + $last_gnu_equals{$total_depth} = $max_index_to_go; + } + + elsif ( $type eq ',' ) { + $gnu_comma_count{$total_depth}++; + } + + elsif ( $is_assignment{$type} ) { + $last_gnu_equals{$total_depth} = $max_index_to_go; + } + + # this token might start a new line + # if this is a non-blank.. + if ( $type ne 'b' ) { + + # and if .. + if ( + + # this is the first nonblank token of the line + $max_index_to_go == 1 && $types_to_go[0] eq 'b' + + # or previous character was one of these: + || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ + + # or previous character was opening and this does not close it + || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) + || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) + + # or this token is one of these: + || $type =~ /^([\.]|\|\||\&\&)$/ + + # or this is a closing structure + || ( $last_nonblank_type_to_go eq '}' + && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) + + # or previous token was keyword 'return' + || ( $last_nonblank_type_to_go eq 'k' + && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) + + # or starting a new line at certain keywords is fine + || ( $type eq 'k' + && $is_if_unless_and_or_last_next_redo_return{$token} ) + + # or this is after an assignment after a closing structure + || ( + $is_assignment{$last_nonblank_type_to_go} + && ( + $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ + + # and it is significantly to the right + || $gnu_position_predictor > $halfway + ) + ) + ) + { + check_for_long_gnu_style_lines(); + $line_start_index_to_go = $max_index_to_go; + + # back up 1 token if we want to break before that type + # otherwise, we may strand tokens like '?' or ':' on a line + if ( $line_start_index_to_go > 0 ) { + if ( $last_nonblank_type_to_go eq 'k' ) { + + if ( $want_break_before{$last_nonblank_token_to_go} ) { + $line_start_index_to_go--; + } + } + elsif ( $want_break_before{$last_nonblank_type_to_go} ) { + $line_start_index_to_go--; + } + } + } + } + + # remember the predicted position of this token on the output line + if ( $max_index_to_go > $line_start_index_to_go ) { + $gnu_position_predictor = + total_line_length( $line_start_index_to_go, $max_index_to_go ); + } + else { + $gnu_position_predictor = + $space_count + $token_lengths_to_go[$max_index_to_go]; + } + + # store the indentation object for this token + # this allows us to manipulate the leading whitespace + # (in case we have to reduce indentation to fit a line) without + # having to change any token values + $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; + $reduced_spaces_to_go[$max_index_to_go] = + ( $max_gnu_stack_index > 0 && $ci_level ) + ? $gnu_stack[ $max_gnu_stack_index - 1 ] + : $gnu_stack[$max_gnu_stack_index]; + return; +} + +sub check_for_long_gnu_style_lines { + + # look at the current estimated maximum line length, and + # remove some whitespace if it exceeds the desired maximum + + # this is only for the '-lp' style + return unless ($rOpts_line_up_parentheses); + + # nothing can be done if no stack items defined for this line + return if ( $max_gnu_item_index == UNDEFINED_INDEX ); + + # see if we have exceeded the maximum desired line length + # keep 2 extra free because they are needed in some cases + # (result of trial-and-error testing) + my $spaces_needed = + $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2; + + return if ( $spaces_needed <= 0 ); + + # We are over the limit, so try to remove a requested number of + # spaces from leading whitespace. We are only allowed to remove + # from whitespace items created on this batch, since others have + # already been used and cannot be undone. + my @candidates = (); + my $i; + + # loop over all whitespace items created for the current batch + for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { + my $item = $gnu_item_list[$i]; + + # item must still be open to be a candidate (otherwise it + # cannot influence the current token) + next if ( $item->get_closed() >= 0 ); + + my $available_spaces = $item->get_available_spaces(); + + if ( $available_spaces > 0 ) { + push( @candidates, [ $i, $available_spaces ] ); + } + } + + return unless (@candidates); + + # sort by available whitespace so that we can remove whitespace + # from the maximum available first + @candidates = sort { $b->[1] <=> $a->[1] } @candidates; + + # keep removing whitespace until we are done or have no more + foreach my $candidate (@candidates) { + my ( $i, $available_spaces ) = @{$candidate}; + my $deleted_spaces = + ( $available_spaces > $spaces_needed ) + ? $spaces_needed + : $available_spaces; + + # remove the incremental space from this item + $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces); + + my $i_debug = $i; + + # update the leading whitespace of this item and all items + # that came after it + for ( ; $i <= $max_gnu_item_index ; $i++ ) { + + my $old_spaces = $gnu_item_list[$i]->get_spaces(); + if ( $old_spaces >= $deleted_spaces ) { + $gnu_item_list[$i]->decrease_SPACES($deleted_spaces); + } + + # shouldn't happen except for code bug: + else { + my $level = $gnu_item_list[$i_debug]->get_level(); + my $ci_level = $gnu_item_list[$i_debug]->get_ci_level(); + my $old_level = $gnu_item_list[$i]->get_level(); + my $old_ci_level = $gnu_item_list[$i]->get_ci_level(); + warning( +"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n" + ); + report_definite_bug(); + } + } + $gnu_position_predictor -= $deleted_spaces; + $spaces_needed -= $deleted_spaces; + last unless ( $spaces_needed > 0 ); + } + return; +} + +sub finish_lp_batch { + + # This routine is called once after each output stream batch is + # finished to undo indentation for all incomplete -lp + # indentation levels. It is too risky to leave a level open, + # because then we can't backtrack in case of a long line to follow. + # This means that comments and blank lines will disrupt this + # indentation style. But the vertical aligner may be able to + # get the space back if there are side comments. + + # this is only for the 'lp' style + return unless ($rOpts_line_up_parentheses); + + # nothing can be done if no stack items defined for this line + return if ( $max_gnu_item_index == UNDEFINED_INDEX ); + + # loop over all whitespace items created for the current batch + foreach my $i ( 0 .. $max_gnu_item_index ) { + my $item = $gnu_item_list[$i]; + + # only look for open items + next if ( $item->get_closed() >= 0 ); + + # Tentatively remove all of the available space + # (The vertical aligner will try to get it back later) + my $available_spaces = $item->get_available_spaces(); + if ( $available_spaces > 0 ) { + + # delete incremental space for this item + $gnu_item_list[$i] + ->tentatively_decrease_available_spaces($available_spaces); + + # Reduce the total indentation space of any nodes that follow + # Note that any such nodes must necessarily be dependents + # of this node. + foreach ( $i + 1 .. $max_gnu_item_index ) { + $gnu_item_list[$_]->decrease_SPACES($available_spaces); + } + } + } + return; +} + +sub reduce_lp_indentation { + + # reduce the leading whitespace at token $i if possible by $spaces_needed + # (a large value of $spaces_needed will remove all excess space) + # NOTE: to be called from scan_list only for a sequence of tokens + # contained between opening and closing parens/braces/brackets + + my ( $i, $spaces_wanted ) = @_; + my $deleted_spaces = 0; + + my $item = $leading_spaces_to_go[$i]; + my $available_spaces = $item->get_available_spaces(); + + if ( + $available_spaces > 0 + && ( ( $spaces_wanted <= $available_spaces ) + || !$item->get_have_child() ) + ) + { + + # we'll remove these spaces, but mark them as recoverable + $deleted_spaces = + $item->tentatively_decrease_available_spaces($spaces_wanted); + } + + return $deleted_spaces; +} + +sub token_sequence_length { + + # return length of tokens ($ibeg .. $iend) including $ibeg & $iend + # returns 0 if $ibeg > $iend (shouldn't happen) + my ( $ibeg, $iend ) = @_; + return 0 if ( $iend < 0 || $ibeg > $iend ); + return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 ); + return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; +} + +sub total_line_length { + + # return length of a line of tokens ($ibeg .. $iend) + my ( $ibeg, $iend ) = @_; + return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend ); +} + +sub maximum_line_length_for_level { + + # return maximum line length for line starting with a given level + my $maximum_line_length = $rOpts_maximum_line_length; + + # Modify if -vmll option is selected + if ($rOpts_variable_maximum_line_length) { + my $level = shift; + if ( $level < 0 ) { $level = 0 } + $maximum_line_length += $level * $rOpts_indent_columns; + } + return $maximum_line_length; +} + +sub maximum_line_length { + + # return maximum line length for line starting with the token at given index + my $ii = shift; + return maximum_line_length_for_level( $levels_to_go[$ii] ); +} + +sub excess_line_length { + + # return number of characters by which a line of tokens ($ibeg..$iend) + # exceeds the allowable line length. + my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_; + + # Include left and right weld lengths unless requested not to + my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend); + my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend); + + return total_line_length( $ibeg, $iend ) + $wl + $wr - + maximum_line_length($ibeg); +} + +sub wrapup { + + # flush buffer and write any informative messages + my $self = shift; + + $self->flush(); + $file_writer_object->decrement_output_line_number() + ; # fix up line number since it was incremented + we_are_at_the_last_line(); + if ( $added_semicolon_count > 0 ) { + my $first = ( $added_semicolon_count > 1 ) ? "First" : ""; + my $what = + ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; + write_logfile_entry("$added_semicolon_count $what added:\n"); + write_logfile_entry( + " $first at input line $first_added_semicolon_at\n"); + + if ( $added_semicolon_count > 1 ) { + write_logfile_entry( + " Last at input line $last_added_semicolon_at\n"); + } + write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); + write_logfile_entry("\n"); + } + + if ( $deleted_semicolon_count > 0 ) { + my $first = ( $deleted_semicolon_count > 1 ) ? "First" : ""; + my $what = + ( $deleted_semicolon_count > 1 ) + ? "semicolons were" + : "semicolon was"; + write_logfile_entry( + "$deleted_semicolon_count unnecessary $what deleted:\n"); + write_logfile_entry( + " $first at input line $first_deleted_semicolon_at\n"); + + if ( $deleted_semicolon_count > 1 ) { + write_logfile_entry( + " Last at input line $last_deleted_semicolon_at\n"); + } + write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n"); + write_logfile_entry("\n"); + } + + if ( $embedded_tab_count > 0 ) { + my $first = ( $embedded_tab_count > 1 ) ? "First" : ""; + my $what = + ( $embedded_tab_count > 1 ) + ? "quotes or patterns" + : "quote or pattern"; + write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); + write_logfile_entry( +"This means the display of this script could vary with device or software\n" + ); + write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); + + if ( $embedded_tab_count > 1 ) { + write_logfile_entry( + " Last at input line $last_embedded_tab_at\n"); + } + write_logfile_entry("\n"); + } + + if ($first_tabbing_disagreement) { + write_logfile_entry( +"First indentation disagreement seen at input line $first_tabbing_disagreement\n" + ); + } + + if ($in_tabbing_disagreement) { + write_logfile_entry( +"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n" + ); + } + else { + + if ($last_tabbing_disagreement) { + + write_logfile_entry( +"Last indentation disagreement seen at input line $last_tabbing_disagreement\n" + ); + } + else { + write_logfile_entry("No indentation disagreement seen\n"); + } + } + if ($first_tabbing_disagreement) { + write_logfile_entry( +"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" + ); + } + write_logfile_entry("\n"); + + $vertical_aligner_object->report_anything_unusual(); + + $file_writer_object->report_line_length_errors(); + + return; +} + +sub check_options { + + # This routine is called to check the Opts hash after it is defined + $rOpts = shift; + + initialize_whitespace_hashes(); + initialize_bond_strength_hashes(); + + make_static_block_comment_pattern(); + make_static_side_comment_pattern(); + make_closing_side_comment_prefix(); + make_closing_side_comment_list_pattern(); + $format_skipping_pattern_begin = + make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); + $format_skipping_pattern_end = + make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); + + # If closing side comments ARE selected, then we can safely + # delete old closing side comments unless closing side comment + # warnings are requested. This is a good idea because it will + # eliminate any old csc's which fall below the line count threshold. + # We cannot do this if warnings are turned on, though, because we + # might delete some text which has been added. So that must + # be handled when comments are created. + if ( $rOpts->{'closing-side-comments'} ) { + if ( !$rOpts->{'closing-side-comment-warnings'} ) { + $rOpts->{'delete-closing-side-comments'} = 1; + } + } + + # If closing side comments ARE NOT selected, but warnings ARE + # selected and we ARE DELETING csc's, then we will pretend to be + # adding with a huge interval. This will force the comments to be + # generated for comparison with the old comments, but not added. + elsif ( $rOpts->{'closing-side-comment-warnings'} ) { + if ( $rOpts->{'delete-closing-side-comments'} ) { + $rOpts->{'delete-closing-side-comments'} = 0; + $rOpts->{'closing-side-comments'} = 1; + $rOpts->{'closing-side-comment-interval'} = 100000000; + } + } + + make_bli_pattern(); + make_block_brace_vertical_tightness_pattern(); + make_blank_line_pattern(); + + prepare_cuddled_block_types(); + if ( $rOpts->{'dump-cuddled-block-list'} ) { + dump_cuddled_block_list(*STDOUT); + Exit(0); + } + + if ( $rOpts->{'line-up-parentheses'} ) { + + if ( $rOpts->{'indent-only'} + || !$rOpts->{'add-newlines'} + || !$rOpts->{'delete-old-newlines'} ) + { + Warn(<{'line-up-parentheses'} = 0; + } + } + + # At present, tabs are not compatible with the line-up-parentheses style + # (it would be possible to entab the total leading whitespace + # just prior to writing the line, if desired). + if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { + Warn(<{'tabs'} = 0; + } + + # Likewise, tabs are not compatible with outdenting.. + if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { + Warn(<{'tabs'} = 0; + } + + if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { + Warn(<{'tabs'} = 0; + } + + if ( !$rOpts->{'space-for-semicolon'} ) { + $want_left_space{'f'} = -1; + } + + if ( $rOpts->{'space-terminal-semicolon'} ) { + $want_left_space{';'} = 1; + } + + # implement outdenting preferences for keywords + %outdent_keyword = (); + my @okw = split_words( $rOpts->{'outdent-keyword-okl'} ); + unless (@okw) { + @okw = qw(next last redo goto return); # defaults + } + + # FUTURE: if not a keyword, assume that it is an identifier + foreach (@okw) { + if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { + $outdent_keyword{$_} = 1; + } + else { + Warn("ignoring '$_' in -okwl list; not a perl keyword"); + } + } + + # implement user whitespace preferences + if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) { + @want_left_space{@q} = (1) x scalar(@q); + } + + if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) { + @want_right_space{@q} = (1) x scalar(@q); + } + + if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) { + @want_left_space{@q} = (-1) x scalar(@q); + } + + if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) { + @want_right_space{@q} = (-1) x scalar(@q); + } + if ( $rOpts->{'dump-want-left-space'} ) { + dump_want_left_space(*STDOUT); + Exit(0); + } + + if ( $rOpts->{'dump-want-right-space'} ) { + dump_want_right_space(*STDOUT); + Exit(0); + } + + # default keywords for which space is introduced before an opening paren + # (at present, including them messes up vertical alignment) + my @sak = qw(my local our and or err eq ne if else elsif until + unless while for foreach return switch case given when catch); + @space_after_keyword{@sak} = (1) x scalar(@sak); + + # first remove any or all of these if desired + if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) { + + # -nsak='*' selects all the above keywords + if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) } + @space_after_keyword{@q} = (0) x scalar(@q); + } + + # then allow user to add to these defaults + if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) { + @space_after_keyword{@q} = (1) x scalar(@q); + } + + # implement user break preferences + my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & + = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= + . : ? && || and or err xor + ); + + my $break_after = sub { + my @toks = @_; + foreach my $tok (@toks) { + if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } + } + }; + + my $break_before = sub { + my @toks = @_; + foreach my $tok (@toks) { + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } + } + }; + + $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); + $break_before->(@all_operators) + if ( $rOpts->{'break-before-all-operators'} ); + + $break_after->( split_words( $rOpts->{'want-break-after'} ) ); + $break_before->( split_words( $rOpts->{'want-break-before'} ) ); + + # make note if breaks are before certain key types + %want_break_before = (); + foreach my $tok ( @all_operators, ',' ) { + $want_break_before{$tok} = + $left_bond_strength{$tok} < $right_bond_strength{$tok}; + } + + # Coordinate ?/: breaks, which must be similar + if ( !$want_break_before{':'} ) { + $want_break_before{'?'} = $want_break_before{':'}; + $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; + $left_bond_strength{'?'} = NO_BREAK; + } + + # Define here tokens which may follow the closing brace of a do statement + # on the same line, as in: + # } while ( $something); + my @dof = qw(until while unless if ; : ); + push @dof, ','; + @is_do_follower{@dof} = (1) x scalar(@dof); + + # What tokens may follow the closing brace of an if or elsif block? + # Not used. Previously used for cuddled else, but no longer needed. + %is_if_brace_follower = (); + + # nothing can follow the closing curly of an else { } block: + %is_else_brace_follower = (); + + # what can follow a multi-line anonymous sub definition closing curly: + my @asf = qw# ; : => or and && || ~~ !~~ ) #; + push @asf, ','; + @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf); + + # what can follow a one-line anonymous sub closing curly: + # one-line anonymous subs also have ']' here... + # see tk3.t and PP.pm + my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #; + push @asf1, ','; + @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1); + + # What can follow a closing curly of a block + # which is not an if/elsif/else/do/sort/map/grep/eval/sub + # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' + my @obf = qw# ; : => or and && || ) #; + push @obf, ','; + @is_other_brace_follower{@obf} = (1) x scalar(@obf); + + $right_bond_strength{'{'} = WEAK; + $left_bond_strength{'{'} = VERY_STRONG; + + # make -l=0 equal to -l=infinite + if ( !$rOpts->{'maximum-line-length'} ) { + $rOpts->{'maximum-line-length'} = 1000000; + } + + # make -lbl=0 equal to -lbl=infinite + if ( !$rOpts->{'long-block-line-count'} ) { + $rOpts->{'long-block-line-count'} = 1000000; + } + + my $enc = $rOpts->{'character-encoding'}; + if ( $enc && $enc !~ /^(none|utf8)$/i ) { + Die(<{'output-line-ending'}; + if ($ole) { + my %endings = ( + dos => "\015\012", + win => "\015\012", + mac => "\015", + unix => "\012", + ); + + # Patch for RT #99514, a memoization issue. + # Normally, the user enters one of 'dos', 'win', etc, and we change the + # value in the options parameter to be the corresponding line ending + # character. But, if we are using memoization, on later passes through + # here the option parameter will already have the desired ending + # character rather than the keyword 'dos', 'win', etc. So + # we must check to see if conversion has already been done and, if so, + # bypass the conversion step. + my %endings_inverted = ( + "\015\012" => 'dos', + "\015\012" => 'win', + "\015" => 'mac', + "\012" => 'unix', + ); + + if ( defined( $endings_inverted{$ole} ) ) { + + # we already have valid line ending, nothing more to do + } + else { + $ole = lc $ole; + unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { + my $str = join " ", keys %endings; + Die(<{'preserve-line-endings'} ) { + Warn("Ignoring -ple; conflicts with -ole\n"); + $rOpts->{'preserve-line-endings'} = undef; + } + } + } + + # hashes used to simplify setting whitespace + %tightness = ( + '{' => $rOpts->{'brace-tightness'}, + '}' => $rOpts->{'brace-tightness'}, + '(' => $rOpts->{'paren-tightness'}, + ')' => $rOpts->{'paren-tightness'}, + '[' => $rOpts->{'square-bracket-tightness'}, + ']' => $rOpts->{'square-bracket-tightness'}, + ); + %matching_token = ( + '{' => '}', + '(' => ')', + '[' => ']', + '?' => ':', + ); + + # frequently used parameters + $rOpts_add_newlines = $rOpts->{'add-newlines'}; + $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; + $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; + $rOpts_block_brace_vertical_tightness = + $rOpts->{'block-brace-vertical-tightness'}; + $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; + $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; + $rOpts_break_at_old_ternary_breakpoints = + $rOpts->{'break-at-old-ternary-breakpoints'}; + $rOpts_break_at_old_attribute_breakpoints = + $rOpts->{'break-at-old-attribute-breakpoints'}; + $rOpts_break_at_old_comma_breakpoints = + $rOpts->{'break-at-old-comma-breakpoints'}; + $rOpts_break_at_old_keyword_breakpoints = + $rOpts->{'break-at-old-keyword-breakpoints'}; + $rOpts_break_at_old_logical_breakpoints = + $rOpts->{'break-at-old-logical-breakpoints'}; + $rOpts_closing_side_comment_else_flag = + $rOpts->{'closing-side-comment-else-flag'}; + $rOpts_closing_side_comment_maximum_text = + $rOpts->{'closing-side-comment-maximum-text'}; + $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; + $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; + $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; + $rOpts_indent_columns = $rOpts->{'indent-columns'}; + $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; + $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; + $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; + $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; + + $rOpts_variable_maximum_line_length = + $rOpts->{'variable-maximum-line-length'}; + $rOpts_short_concatenation_item_length = + $rOpts->{'short-concatenation-item-length'}; + + $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; + $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; + $rOpts_format_skipping = $rOpts->{'format-skipping'}; + $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; + $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; + $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; + $rOpts_ignore_side_comment_lengths = + $rOpts->{'ignore-side-comment-lengths'}; + + # Note that both opening and closing tokens can access the opening + # and closing flags of their container types. + %opening_vertical_tightness = ( + '(' => $rOpts->{'paren-vertical-tightness'}, + '{' => $rOpts->{'brace-vertical-tightness'}, + '[' => $rOpts->{'square-bracket-vertical-tightness'}, + ')' => $rOpts->{'paren-vertical-tightness'}, + '}' => $rOpts->{'brace-vertical-tightness'}, + ']' => $rOpts->{'square-bracket-vertical-tightness'}, + ); + + %closing_vertical_tightness = ( + '(' => $rOpts->{'paren-vertical-tightness-closing'}, + '{' => $rOpts->{'brace-vertical-tightness-closing'}, + '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, + ')' => $rOpts->{'paren-vertical-tightness-closing'}, + '}' => $rOpts->{'brace-vertical-tightness-closing'}, + ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, + ); + + # assume flag for '>' same as ')' for closing qw quotes + %closing_token_indentation = ( + ')' => $rOpts->{'closing-paren-indentation'}, + '}' => $rOpts->{'closing-brace-indentation'}, + ']' => $rOpts->{'closing-square-bracket-indentation'}, + '>' => $rOpts->{'closing-paren-indentation'}, + ); + + # flag indicating if any closing tokens are indented + $some_closing_token_indentation = + $rOpts->{'closing-paren-indentation'} + || $rOpts->{'closing-brace-indentation'} + || $rOpts->{'closing-square-bracket-indentation'} + || $rOpts->{'indent-closing-brace'}; + + %opening_token_right = ( + '(' => $rOpts->{'opening-paren-right'}, + '{' => $rOpts->{'opening-hash-brace-right'}, + '[' => $rOpts->{'opening-square-bracket-right'}, + ); + + %stack_opening_token = ( + '(' => $rOpts->{'stack-opening-paren'}, + '{' => $rOpts->{'stack-opening-hash-brace'}, + '[' => $rOpts->{'stack-opening-square-bracket'}, + ); + + %stack_closing_token = ( + ')' => $rOpts->{'stack-closing-paren'}, + '}' => $rOpts->{'stack-closing-hash-brace'}, + ']' => $rOpts->{'stack-closing-square-bracket'}, + ); + $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; + $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'}; + return; +} + +sub bad_pattern { + + # See if a pattern will compile. We have to use a string eval here, + # but it should be safe because the pattern has been constructed + # by this program. + my ($pattern) = @_; + eval "'##'=~/$pattern/"; + return $@; +} + +{ + my %no_cuddle; + + # Add keywords here which really should not be cuddled + BEGIN { + my @q = qw(if unless for foreach while); + @no_cuddle{@q} = (1) x scalar(@q); + } + + sub prepare_cuddled_block_types { + + # the cuddled-else style, if used, is controlled by a hash that + # we construct here + + # Include keywords here which should not be cuddled + + my $cuddled_string = ""; + if ( $rOpts->{'cuddled-else'} ) { + + # set the default + $cuddled_string = 'elsif else continue catch finally' + unless ( $rOpts->{'cuddled-block-list-exclusive'} ); + + # This is the old equivalent but more complex version + # $cuddled_string = 'if-elsif-else unless-elsif-else -continue '; + + # Add users other blocks to be cuddled + my $cuddled_block_list = $rOpts->{'cuddled-block-list'}; + if ($cuddled_block_list) { + $cuddled_string .= " " . $cuddled_block_list; + } + + } + + # If we have a cuddled string of the form + # 'try-catch-finally' + + # we want to prepare a hash of the form + + # $rcuddled_block_types = { + # 'try' => { + # 'catch' => 1, + # 'finally' => 1 + # }, + # }; + + # use -dcbl to dump this hash + + # Multiple such strings are input as a space or comma separated list + + # If we get two lists with the same leading type, such as + # -cbl = "-try-catch-finally -try-catch-otherwise" + # then they will get merged as follows: + # $rcuddled_block_types = { + # 'try' => { + # 'catch' => 1, + # 'finally' => 2, + # 'otherwise' => 1, + # }, + # }; + # This will allow either type of chain to be followed. + + $cuddled_string =~ s/,/ /g; # allow space or comma separated lists + my @cuddled_strings = split /\s+/, $cuddled_string; + + $rcuddled_block_types = {}; + + # process each dash-separated string... + my $string_count = 0; + foreach my $string (@cuddled_strings) { + next unless $string; + my @words = split /-+/, $string; # allow multiple dashes + + # we could look for and report possible errors here... + next unless ( @words > 0 ); + + # allow either '-continue' or *-continue' for arbitrary starting type + my $start = '*'; + + # a single word without dashes is a secondary block type + if ( @words > 1 ) { + $start = shift @words; + } + + # always make an entry for the leading word. If none follow, this + # will still prevent a wildcard from matching this word. + if ( !defined( $rcuddled_block_types->{$start} ) ) { + $rcuddled_block_types->{$start} = {}; + } + + # The count gives the original word order in case we ever want it. + $string_count++; + my $word_count = 0; + foreach my $word (@words) { + next unless $word; + if ( $no_cuddle{$word} ) { + Warn( +"## Ignoring keyword '$word' in -cbl; does not seem right\n" + ); + next; + } + $word_count++; + $rcuddled_block_types->{$start}->{$word} = + 1; #"$string_count.$word_count"; + } + } + return; + } +} + +sub dump_cuddled_block_list { + my ($fh) = @_; + + # ORIGINAL METHOD: Here is the format of the cuddled block type hash + # which controls this routine + # my $rcuddled_block_types = { + # 'if' => { + # 'else' => 1, + # 'elsif' => 1 + # }, + # 'try' => { + # 'catch' => 1, + # 'finally' => 1 + # }, + # }; + + # SIMPLFIED METHOD: the simplified method uses a wildcard for + # the starting block type and puts all cuddled blocks together: + # my $rcuddled_block_types = { + # '*' => { + # 'else' => 1, + # 'elsif' => 1 + # 'catch' => 1, + # 'finally' => 1 + # }, + # }; + + # Both methods work, but the simplified method has proven to be adequate and + # easier to manage. + + my $cuddled_string = $rOpts->{'cuddled-block-list'}; + $cuddled_string = '' unless $cuddled_string; + + my $flags = ""; + $flags .= "-ce" if ( $rOpts->{'cuddled-else'} ); + $flags .= " -cbl='$cuddled_string'"; + + unless ( $rOpts->{'cuddled-else'} ) { + $flags .= "\nNote: You must specify -ce to generate a cuddled hash"; + } + + $fh->print(<print( Dumper($rcuddled_block_types) ); + + $fh->print(<{'static-block-comment-prefix'} ) { + my $prefix = $rOpts->{'static-block-comment-prefix'}; + $prefix =~ s/^\s*//; + my $pattern = $prefix; + + # user may give leading caret to force matching left comments only + if ( $prefix !~ /^\^#/ ) { + if ( $prefix !~ /^#/ ) { + Die( +"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n" + ); + } + $pattern = '^\s*' . $prefix; + } + if ( bad_pattern($pattern) ) { + Die( +"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n" + ); + } + $static_block_comment_pattern = $pattern; + } + return; +} + +sub make_format_skipping_pattern { + my ( $opt_name, $default ) = @_; + my $param = $rOpts->{$opt_name}; + unless ($param) { $param = $default } + $param =~ s/^\s*//; + if ( $param !~ /^#/ ) { + Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); + } + my $pattern = '^' . $param . '\s'; + if ( bad_pattern($pattern) ) { + Die( +"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n" + ); + } + return $pattern; +} + +sub make_closing_side_comment_list_pattern { + + # turn any input list into a regex for recognizing selected block types + $closing_side_comment_list_pattern = '^\w+'; + if ( defined( $rOpts->{'closing-side-comment-list'} ) + && $rOpts->{'closing-side-comment-list'} ) + { + $closing_side_comment_list_pattern = + make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); + } + return; +} + +sub make_bli_pattern { + + if ( defined( $rOpts->{'brace-left-and-indent-list'} ) + && $rOpts->{'brace-left-and-indent-list'} ) + { + $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; + } + + $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); + return; +} + +sub make_block_brace_vertical_tightness_pattern { + + # turn any input list into a regex for recognizing selected block types + $block_brace_vertical_tightness_pattern = + '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; + if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) + && $rOpts->{'block-brace-vertical-tightness-list'} ) + { + $block_brace_vertical_tightness_pattern = + make_block_pattern( '-bbvtl', + $rOpts->{'block-brace-vertical-tightness-list'} ); + } + return; +} + +sub make_blank_line_pattern { + + $blank_lines_before_closing_block_pattern = $SUB_PATTERN; + my $key = 'blank-lines-before-closing-block-list'; + if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { + $blank_lines_before_closing_block_pattern = + make_block_pattern( '-blbcl', $rOpts->{$key} ); + } + + $blank_lines_after_opening_block_pattern = $SUB_PATTERN; + $key = 'blank-lines-after-opening-block-list'; + if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { + $blank_lines_after_opening_block_pattern = + make_block_pattern( '-blaol', $rOpts->{$key} ); + } + return; +} + +sub make_block_pattern { + + # given a string of block-type keywords, return a regex to match them + # The only tricky part is that labels are indicated with a single ':' + # and the 'sub' token text may have additional text after it (name of + # sub). + # + # Example: + # + # input string: "if else elsif unless while for foreach do : sub"; + # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; + + # Minor Update: + # + # To distinguish between anonymous subs and named subs, use 'sub' to + # indicate a named sub, and 'asub' to indicate an anonymous sub + + my ( $abbrev, $string ) = @_; + my @list = split_words($string); + my @words = (); + my %seen; + for my $i (@list) { + if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } + next if $seen{$i}; + $seen{$i} = 1; + if ( $i eq 'sub' ) { + } + elsif ( $i eq 'asub' ) { + } + elsif ( $i eq ';' ) { + push @words, ';'; + } + elsif ( $i eq '{' ) { + push @words, '\{'; + } + elsif ( $i eq ':' ) { + push @words, '\w+:'; + } + elsif ( $i =~ /^\w/ ) { + push @words, $i; + } + else { + Warn("unrecognized block type $i after $abbrev, ignoring\n"); + } + } + my $pattern = '(' . join( '|', @words ) . ')$'; + my $sub_patterns = ""; + if ( $seen{'sub'} ) { + $sub_patterns .= '|' . $SUB_PATTERN; + } + if ( $seen{'asub'} ) { + $sub_patterns .= '|' . $ASUB_PATTERN; + } + if ($sub_patterns) { + $pattern = '(' . $pattern . $sub_patterns . ')'; + } + $pattern = '^' . $pattern; + return $pattern; +} + +sub make_static_side_comment_pattern { + + # create the pattern used to identify static side comments + $static_side_comment_pattern = '^##'; + + # allow the user to change it + if ( $rOpts->{'static-side-comment-prefix'} ) { + my $prefix = $rOpts->{'static-side-comment-prefix'}; + $prefix =~ s/^\s*//; + my $pattern = '^' . $prefix; + if ( bad_pattern($pattern) ) { + Die( +"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n" + ); + } + $static_side_comment_pattern = $pattern; + } + return; +} + +sub make_closing_side_comment_prefix { + + # Be sure we have a valid closing side comment prefix + my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; + my $csc_prefix_pattern; + if ( !defined($csc_prefix) ) { + $csc_prefix = '## end'; + $csc_prefix_pattern = '^##\s+end'; + } + else { + my $test_csc_prefix = $csc_prefix; + if ( $test_csc_prefix !~ /^#/ ) { + $test_csc_prefix = '#' . $test_csc_prefix; + } + + # make a regex to recognize the prefix + my $test_csc_prefix_pattern = $test_csc_prefix; + + # escape any special characters + $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; + + $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; + + # allow exact number of intermediate spaces to vary + $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; + + # make sure we have a good pattern + # if we fail this we probably have an error in escaping + # characters. + + if ( bad_pattern($test_csc_prefix_pattern) ) { + + # shouldn't happen..must have screwed up escaping, above + report_definite_bug(); + Warn( +"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n" + ); + + # just warn and keep going with defaults + Warn("Please consider using a simpler -cscp prefix\n"); + Warn("Using default -cscp instead; please check output\n"); + } + else { + $csc_prefix = $test_csc_prefix; + $csc_prefix_pattern = $test_csc_prefix_pattern; + } + } + $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; + $closing_side_comment_prefix_pattern = $csc_prefix_pattern; + return; +} + +sub dump_want_left_space { + my $fh = shift; + local $" = "\n"; + print $fh <> + # my $msg = new Fax::Send + # -recipients => $to, + # -data => $data; + # This is the safest thing to do. If we had the token to the right of + # the minus we could do a better check. + || ( ( $tokenr eq '-' ) && ( $typel eq 'w' ) ) + + # and something like this could become ambiguous without space + # after the '-': + # use constant III=>1; + # $a = $b - III; + # and even this: + # $a = - III; + || ( ( $tokenl eq '-' ) + && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) + + # '= -' should not become =- or you will get a warning + # about reversed -= + # || ($tokenr eq '-') + + # keep a space between a quote and a bareword to prevent the + # bareword from becoming a quote modifier. + || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) + + # keep a space between a token ending in '$' and any word; + # this caused trouble: "die @$ if $@" + || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) + && ( $tokenr =~ /^[a-zA-Z_]/ ) ) + + # perl is very fussy about spaces before << + || ( $tokenr =~ /^\<\ [qw#= ( ) =#], #=( )= + 'Venus1' => [qw#0 +#], # 0+ + 'Venus2' => [qw#+ 0#], # +0 + 'Enterprise' => [qw#) x ! !#], # ()x!! + 'Kite1' => [qw#~ ~ <>#], # ~~<> + 'Kite2' => [qw#~~ <>#], # ~~<> + 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> + 'Bang bang ' => [qw#! !#], # !! + ); + + # The following operators and constants are not included because they + # are normally kept tight by perltidy: + # ~~ <~> + # + + # Make a lookup table indexed by the first token of each operator: + # first token => [list, list, ...] + foreach my $value ( values(%secret_operators) ) { + my $tok = $value->[0]; + push @{ $is_leading_secret_token{$tok} }, $value; + } + } + + sub new_secret_operator_whitespace { + + my ( $rlong_array, $rwhitespace_flags ) = @_; + + # Loop over all tokens in this line + my ( $token, $type ); + my $jmax = @{$rlong_array} - 1; + foreach my $j ( 0 .. $jmax ) { + + $token = $rlong_array->[$j]->[_TOKEN_]; + $type = $rlong_array->[$j]->[_TYPE_]; + + # Skip unless this token might start a secret operator + next if ( $type eq 'b' ); + next unless ( $is_leading_secret_token{$token} ); + + # Loop over all secret operators with this leading token + foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { + my $jend = $j - 1; + foreach my $tok ( @{$rpattern} ) { + $jend++; + $jend++ + + if ( $jend <= $jmax + && $rlong_array->[$jend]->[_TYPE_] eq 'b' ); + if ( $jend > $jmax + || $tok ne $rlong_array->[$jend]->[_TOKEN_] ) + { + $jend = undef; + last; + } + } + + if ($jend) { + + # set flags to prevent spaces within this operator + foreach my $jj ( $j + 1 .. $jend ) { + $rwhitespace_flags->[$jj] = WS_NO; + } + $j = $jend; + last; + } + } ## End Loop over all operators + } ## End loop over all tokens + return; + } # End sub +} + +{ # begin print_line_of_tokens + + my $rinput_token_array; # Current working array + my $rinput_K_array; # Future working array + + my $in_quote; + my $guessed_indentation_level; + + # This should be a return variable from extract_token + # These local token variables are stored by store_token_to_go: + my $Ktoken_vars; + my $block_type; + my $ci_level; + my $container_environment; + my $container_type; + my $in_continued_quote; + my $level; + my $no_internal_newlines; + my $slevel; + my $token; + my $type; + my $type_sequence; + + # routine to pull the jth token from the line of tokens + sub extract_token { + my ( $self, $j ) = @_; + + my $rLL = $self->{rLL}; + $Ktoken_vars = $rinput_K_array->[$j]; + if ( !defined($Ktoken_vars) ) { + + # Shouldn't happen: an error here would be due to a recent program change + Fault("undefined index K for j=$j"); + } + my $rtoken_vars = $rLL->[$Ktoken_vars]; + + if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) { + + # Shouldn't happen: an error here would be due to a recent program change + Fault(<[_TOKEN_]' ne '$rLL->[$Ktoken_vars]' +EOM + } + + ######################################################### + # these are now redundant and can eventually be eliminated + + $token = $rtoken_vars->[_TOKEN_]; + $type = $rtoken_vars->[_TYPE_]; + $block_type = $rtoken_vars->[_BLOCK_TYPE_]; + $container_type = $rtoken_vars->[_CONTAINER_TYPE_]; + $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_]; + $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + $level = $rtoken_vars->[_LEVEL_]; + $slevel = $rtoken_vars->[_SLEVEL_]; + $ci_level = $rtoken_vars->[_CI_LEVEL_]; + ######################################################### + + return; + } + + { + my @saved_token; + + sub save_current_token { + + @saved_token = ( + $block_type, $ci_level, + $container_environment, $container_type, + $in_continued_quote, $level, + $no_internal_newlines, $slevel, + $token, $type, + $type_sequence, $Ktoken_vars, + ); + return; + } + + sub restore_current_token { + ( + $block_type, $ci_level, + $container_environment, $container_type, + $in_continued_quote, $level, + $no_internal_newlines, $slevel, + $token, $type, + $type_sequence, $Ktoken_vars, + ) = @saved_token; + return; + } + } + + sub token_length { + + # Returns the length of a token, given: + # $token=text of the token + # $type = type + # $not_first_token = should be TRUE if this is not the first token of + # the line. It might the index of this token in an array. It is + # used to test for a side comment vs a block comment. + # Note: Eventually this should be the only routine determining the + # length of a token in this package. + my ( $token, $type, $not_first_token ) = @_; + my $token_length = length($token); + + # We mark lengths of side comments as just 1 if we are + # ignoring their lengths when setting line breaks. + $token_length = 1 + if ( $rOpts_ignore_side_comment_lengths + && $not_first_token + && $type eq '#' ); + return $token_length; + } + + sub rtoken_length { + + # return length of ith token in @{$rtokens} + my ($i) = @_; + return token_length( $rinput_token_array->[$i]->[_TOKEN_], + $rinput_token_array->[$i]->[_TYPE_], $i ); + } + + # Routine to place the current token into the output stream. + # Called once per output token. + sub store_token_to_go { + + my ( $self, $side_comment_follows ) = @_; + + my $flag = $side_comment_follows ? 1 : $no_internal_newlines; + + ++$max_index_to_go; + $K_to_go[$max_index_to_go] = $Ktoken_vars; + $tokens_to_go[$max_index_to_go] = $token; + $types_to_go[$max_index_to_go] = $type; + $nobreak_to_go[$max_index_to_go] = $flag; + $old_breakpoint_to_go[$max_index_to_go] = 0; + $forced_breakpoint_to_go[$max_index_to_go] = 0; + $block_type_to_go[$max_index_to_go] = $block_type; + $type_sequence_to_go[$max_index_to_go] = $type_sequence; + $container_environment_to_go[$max_index_to_go] = $container_environment; + $ci_levels_to_go[$max_index_to_go] = $ci_level; + $mate_index_to_go[$max_index_to_go] = -1; + $matching_token_to_go[$max_index_to_go] = ''; + $bond_strength_to_go[$max_index_to_go] = 0; + + # Note: negative levels are currently retained as a diagnostic so that + # the 'final indentation level' is correctly reported for bad scripts. + # But this means that every use of $level as an index must be checked. + # If this becomes too much of a problem, we might give up and just clip + # them at zero. + ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0; + $levels_to_go[$max_index_to_go] = $level; + $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0; + + # link the non-blank tokens + my $iprev = $max_index_to_go - 1; + $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' ); + $iprev_to_go[$max_index_to_go] = $iprev; + $inext_to_go[$iprev] = $max_index_to_go + if ( $iprev >= 0 && $type ne 'b' ); + $inext_to_go[$max_index_to_go] = $max_index_to_go + 1; + + $token_lengths_to_go[$max_index_to_go] = + token_length( $token, $type, $max_index_to_go ); + + # We keep a running sum of token lengths from the start of this batch: + # summed_lengths_to_go[$i] = total length to just before token $i + # summed_lengths_to_go[$i+1] = total length to just after token $i + $summed_lengths_to_go[ $max_index_to_go + 1 ] = + $summed_lengths_to_go[$max_index_to_go] + + $token_lengths_to_go[$max_index_to_go]; + + # Define the indentation that this token would have if it started + # a new line. We have to do this now because we need to know this + # when considering one-line blocks. + set_leading_whitespace( $level, $ci_level, $in_continued_quote ); + + # remember previous nonblank tokens seen + if ( $type ne 'b' ) { + $last_last_nonblank_index_to_go = $last_nonblank_index_to_go; + $last_last_nonblank_type_to_go = $last_nonblank_type_to_go; + $last_last_nonblank_token_to_go = $last_nonblank_token_to_go; + $last_nonblank_index_to_go = $max_index_to_go; + $last_nonblank_type_to_go = $type; + $last_nonblank_token_to_go = $token; + if ( $type eq ',' ) { + $comma_count_in_batch++; + } + } + + FORMATTER_DEBUG_FLAG_STORE && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n"; + }; + return; + } + + sub insert_new_token_to_go { + + # insert a new token into the output stream. use same level as + # previous token; assumes a character at max_index_to_go. + my ( $self, @args ) = @_; + save_current_token(); + ( $token, $type, $slevel, $no_internal_newlines ) = @args; + + if ( $max_index_to_go == UNDEFINED_INDEX ) { + warning("code bug: bad call to insert_new_token_to_go\n"); + } + $level = $levels_to_go[$max_index_to_go]; + + # FIXME: it seems to be necessary to use the next, rather than + # previous, value of this variable when creating a new blank (align.t) + #my $slevel = $nesting_depth_to_go[$max_index_to_go]; + $ci_level = $ci_levels_to_go[$max_index_to_go]; + $container_environment = $container_environment_to_go[$max_index_to_go]; + $in_continued_quote = 0; + $block_type = ""; + $type_sequence = ""; + + # store an undef for the K value to catch unexpected usage + # This routine is only called by add_closing_side_comments, and + # eventually that call will be eliminated. + $Ktoken_vars = undef; + + $self->store_token_to_go(); + restore_current_token(); + return; + } + + sub copy_hash { + my ($rold_token_hash) = @_; + my %new_token_hash = + map { ( $_, $rold_token_hash->{$_} ) } keys %{$rold_token_hash}; + return \%new_token_hash; + } + + sub copy_array { + my ($rold) = @_; + my @new = map { $_ } @{$rold}; + return \@new; + } + + sub copy_token_as_type { + my ( $rold_token, $type, $token ) = @_; + if ( $type eq 'b' ) { + $token = " " unless defined($token); + } + elsif ( $type eq 'q' ) { + $token = '' unless defined($token); + } + elsif ( $type eq '->' ) { + $token = '->' unless defined($token); + } + elsif ( $type eq ';' ) { + $token = ';' unless defined($token); + } + else { + Fault( +"Programming error: copy_token_as has type $type but should be 'b' or 'q'" + ); + } + my $rnew_token = copy_array($rold_token); + $rnew_token->[_TYPE_] = $type; + $rnew_token->[_TOKEN_] = $token; + $rnew_token->[_BLOCK_TYPE_] = ''; + $rnew_token->[_CONTAINER_TYPE_] = ''; + $rnew_token->[_CONTAINER_ENVIRONMENT_] = ''; + $rnew_token->[_TYPE_SEQUENCE_] = ''; + return $rnew_token; + } + + sub boolean_equals { + my ( $val1, $val2 ) = @_; + return ( $val1 && $val2 || !$val1 && !$val2 ); + } + + sub print_line_of_tokens { + + my ( $self, $line_of_tokens ) = @_; + + # This routine is called once per input line to process all of + # the tokens on that line. This is the first stage of + # beautification. + # + # Full-line comments and blank lines may be processed immediately. + # + # For normal lines of code, the tokens are stored one-by-one, + # via calls to 'sub store_token_to_go', until a known line break + # point is reached. Then, the batch of collected tokens is + # passed along to 'sub output_line_to_go' for further + # processing. This routine decides if there should be + # whitespace between each pair of non-white tokens, so later + # routines only need to decide on any additional line breaks. + # Any whitespace is initially a single space character. Later, + # the vertical aligner may expand that to be multiple space + # characters if necessary for alignment. + + $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + my $CODE_type = $line_of_tokens->{_code_type}; + + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $K_first, $K_last ) = @{$rK_range}; + + my $rLL = $self->{rLL}; + my $rbreak_container = $self->{rbreak_container}; + + if ( !defined($K_first) ) { + + # Unexpected blank line.. + # Calling routine was supposed to handle this + Warn( +"Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring" + ); + return; + } + + $no_internal_newlines = 1 - $rOpts_add_newlines; + my $is_comment = + ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' ); + my $is_static_block_comment_without_leading_space = + $CODE_type eq 'SBCX'; + $is_static_block_comment = + $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space; + my $is_hanging_side_comment = $CODE_type eq 'HSC'; + my $is_VERSION_statement = $CODE_type eq 'VER'; + if ($is_VERSION_statement) { + $saw_VERSION_in_this_file = 1; + $no_internal_newlines = 1; + } + + # Add interline blank if any + my $last_old_nonblank_type = "b"; + my $first_new_nonblank_type = "b"; + my $first_new_nonblank_token = " "; + if ( $max_index_to_go >= 0 ) { + $last_old_nonblank_type = $types_to_go[$max_index_to_go]; + $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_]; + $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_]; + if ( !$is_comment + && $types_to_go[$max_index_to_go] ne 'b' + && $K_first > 0 + && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' ) + { + $K_first -= 1; + } + } + + # Copy the tokens into local arrays + $rinput_token_array = []; + $rinput_K_array = []; + $rinput_K_array = [ ( $K_first .. $K_last ) ]; + $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ]; + my $jmax = @{$rinput_K_array} - 1; + + $in_continued_quote = $starting_in_quote = + $line_of_tokens->{_starting_in_quote}; + $in_quote = $line_of_tokens->{_ending_in_quote}; + $ending_in_quote = $in_quote; + $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + + my $j_next; + my $next_nonblank_token; + my $next_nonblank_token_type; + + $block_type = ""; + $container_type = ""; + $container_environment = ""; + $type_sequence = ""; + + ###################################### + # Handle a block (full-line) comment.. + ###################################### + if ($is_comment) { + + if ( $rOpts->{'delete-block-comments'} ) { return } + + if ( $rOpts->{'tee-block-comments'} ) { + $file_writer_object->tee_on(); + } + + destroy_one_line_block(); + $self->output_line_to_go(); + + # output a blank line before block comments + if ( + # unless we follow a blank or comment line + $last_line_leading_type !~ /^[#b]$/ + + # only if allowed + && $rOpts->{'blanks-before-comments'} + + # if this is NOT an empty comment line + && $rinput_token_array->[0]->[_TOKEN_] ne '#' + + # not after a short line ending in an opening token + # because we already have space above this comment. + # Note that the first comment in this if block, after + # the 'if (', does not get a blank line because of this. + && !$last_output_short_opening_token + + # never before static block comments + && !$is_static_block_comment + ) + { + $self->flush(); # switching to new output stream + $file_writer_object->write_blank_code_line(); + $last_line_leading_type = 'b'; + } + + # TRIM COMMENTS -- This could be turned off as a option + $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end + + if ( + $rOpts->{'indent-block-comments'} + && ( !$rOpts->{'indent-spaced-block-comments'} + || $input_line =~ /^\s+/ ) + && !$is_static_block_comment_without_leading_space + ) + { + $self->extract_token(0); + $self->store_token_to_go(); + $self->output_line_to_go(); + } + else { + $self->flush(); # switching to new output stream + $file_writer_object->write_code_line( + $rinput_token_array->[0]->[_TOKEN_] . "\n" ); + $last_line_leading_type = '#'; + } + if ( $rOpts->{'tee-block-comments'} ) { + $file_writer_object->tee_off(); + } + return; + } + + # TODO: Move to sub scan_comments + # compare input/output indentation except for continuation lines + # (because they have an unknown amount of initial blank space) + # and lines which are quotes (because they may have been outdented) + # Note: this test is placed here because we know the continuation flag + # at this point, which allows us to avoid non-meaningful checks. + my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_]; + compare_indentation_levels( $guessed_indentation_level, + $structural_indentation_level ) + unless ( $is_hanging_side_comment + || $rinput_token_array->[0]->[_CI_LEVEL_] > 0 + || $guessed_indentation_level == 0 + && $rinput_token_array->[0]->[_TYPE_] eq 'Q' ); + + ########################## + # Handle indentation-only + ########################## + + # NOTE: In previous versions we sent all qw lines out immediately here. + # No longer doing this: also write a line which is entirely a 'qw' list + # to allow stacking of opening and closing tokens. Note that interior + # qw lines will still go out at the end of this routine. + ##if ( $rOpts->{'indent-only'} ) { + if ( $CODE_type eq 'IO' ) { + $self->flush(); + my $line = $input_line; + + # delete side comments if requested with -io, but + # we will not allow deleting of closing side comments with -io + # because the coding would be more complex + if ( $rOpts->{'delete-side-comments'} + && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' ) + { + + $line = ""; + foreach my $jj ( 0 .. $jmax - 1 ) { + $line .= $rinput_token_array->[$jj]->[_TOKEN_]; + } + } + $line = trim($line); + + $self->extract_token(0); + $token = $line; + $type = 'q'; + $block_type = ""; + $container_type = ""; + $container_environment = ""; + $type_sequence = ""; + $self->store_token_to_go(); + $self->output_line_to_go(); + return; + } + + ############################ + # Handle all other lines ... + ############################ + + ####################################################### + # FIXME: this should become unnecessary + # making $j+2 valid simplifies coding + my $rnew_blank = + copy_token_as_type( $rinput_token_array->[$jmax], 'b' ); + push @{$rinput_token_array}, $rnew_blank; + push @{$rinput_token_array}, $rnew_blank; + ####################################################### + + # If we just saw the end of an elsif block, write nag message + # if we do not see another elseif or an else. + if ($looking_for_else) { + + unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) { + write_logfile_entry("(No else block)\n"); + } + $looking_for_else = 0; + } + + # This is a good place to kill incomplete one-line blocks + if ( + ( + ( $semicolons_before_block_self_destruct == 0 ) + && ( $max_index_to_go >= 0 ) + && ( $last_old_nonblank_type eq ';' ) + && ( $first_new_nonblank_token ne '}' ) + ) + + # Patch for RT #98902. Honor request to break at old commas. + || ( $rOpts_break_at_old_comma_breakpoints + && $max_index_to_go >= 0 + && $last_old_nonblank_type eq ',' ) + ) + { + $forced_breakpoint_to_go[$max_index_to_go] = 1 + if ($rOpts_break_at_old_comma_breakpoints); + destroy_one_line_block(); + $self->output_line_to_go(); + } + + # loop to process the tokens one-by-one + $type = 'b'; + $token = ""; + + # We do not want a leading blank if the previous batch just got output + my $jmin = 0; + if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) { + $jmin = 1; + } + + foreach my $j ( $jmin .. $jmax ) { + + # pull out the local values for this token + $self->extract_token($j); + + if ( $type eq '#' ) { + + # trim trailing whitespace + # (there is no option at present to prevent this) + $token =~ s/\s*$//; + + if ( + $rOpts->{'delete-side-comments'} + + # delete closing side comments if necessary + || ( $rOpts->{'delete-closing-side-comments'} + && $token =~ /$closing_side_comment_prefix_pattern/o + && $last_nonblank_block_type =~ + /$closing_side_comment_list_pattern/o ) + ) + { + if ( $types_to_go[$max_index_to_go] eq 'b' ) { + unstore_token_to_go(); + } + last; + } + } + + # If we are continuing after seeing a right curly brace, flush + # buffer unless we see what we are looking for, as in + # } else ... + if ( $rbrace_follower && $type ne 'b' ) { + + unless ( $rbrace_follower->{$token} ) { + $self->output_line_to_go(); + } + $rbrace_follower = undef; + } + + $j_next = + ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' ) + ? $j + 2 + : $j + 1; + $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_]; + $next_nonblank_token_type = + $rinput_token_array->[$j_next]->[_TYPE_]; + + ###################### + # MAYBE MOVE ELSEWHERE? + ###################### + if ( $type eq 'Q' ) { + note_embedded_tab() if ( $token =~ "\t" ); + + # make note of something like '$var = s/xxx/yyy/;' + # in case it should have been '$var =~ s/xxx/yyy/;' + if ( + $token =~ /^(s|tr|y|m|\/)/ + && $last_nonblank_token =~ /^(=|==|!=)$/ + + # preceded by simple scalar + && $last_last_nonblank_type eq 'i' + && $last_last_nonblank_token =~ /^\$/ + + # followed by some kind of termination + # (but give complaint if we can's see far enough ahead) + && $next_nonblank_token =~ /^[; \)\}]$/ + + # scalar is not declared + && !( + $types_to_go[0] eq 'k' + && $tokens_to_go[0] =~ /^(my|our|local)$/ + ) + ) + { + my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; + complain( +"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" + ); + } + } + + # Do not allow breaks which would promote a side comment to a + # block comment. In order to allow a break before an opening + # or closing BLOCK, followed by a side comment, those sections + # of code will handle this flag separately. + my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); + my $is_opening_BLOCK = + ( $type eq '{' + && $token eq '{' + && $block_type + && $block_type ne 't' ); + my $is_closing_BLOCK = + ( $type eq '}' + && $token eq '}' + && $block_type + && $block_type ne 't' ); + + if ( $side_comment_follows + && !$is_opening_BLOCK + && !$is_closing_BLOCK ) + { + $no_internal_newlines = 1; + } + + # We're only going to handle breaking for code BLOCKS at this + # (top) level. Other indentation breaks will be handled by + # sub scan_list, which is better suited to dealing with them. + if ($is_opening_BLOCK) { + + # Tentatively output this token. This is required before + # calling starting_one_line_block. We may have to unstore + # it, though, if we have to break before it. + $self->store_token_to_go($side_comment_follows); + + # Look ahead to see if we might form a one-line block.. + my $too_long = + $self->starting_one_line_block( $j, $jmax, $level, $slevel, + $ci_level, $rinput_token_array ); + clear_breakpoint_undo_stack(); + + # to simplify the logic below, set a flag to indicate if + # this opening brace is far from the keyword which introduces it + my $keyword_on_same_line = 1; + if ( ( $max_index_to_go >= 0 ) + && ( $last_nonblank_type eq ')' ) + && ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) ) + { + $keyword_on_same_line = 0; + } + + # decide if user requested break before '{' + my $want_break = + + # use -bl flag if not a sub block of any type + $block_type !~ /^sub\b/ + ? $rOpts->{'opening-brace-on-new-line'} + + # use -sbl flag for a named sub block + : $block_type !~ /$ASUB_PATTERN/ + ? $rOpts->{'opening-sub-brace-on-new-line'} + + # use -asbl flag for an anonymous sub block + : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; + + # Do not break if this token is welded to the left + if ( weld_len_left( $type_sequence, $token ) ) { + $want_break = 0; + } + + # Break before an opening '{' ... + if ( + + # if requested + $want_break + + # and we were unable to start looking for a block, + && $index_start_one_line_block == UNDEFINED_INDEX + + # or if it will not be on same line as its keyword, so that + # it will be outdented (eval.t, overload.t), and the user + # has not insisted on keeping it on the right + || ( !$keyword_on_same_line + && !$rOpts->{'opening-brace-always-on-right'} ) + + ) + { + + # but only if allowed + unless ($no_internal_newlines) { + + # since we already stored this token, we must unstore it + $self->unstore_token_to_go(); + + # then output the line + $self->output_line_to_go(); + + # and now store this token at the start of a new line + $self->store_token_to_go($side_comment_follows); + } + } + + # Now update for side comment + if ($side_comment_follows) { $no_internal_newlines = 1 } + + # now output this line + unless ($no_internal_newlines) { + $self->output_line_to_go(); + } + } + + elsif ($is_closing_BLOCK) { + + # If there is a pending one-line block .. + if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + + # we have to terminate it if.. + if ( + + # it is too long (final length may be different from + # initial estimate). note: must allow 1 space for this + # token + excess_line_length( $index_start_one_line_block, + $max_index_to_go ) >= 0 + + # or if it has too many semicolons + || ( $semicolons_before_block_self_destruct == 0 + && $last_nonblank_type ne ';' ) + ) + { + destroy_one_line_block(); + } + } + + # put a break before this closing curly brace if appropriate + unless ( $no_internal_newlines + || $index_start_one_line_block != UNDEFINED_INDEX ) + { + + # write out everything before this closing curly brace + $self->output_line_to_go(); + } + + # Now update for side comment + if ($side_comment_follows) { $no_internal_newlines = 1 } + + # store the closing curly brace + $self->store_token_to_go(); + + # ok, we just stored a closing curly brace. Often, but + # not always, we want to end the line immediately. + # So now we have to check for special cases. + + # if this '}' successfully ends a one-line block.. + my $is_one_line_block = 0; + my $keep_going = 0; + if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + + # Remember the type of token just before the + # opening brace. It would be more general to use + # a stack, but this will work for one-line blocks. + $is_one_line_block = + $types_to_go[$index_start_one_line_block]; + + # we have to actually make it by removing tentative + # breaks that were set within it + undo_forced_breakpoint_stack(0); + set_nobreaks( $index_start_one_line_block, + $max_index_to_go - 1 ); + + # then re-initialize for the next one-line block + destroy_one_line_block(); + + # then decide if we want to break after the '}' .. + # We will keep going to allow certain brace followers as in: + # do { $ifclosed = 1; last } unless $losing; + # + # But make a line break if the curly ends a + # significant block: + if ( + ( + $is_block_without_semicolon{$block_type} + + # Follow users break point for + # one line block types U & G, such as a 'try' block + || $is_one_line_block =~ /^[UG]$/ && $j == $jmax + ) + + # if needless semicolon follows we handle it later + && $next_nonblank_token ne ';' + ) + { + $self->output_line_to_go() + unless ($no_internal_newlines); + } + } + + # set string indicating what we need to look for brace follower + # tokens + if ( $block_type eq 'do' ) { + $rbrace_follower = \%is_do_follower; + } + elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { + $rbrace_follower = \%is_if_brace_follower; + } + elsif ( $block_type eq 'else' ) { + $rbrace_follower = \%is_else_brace_follower; + } + + # added eval for borris.t + elsif ($is_sort_map_grep_eval{$block_type} + || $is_one_line_block eq 'G' ) + { + $rbrace_follower = undef; + $keep_going = 1; + } + + # anonymous sub + elsif ( $block_type =~ /$ASUB_PATTERN/ ) { + + if ($is_one_line_block) { + $rbrace_follower = \%is_anon_sub_1_brace_follower; + } + else { + $rbrace_follower = \%is_anon_sub_brace_follower; + } + } + + # None of the above: specify what can follow a closing + # brace of a block which is not an + # if/elsif/else/do/sort/map/grep/eval + # Testfiles: + # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t + else { + $rbrace_follower = \%is_other_brace_follower; + } + + # See if an elsif block is followed by another elsif or else; + # complain if not. + if ( $block_type eq 'elsif' ) { + + if ( $next_nonblank_token_type eq 'b' ) { # end of line? + $looking_for_else = 1; # ok, check on next line + } + else { + + unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { + write_logfile_entry("No else block :(\n"); + } + } + } + + # keep going after certain block types (map,sort,grep,eval) + # added eval for borris.t + if ($keep_going) { + + # keep going + } + + # if no more tokens, postpone decision until re-entring + elsif ( ( $next_nonblank_token_type eq 'b' ) + && $rOpts_add_newlines ) + { + unless ($rbrace_follower) { + $self->output_line_to_go() + unless ($no_internal_newlines); + } + } + + elsif ($rbrace_follower) { + + unless ( $rbrace_follower->{$next_nonblank_token} ) { + $self->output_line_to_go() + unless ($no_internal_newlines); + } + $rbrace_follower = undef; + } + + else { + $self->output_line_to_go() unless ($no_internal_newlines); + } + + } # end treatment of closing block token + + # handle semicolon + elsif ( $type eq ';' ) { + + # kill one-line blocks with too many semicolons + $semicolons_before_block_self_destruct--; + if ( + ( $semicolons_before_block_self_destruct < 0 ) + || ( $semicolons_before_block_self_destruct == 0 + && $next_nonblank_token_type !~ /^[b\}]$/ ) + ) + { + destroy_one_line_block(); + } + + # Remove unnecessary semicolons, but not after bare + # blocks, where it could be unsafe if the brace is + # mistokenized. + if ( + ( + $last_nonblank_token eq '}' + && ( + $is_block_without_semicolon{ + $last_nonblank_block_type} + || $last_nonblank_block_type =~ /$SUB_PATTERN/ + || $last_nonblank_block_type =~ /^\w+:$/ ) + ) + || $last_nonblank_type eq ';' + ) + { + + if ( + $rOpts->{'delete-semicolons'} + + # don't delete ; before a # because it would promote it + # to a block comment + && ( $next_nonblank_token_type ne '#' ) + ) + { + note_deleted_semicolon(); + $self->output_line_to_go() + unless ( $no_internal_newlines + || $index_start_one_line_block != UNDEFINED_INDEX ); + next; + } + else { + write_logfile_entry("Extra ';'\n"); + } + } + $self->store_token_to_go(); + + $self->output_line_to_go() + unless ( $no_internal_newlines + || ( $rOpts_keep_interior_semicolons && $j < $jmax ) + || ( $next_nonblank_token eq '}' ) ); + + } + + # handle here_doc target string + elsif ( $type eq 'h' ) { + + # no newlines after seeing here-target + $no_internal_newlines = 1; + destroy_one_line_block(); + $self->store_token_to_go(); + } + + # handle all other token types + else { + + $self->store_token_to_go(); + } + + # remember two previous nonblank OUTPUT tokens + if ( $type ne '#' && $type ne 'b' ) { + $last_last_nonblank_token = $last_nonblank_token; + $last_last_nonblank_type = $last_nonblank_type; + $last_nonblank_token = $token; + $last_nonblank_type = $type; + $last_nonblank_block_type = $block_type; + } + + # unset the continued-quote flag since it only applies to the + # first token, and we want to resume normal formatting if + # there are additional tokens on the line + $in_continued_quote = 0; + + } # end of loop over all tokens in this 'line_of_tokens' + + # we have to flush .. + if ( + + # if there is a side comment + ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} ) + + # if this line ends in a quote + # NOTE: This is critically important for insuring that quoted lines + # do not get processed by things like -sot and -sct + || $in_quote + + # if this is a VERSION statement + || $is_VERSION_statement + + # to keep a label at the end of a line + || $type eq 'J' + + # if we are instructed to keep all old line breaks + || !$rOpts->{'delete-old-newlines'} + ) + { + destroy_one_line_block(); + $self->output_line_to_go(); + } + + # mark old line breakpoints in current output stream + if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) { + my $jobp = $max_index_to_go; + if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 ) + { + $jobp--; + } + $old_breakpoint_to_go[$jobp] = 1; + } + return; + } ## end sub print_line_of_tokens +} ## end block print_line_of_tokens + +# sub output_line_to_go sends one logical line of tokens on down the +# pipeline to the VerticalAligner package, breaking the line into continuation +# lines as necessary. The line of tokens is ready to go in the "to_go" +# arrays. +sub output_line_to_go { + + my $self = shift; + my $rLL = $self->{rLL}; + + # debug stuff; this routine can be called from many points + FORMATTER_DEBUG_FLAG_OUTPUT && do { + my ( $a, $b, $c ) = caller; + write_diagnostics( +"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n" + ); + my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ]; + write_diagnostics("$output_str\n"); + }; + + # Do not end line in a weld + # TODO: Move this fix into the routine? + #my $jnb = $max_index_to_go; + #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- } + return if ( weld_len_right_to_go($max_index_to_go) ); + + # just set a tentative breakpoint if we might be in a one-line block + if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + set_forced_breakpoint($max_index_to_go); + return; + } + +## my $cscw_block_comment; +## $cscw_block_comment = $self->add_closing_side_comment() +## if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ); + + my $comma_arrow_count_contained = match_opening_and_closing_tokens(); + + # tell the -lp option we are outputting a batch so it can close + # any unfinished items in its stack + finish_lp_batch(); + + # If this line ends in a code block brace, set breaks at any + # previous closing code block braces to breakup a chain of code + # blocks on one line. This is very rare but can happen for + # user-defined subs. For example we might be looking at this: + # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { + my $saw_good_break = 0; # flag to force breaks even if short line + if ( + + # looking for opening or closing block brace + $block_type_to_go[$max_index_to_go] + + # but not one of these which are never duplicated on a line: + # until|while|for|if|elsif|else + && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } + ) + { + my $lev = $nesting_depth_to_go[$max_index_to_go]; + + # Walk backwards from the end and + # set break at any closing block braces at the same level. + # But quit if we are not in a chain of blocks. + for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) { + last if ( $levels_to_go[$i] < $lev ); # stop at a lower level + next if ( $levels_to_go[$i] > $lev ); # skip past higher level + + if ( $block_type_to_go[$i] ) { + if ( $tokens_to_go[$i] eq '}' ) { + set_forced_breakpoint($i); + $saw_good_break = 1; + } + } + + # quit if we see anything besides words, function, blanks + # at this level + elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } + } + } + + my $imin = 0; + my $imax = $max_index_to_go; + + # trim any blank tokens + if ( $max_index_to_go >= 0 ) { + if ( $types_to_go[$imin] eq 'b' ) { $imin++ } + if ( $types_to_go[$imax] eq 'b' ) { $imax-- } + } + + # anything left to write? + if ( $imin <= $imax ) { + + # add a blank line before certain key types but not after a comment + if ( $last_line_leading_type !~ /^[#]/ ) { + my $want_blank = 0; + my $leading_token = $tokens_to_go[$imin]; + my $leading_type = $types_to_go[$imin]; + + # blank lines before subs except declarations and one-liners + # MCONVERSION LOCATION - for sub tokenization change + if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) { + $want_blank = $rOpts->{'blank-lines-before-subs'} + if ( + terminal_type( \@types_to_go, \@block_type_to_go, $imin, + $imax ) !~ /^[\;\}]$/ + ); + } + + # break before all package declarations + # MCONVERSION LOCATION - for tokenizaton change + elsif ($leading_token =~ /^(package\s)/ + && $leading_type eq 'i' ) + { + $want_blank = $rOpts->{'blank-lines-before-packages'}; + } + + # break before certain key blocks except one-liners + if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) { + $want_blank = $rOpts->{'blank-lines-before-subs'} + if ( + terminal_type( \@types_to_go, \@block_type_to_go, $imin, + $imax ) ne '}' + ); + } + + # Break before certain block types if we haven't had a + # break at this level for a while. This is the + # difficult decision.. + elsif ($leading_type eq 'k' + && $last_line_leading_type ne 'b' + && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ ) + { + my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; + if ( !defined($lc) ) { $lc = 0 } + + $want_blank = + $rOpts->{'blanks-before-blocks'} + && $lc >= $rOpts->{'long-block-line-count'} + && $file_writer_object->get_consecutive_nonblank_lines() >= + $rOpts->{'long-block-line-count'} + && ( + terminal_type( \@types_to_go, \@block_type_to_go, $imin, + $imax ) ne '}' + ); + } + + # Check for blank lines wanted before a closing brace + if ( $leading_token eq '}' ) { + if ( $rOpts->{'blank-lines-before-closing-block'} + && $block_type_to_go[$imin] + && $block_type_to_go[$imin] =~ + /$blank_lines_before_closing_block_pattern/ ) + { + my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; + if ( $nblanks > $want_blank ) { + $want_blank = $nblanks; + } + } + } + + if ($want_blank) { + + # future: send blank line down normal path to VerticalAligner + Perl::Tidy::VerticalAligner::flush(); + $file_writer_object->require_blank_code_lines($want_blank); + } + } + + # update blank line variables and count number of consecutive + # non-blank, non-comment lines at this level + $last_last_line_leading_level = $last_line_leading_level; + $last_line_leading_level = $levels_to_go[$imin]; + if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } + $last_line_leading_type = $types_to_go[$imin]; + if ( $last_line_leading_level == $last_last_line_leading_level + && $last_line_leading_type ne 'b' + && $last_line_leading_type ne '#' + && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) + { + $nonblank_lines_at_depth[$last_line_leading_level]++; + } + else { + $nonblank_lines_at_depth[$last_line_leading_level] = 1; + } + + FORMATTER_DEBUG_FLAG_FLUSH && do { + my ( $package, $file, $line ) = caller; + print STDOUT +"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n"; + }; + + # add a couple of extra terminal blank tokens + pad_array_to_go(); + + # set all forced breakpoints for good list formatting + my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0; + + my $old_line_count_in_batch = + $self->get_old_line_count( $K_to_go[0], $K_to_go[$max_index_to_go] ); + + if ( + $is_long_line + || $old_line_count_in_batch > 1 + + # must always call scan_list() with unbalanced batches because it + # is maintaining some stacks + || is_unbalanced_batch() + + # call scan_list if we might want to break at commas + || ( + $comma_count_in_batch + && ( $rOpts_maximum_fields_per_table > 0 + || $rOpts_comma_arrow_breakpoints == 0 ) + ) + + # call scan_list if user may want to break open some one-line + # hash references + || ( $comma_arrow_count_contained + && $rOpts_comma_arrow_breakpoints != 3 ) + ) + { + ## This caused problems in one version of perl for unknown reasons: + ## $saw_good_break ||= scan_list(); + my $sgb = scan_list(); + $saw_good_break ||= $sgb; + } + + # let $ri_first and $ri_last be references to lists of + # first and last tokens of line fragments to output.. + my ( $ri_first, $ri_last ); + + # write a single line if.. + if ( + + # we aren't allowed to add any newlines + !$rOpts_add_newlines + + # or, we don't already have an interior breakpoint + # and we didn't see a good breakpoint + || ( + !$forced_breakpoint_count + && !$saw_good_break + + # and this line is 'short' + && !$is_long_line + ) + ) + { + @{$ri_first} = ($imin); + @{$ri_last} = ($imax); + } + + # otherwise use multiple lines + else { + + ( $ri_first, $ri_last, my $colon_count ) = + set_continuation_breaks($saw_good_break); + + break_all_chain_tokens( $ri_first, $ri_last ); + + break_equals( $ri_first, $ri_last ); + + # now we do a correction step to clean this up a bit + # (The only time we would not do this is for debugging) + if ( $rOpts->{'recombine'} ) { + ( $ri_first, $ri_last ) = + recombine_breakpoints( $ri_first, $ri_last ); + } + + insert_final_breaks( $ri_first, $ri_last ) if $colon_count; + } + + # do corrector step if -lp option is used + my $do_not_pad = 0; + if ($rOpts_line_up_parentheses) { + $do_not_pad = correct_lp_indentation( $ri_first, $ri_last ); + } + $self->unmask_phantom_semicolons( $ri_first, $ri_last ); + $self->send_lines_to_vertical_aligner( $ri_first, $ri_last, + $do_not_pad ); + + # Insert any requested blank lines after an opening brace. We have to + # skip back before any side comment to find the terminal token + my $iterm; + for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) { + next if $types_to_go[$iterm] eq '#'; + next if $types_to_go[$iterm] eq 'b'; + last; + } + + # write requested number of blank lines after an opening block brace + if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) { + if ( $rOpts->{'blank-lines-after-opening-block'} + && $block_type_to_go[$iterm] + && $block_type_to_go[$iterm] =~ + /$blank_lines_after_opening_block_pattern/ ) + { + my $nblanks = $rOpts->{'blank-lines-after-opening-block'}; + Perl::Tidy::VerticalAligner::flush(); + $file_writer_object->require_blank_code_lines($nblanks); + } + } + } + + prepare_for_new_input_lines(); + +## # output any new -cscw block comment +## if ($cscw_block_comment) { +## $self->flush(); +## $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); +## } + return; +} + +sub note_added_semicolon { + my ($line_number) = @_; + $last_added_semicolon_at = $line_number; + if ( $added_semicolon_count == 0 ) { + $first_added_semicolon_at = $last_added_semicolon_at; + } + $added_semicolon_count++; + write_logfile_entry("Added ';' here\n"); + return; +} + +sub note_deleted_semicolon { + $last_deleted_semicolon_at = $input_line_number; + if ( $deleted_semicolon_count == 0 ) { + $first_deleted_semicolon_at = $last_deleted_semicolon_at; + } + $deleted_semicolon_count++; + write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;) + return; +} + +sub note_embedded_tab { + $embedded_tab_count++; + $last_embedded_tab_at = $input_line_number; + if ( !$first_embedded_tab_at ) { + $first_embedded_tab_at = $last_embedded_tab_at; + } + + if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) { + write_logfile_entry("Embedded tabs in quote or pattern\n"); + } + return; +} + +sub starting_one_line_block { + + # after seeing an opening curly brace, look for the closing brace + # and see if the entire block will fit on a line. This routine is + # not always right because it uses the old whitespace, so a check + # is made later (at the closing brace) to make sure we really + # have a one-line block. We have to do this preliminary check, + # though, because otherwise we would always break at a semicolon + # within a one-line block if the block contains multiple statements. + + my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_; + my $rbreak_container = $self->{rbreak_container}; + + my $jmax_check = @{$rtoken_array}; + if ( $jmax_check < $jmax ) { + print STDERR "jmax=$jmax > $jmax_check\n"; + } + + # kill any current block - we can only go 1 deep + destroy_one_line_block(); + + # return value: + # 1=distance from start of block to opening brace exceeds line length + # 0=otherwise + + my $i_start = 0; + + # shouldn't happen: there must have been a prior call to + # store_token_to_go to put the opening brace in the output stream + if ( $max_index_to_go < 0 ) { + Fault("program bug: store_token_to_go called incorrectly\n"); + + #warning("program bug: store_token_to_go called incorrectly\n"); + ##report_definite_bug(); + } + + # return if block should be broken + my $type_sequence = $rtoken_array->[$j]->[_TYPE_SEQUENCE_]; + if ( $rbreak_container->{$type_sequence} ) { + return 0; + } + + my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_]; + + # find the starting keyword for this block (such as 'if', 'else', ...) + + if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) { + $i_start = $max_index_to_go; + } + + # the previous nonblank token should start these block types + elsif (( $last_last_nonblank_token_to_go eq $block_type ) + || ( $block_type =~ /^sub\b/ ) + || $block_type =~ /\(\)/ ) + { + $i_start = $last_last_nonblank_index_to_go; + + # For signatures and extended syntax ... + # If this brace follows a parenthesized list, we should look back to + # find the keyword before the opening paren because otherwise we might + # form a one line block which stays intack, and cause the parenthesized + # expression to break open. That looks bad. However, actually + # searching for the opening paren is slow and tedius. + # The actual keyword is often at the start of a line, but might not be. + # For example, we might have an anonymous sub with signature list + # following a =>. It is safe to mark the start anywhere before the + # opening paren, so we just go back to the prevoious break (or start of + # the line) if that is before the opening paren. The minor downside is + # that we may very occasionally break open a block unnecessarily. + if ( $tokens_to_go[$i_start] eq ')' ) { + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; } + my $lev = $levels_to_go[$i_start]; + if ( $lev > $level ) { return 0 } + } + } + + elsif ( $last_last_nonblank_token_to_go eq ')' ) { + + # For something like "if (xxx) {", the keyword "if" will be + # just after the most recent break. This will be 0 unless + # we have just killed a one-line block and are starting another. + # (doif.t) + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { + $i_start++; + } + + # Patch to avoid breaking short blocks defined with extended_syntax: + # Strip off any trailing () which was added in the parser to mark + # the opening keyword. For example, in the following + # create( TypeFoo $e) {$bubba} + # the blocktype would be marked as create() + my $stripped_block_type = $block_type; + $stripped_block_type =~ s/\(\)$//; + + unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) { + return 0; + } + } + + # patch for SWITCH/CASE to retain one-line case/when blocks + elsif ( $block_type eq 'case' || $block_type eq 'when' ) { + + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { + $i_start++; + } + unless ( $tokens_to_go[$i_start] eq $block_type ) { + return 0; + } + } + + else { + return 1; + } + + my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; + + # see if length is too long to even start + if ( $pos > maximum_line_length($i_start) ) { + return 1; + } + + foreach my $i ( $j + 1 .. $jmax ) { + + # old whitespace could be arbitrarily large, so don't use it + if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 } + else { $pos += rtoken_length($i) } + + # Return false result if we exceed the maximum line length, + if ( $pos > maximum_line_length($i_start) ) { + return 0; + } + + # or encounter another opening brace before finding the closing brace. + elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{' + && $rtoken_array->[$i]->[_TYPE_] eq '{' + && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) + { + return 0; + } + + # if we find our closing brace.. + elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}' + && $rtoken_array->[$i]->[_TYPE_] eq '}' + && $rtoken_array->[$i]->[_BLOCK_TYPE_] ) + { + + # be sure any trailing comment also fits on the line + my $i_nonblank = + ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1; + + # Patch for one-line sort/map/grep/eval blocks with side comments: + # We will ignore the side comment length for sort/map/grep/eval + # because this can lead to statements which change every time + # perltidy is run. Here is an example from Denis Moskowitz which + # oscillates between these two states without this patch: + +## -------- +## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## +## grep { +## $_->foo ne 'bar' +## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## -------- + + # When the first line is input it gets broken apart by the main + # line break logic in sub print_line_of_tokens. + # When the second line is input it gets recombined by + # print_line_of_tokens and passed to the output routines. The + # output routines (set_continuation_breaks) do not break it apart + # because the bond strengths are set to the highest possible value + # for grep/map/eval/sort blocks, so the first version gets output. + # It would be possible to fix this by changing bond strengths, + # but they are high to prevent errors in older versions of perl. + + if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#' + && !$is_sort_map_grep{$block_type} ) + { + + $pos += rtoken_length($i_nonblank); + + if ( $i_nonblank > $i + 1 ) { + + # source whitespace could be anything, assume + # at least one space before the hash on output + if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) { + $pos += 1; + } + else { $pos += rtoken_length( $i + 1 ) } + } + + if ( $pos >= maximum_line_length($i_start) ) { + return 0; + } + } + + # ok, it's a one-line block + create_one_line_block( $i_start, 20 ); + return 0; + } + + # just keep going for other characters + else { + } + } + + # Allow certain types of new one-line blocks to form by joining + # input lines. These can be safely done, but for other block types, + # we keep old one-line blocks but do not form new ones. It is not + # always a good idea to make as many one-line blocks as possible, + # so other types are not done. The user can always use -mangle. + if ( $is_sort_map_grep_eval{$block_type} ) { + create_one_line_block( $i_start, 1 ); + } + return 0; +} + +sub unstore_token_to_go { + + # remove most recent token from output stream + my $self = shift; + if ( $max_index_to_go > 0 ) { + $max_index_to_go--; + } + else { + $max_index_to_go = UNDEFINED_INDEX; + } + return; +} + +sub want_blank_line { + my $self = shift; + $self->flush(); + $file_writer_object->want_blank_line(); + return; +} + +sub write_unindented_line { + my ( $self, $line ) = @_; + $self->flush(); + $file_writer_object->write_line($line); + return; +} + +sub undo_ci { + + # Undo continuation indentation in certain sequences + # For example, we can undo continuation indentation in sort/map/grep chains + # my $dat1 = pack( "n*", + # map { $_, $lookup->{$_} } + # sort { $a <=> $b } + # grep { $lookup->{$_} ne $default } keys %$lookup ); + # To align the map/sort/grep keywords like this: + # my $dat1 = pack( "n*", + # map { $_, $lookup->{$_} } + # sort { $a <=> $b } + # grep { $lookup->{$_} ne $default } keys %$lookup ); + my ( $ri_first, $ri_last ) = @_; + my ( $line_1, $line_2, $lev_last ); + my $this_line_is_semicolon_terminated; + my $max_line = @{$ri_first} - 1; + + # looking at each line of this batch.. + # We are looking at leading tokens and looking for a sequence + # all at the same level and higher level than enclosing lines. + foreach my $line ( 0 .. $max_line ) { + + my $ibeg = $ri_first->[$line]; + my $lev = $levels_to_go[$ibeg]; + if ( $line > 0 ) { + + # if we have started a chain.. + if ($line_1) { + + # see if it continues.. + if ( $lev == $lev_last ) { + if ( $types_to_go[$ibeg] eq 'k' + && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) + { + + # chain continues... + # check for chain ending at end of a statement + if ( $line == $max_line ) { + + # see of this line ends a statement + my $iend = $ri_last->[$line]; + $this_line_is_semicolon_terminated = + $types_to_go[$iend] eq ';' + + # with possible side comment + || ( $types_to_go[$iend] eq '#' + && $iend - $ibeg >= 2 + && $types_to_go[ $iend - 2 ] eq ';' + && $types_to_go[ $iend - 1 ] eq 'b' ); + } + $line_2 = $line if ($this_line_is_semicolon_terminated); + } + else { + + # kill chain + $line_1 = undef; + } + } + elsif ( $lev < $lev_last ) { + + # chain ends with previous line + $line_2 = $line - 1; + } + elsif ( $lev > $lev_last ) { + + # kill chain + $line_1 = undef; + } + + # undo the continuation indentation if a chain ends + if ( defined($line_2) && defined($line_1) ) { + my $continuation_line_count = $line_2 - $line_1 + 1; + @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] = + (0) x ($continuation_line_count) + if ( $continuation_line_count >= 0 ); + @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] + = @reduced_spaces_to_go[ @{$ri_first} + [ $line_1 .. $line_2 ] ]; + $line_1 = undef; + } + } + + # not in a chain yet.. + else { + + # look for start of a new sort/map/grep chain + if ( $lev > $lev_last ) { + if ( $types_to_go[$ibeg] eq 'k' + && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) + { + $line_1 = $line; + } + } + } + } + $lev_last = $lev; + } + return; +} + +sub undo_lp_ci { + + # If there is a single, long parameter within parens, like this: + # + # $self->command( "/msg " + # . $infoline->chan + # . " You said $1, but did you know that it's square was " + # . $1 * $1 . " ?" ); + # + # we can remove the continuation indentation of the 2nd and higher lines + # to achieve this effect, which is more pleasing: + # + # $self->command("/msg " + # . $infoline->chan + # . " You said $1, but did you know that it's square was " + # . $1 * $1 . " ?"); + + my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; + my $max_line = @{$ri_first} - 1; + + # must be multiple lines + return unless $max_line > $line_open; + + my $lev_start = $levels_to_go[$i_start]; + my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; + + # see if all additional lines in this container have continuation + # indentation + my $n; + my $line_1 = 1 + $line_open; + for ( $n = $line_1 ; $n <= $max_line ; ++$n ) { + my $ibeg = $ri_first->[$n]; + my $iend = $ri_last->[$n]; + if ( $ibeg eq $closing_index ) { $n--; last } + return if ( $lev_start != $levels_to_go[$ibeg] ); + return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); + last if ( $closing_index <= $iend ); + } + + # we can reduce the indentation of all continuation lines + my $continuation_line_count = $n - $line_open; + @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = + (0) x ($continuation_line_count); + @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = + @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ]; + return; +} + +sub pad_token { + + # insert $pad_spaces before token number $ipad + my ( $ipad, $pad_spaces ) = @_; + if ( $pad_spaces > 0 ) { + $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad]; + } + elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) { + $tokens_to_go[$ipad] = ""; + } + else { + + # shouldn't happen + return; + } + + $token_lengths_to_go[$ipad] += $pad_spaces; + foreach my $i ( $ipad .. $max_index_to_go ) { + $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; + } + return; +} + +{ + my %is_math_op; + + BEGIN { + + my @q = qw( + - * / ); + @is_math_op{@q} = (1) x scalar(@q); + } + + sub set_logical_padding { + + # Look at a batch of lines and see if extra padding can improve the + # alignment when there are certain leading operators. Here is an + # example, in which some extra space is introduced before + # '( $year' to make it line up with the subsequent lines: + # + # if ( ( $Year < 1601 ) + # || ( $Year > 2899 ) + # || ( $EndYear < 1601 ) + # || ( $EndYear > 2899 ) ) + # { + # &Error_OutOfRange; + # } + # + my ( $ri_first, $ri_last ) = @_; + my $max_line = @{$ri_first} - 1; + + # FIXME: move these declarations below + my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces, + $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); + + # looking at each line of this batch.. + foreach my $line ( 0 .. $max_line - 1 ) { + + # see if the next line begins with a logical operator + $ibeg = $ri_first->[$line]; + $iend = $ri_last->[$line]; + $ibeg_next = $ri_first->[ $line + 1 ]; + $tok_next = $tokens_to_go[$ibeg_next]; + $type_next = $types_to_go[$ibeg_next]; + + $has_leading_op_next = ( $tok_next =~ /^\w/ ) + ? $is_chain_operator{$tok_next} # + - * / : ? && || + : $is_chain_operator{$type_next}; # and, or + + next unless ($has_leading_op_next); + + # next line must not be at lesser depth + next + if ( $nesting_depth_to_go[$ibeg] > + $nesting_depth_to_go[$ibeg_next] ); + + # identify the token in this line to be padded on the left + $ipad = undef; + + # handle lines at same depth... + if ( $nesting_depth_to_go[$ibeg] == + $nesting_depth_to_go[$ibeg_next] ) + { + + # if this is not first line of the batch ... + if ( $line > 0 ) { + + # and we have leading operator.. + next if $has_leading_op; + + # Introduce padding if.. + # 1. the previous line is at lesser depth, or + # 2. the previous line ends in an assignment + # 3. the previous line ends in a 'return' + # 4. the previous line ends in a comma + # Example 1: previous line at lesser depth + # if ( ( $Year < 1601 ) # <- we are here but + # || ( $Year > 2899 ) # list has not yet + # || ( $EndYear < 1601 ) # collapsed vertically + # || ( $EndYear > 2899 ) ) + # { + # + # Example 2: previous line ending in assignment: + # $leapyear = + # $year % 4 ? 0 # <- We are here + # : $year % 100 ? 1 + # : $year % 400 ? 0 + # : 1; + # + # Example 3: previous line ending in comma: + # push @expr, + # /test/ ? undef + # : eval($_) ? 1 + # : eval($_) ? 1 + # : 0; + + # be sure levels agree (do not indent after an indented 'if') + next + if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); + + # allow padding on first line after a comma but only if: + # (1) this is line 2 and + # (2) there are at more than three lines and + # (3) lines 3 and 4 have the same leading operator + # These rules try to prevent padding within a long + # comma-separated list. + my $ok_comma; + if ( $types_to_go[$iendm] eq ',' + && $line == 1 + && $max_line > 2 ) + { + my $ibeg_next_next = $ri_first->[ $line + 2 ]; + my $tok_next_next = $tokens_to_go[$ibeg_next_next]; + $ok_comma = $tok_next_next eq $tok_next; + } + + next + unless ( + $is_assignment{ $types_to_go[$iendm] } + || $ok_comma + || ( $nesting_depth_to_go[$ibegm] < + $nesting_depth_to_go[$ibeg] ) + || ( $types_to_go[$iendm] eq 'k' + && $tokens_to_go[$iendm] eq 'return' ) + ); + + # we will add padding before the first token + $ipad = $ibeg; + } + + # for first line of the batch.. + else { + + # WARNING: Never indent if first line is starting in a + # continued quote, which would change the quote. + next if $starting_in_quote; + + # if this is text after closing '}' + # then look for an interior token to pad + if ( $types_to_go[$ibeg] eq '}' ) { + + } + + # otherwise, we might pad if it looks really good + else { + + # we might pad token $ibeg, so be sure that it + # is at the same depth as the next line. + next + if ( $nesting_depth_to_go[$ibeg] != + $nesting_depth_to_go[$ibeg_next] ); + + # We can pad on line 1 of a statement if at least 3 + # lines will be aligned. Otherwise, it + # can look very confusing. + + # We have to be careful not to pad if there are too few + # lines. The current rule is: + # (1) in general we require at least 3 consecutive lines + # with the same leading chain operator token, + # (2) but an exception is that we only require two lines + # with leading colons if there are no more lines. For example, + # the first $i in the following snippet would get padding + # by the second rule: + # + # $i == 1 ? ( "First", "Color" ) + # : $i == 2 ? ( "Then", "Rarity" ) + # : ( "Then", "Name" ); + + if ( $max_line > 1 ) { + my $leading_token = $tokens_to_go[$ibeg_next]; + my $tokens_differ; + + # never indent line 1 of a '.' series because + # previous line is most likely at same level. + # TODO: we should also look at the leasing_spaces + # of the last output line and skip if it is same + # as this line. + next if ( $leading_token eq '.' ); + + my $count = 1; + foreach my $l ( 2 .. 3 ) { + last if ( $line + $l > $max_line ); + my $ibeg_next_next = $ri_first->[ $line + $l ]; + if ( $tokens_to_go[$ibeg_next_next] ne + $leading_token ) + { + $tokens_differ = 1; + last; + } + $count++; + } + next if ($tokens_differ); + next if ( $count < 3 && $leading_token ne ':' ); + $ipad = $ibeg; + } + else { + next; + } + } + } + } + + # find interior token to pad if necessary + if ( !defined($ipad) ) { + + for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) { + + # find any unclosed container + next + unless ( $type_sequence_to_go[$i] + && $mate_index_to_go[$i] > $iend ); + + # find next nonblank token to pad + $ipad = $inext_to_go[$i]; + last if ( $ipad > $iend ); + } + last unless $ipad; + } + + # We cannot pad the first leading token of a file because + # it could cause a bug in which the starting indentation + # level is guessed incorrectly each time the code is run + # though perltidy, thus causing the code to march off to + # the right. For example, the following snippet would have + # this problem: + +## ov_method mycan( $package, '(""' ), $package +## or ov_method mycan( $package, '(0+' ), $package +## or ov_method mycan( $package, '(bool' ), $package +## or ov_method mycan( $package, '(nomethod' ), $package; + + # If this snippet is within a block this won't happen + # unless the user just processes the snippet alone within + # an editor. In that case either the user will see and + # fix the problem or it will be corrected next time the + # entire file is processed with perltidy. + ##next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 ); + next if ( $ipad == 0 && $peak_batch_size <= 1 ); + +## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT +## IT DID MORE HARM THAN GOOD +## ceil( +## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 +## / $upem +## ), +##? # do not put leading padding for just 2 lines of math +##? if ( $ipad == $ibeg +##? && $line > 0 +##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] +##? && $is_math_op{$type_next} +##? && $line + 2 <= $max_line ) +##? { +##? my $ibeg_next_next = $ri_first->[ $line + 2 ]; +##? my $type_next_next = $types_to_go[$ibeg_next_next]; +##? next if !$is_math_op{$type_next_next}; +##? } + + # next line must not be at greater depth + my $iend_next = $ri_last->[ $line + 1 ]; + next + if ( $nesting_depth_to_go[ $iend_next + 1 ] > + $nesting_depth_to_go[$ipad] ); + + # lines must be somewhat similar to be padded.. + my $inext_next = $inext_to_go[$ibeg_next]; + my $type = $types_to_go[$ipad]; + my $type_next = $types_to_go[ $ipad + 1 ]; + + # see if there are multiple continuation lines + my $logical_continuation_lines = 1; + if ( $line + 2 <= $max_line ) { + my $leading_token = $tokens_to_go[$ibeg_next]; + my $ibeg_next_next = $ri_first->[ $line + 2 ]; + if ( $tokens_to_go[$ibeg_next_next] eq $leading_token + && $nesting_depth_to_go[$ibeg_next] eq + $nesting_depth_to_go[$ibeg_next_next] ) + { + $logical_continuation_lines++; + } + } + + # see if leading types match + my $types_match = $types_to_go[$inext_next] eq $type; + my $matches_without_bang; + + # if first line has leading ! then compare the following token + if ( !$types_match && $type eq '!' ) { + $types_match = $matches_without_bang = + $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; + } + + if ( + + # either we have multiple continuation lines to follow + # and we are not padding the first token + ( $logical_continuation_lines > 1 && $ipad > 0 ) + + # or.. + || ( + + # types must match + $types_match + + # and keywords must match if keyword + && !( + $type eq 'k' + && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] + ) + ) + ) + { + + #----------------------begin special checks-------------- + # + # SPECIAL CHECK 1: + # A check is needed before we can make the pad. + # If we are in a list with some long items, we want each + # item to stand out. So in the following example, the + # first line beginning with '$casefold->' would look good + # padded to align with the next line, but then it + # would be indented more than the last line, so we + # won't do it. + # + # ok( + # $casefold->{code} eq '0041' + # && $casefold->{status} eq 'C' + # && $casefold->{mapping} eq '0061', + # 'casefold 0x41' + # ); + # + # Note: + # It would be faster, and almost as good, to use a comma + # count, and not pad if comma_count > 1 and the previous + # line did not end with a comma. + # + my $ok_to_pad = 1; + + my $ibg = $ri_first->[ $line + 1 ]; + my $depth = $nesting_depth_to_go[ $ibg + 1 ]; + + # just use simplified formula for leading spaces to avoid + # needless sub calls + my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; + + # look at each line beyond the next .. + my $l = $line + 1; + foreach my $ltest ( $line + 2 .. $max_line ) { + $l = $ltest; + my $ibg = $ri_first->[$l]; + + # quit looking at the end of this container + last + if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth ) + || ( $nesting_depth_to_go[$ibg] < $depth ); + + # cannot do the pad if a later line would be + # outdented more + if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) { + $ok_to_pad = 0; + last; + } + } + + # don't pad if we end in a broken list + if ( $l == $max_line ) { + my $i2 = $ri_last->[$l]; + if ( $types_to_go[$i2] eq '#' ) { + my $i1 = $ri_first->[$l]; + next + if ( + terminal_type( \@types_to_go, \@block_type_to_go, + $i1, $i2 ) eq ',' + ); + } + } + + # SPECIAL CHECK 2: + # a minus may introduce a quoted variable, and we will + # add the pad only if this line begins with a bare word, + # such as for the word 'Button' here: + # [ + # Button => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + # -accelerator => "Meta+$_" + # ]; + # + # On the other hand, if 'Button' is quoted, it looks best + # not to pad: + # [ + # 'Button' => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + # -accelerator => "Meta+$_" + # ]; + if ( $types_to_go[$ibeg_next] eq 'm' ) { + $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; + } + + next unless $ok_to_pad; + + #----------------------end special check--------------- + + my $length_1 = total_line_length( $ibeg, $ipad - 1 ); + my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); + $pad_spaces = $length_2 - $length_1; + + # If the first line has a leading ! and the second does + # not, then remove one space to try to align the next + # leading characters, which are often the same. For example: + # if ( !$ts + # || $ts == $self->Holder + # || $self->Holder->Type eq "Arena" ) + # + # This usually helps readability, but if there are subsequent + # ! operators things will still get messed up. For example: + # + # if ( !exists $Net::DNS::typesbyname{$qtype} + # && exists $Net::DNS::classesbyname{$qtype} + # && !exists $Net::DNS::classesbyname{$qclass} + # && exists $Net::DNS::typesbyname{$qclass} ) + # We can't fix that. + if ($matches_without_bang) { $pad_spaces-- } + + # make sure this won't change if -lp is used + my $indentation_1 = $leading_spaces_to_go[$ibeg]; + if ( ref($indentation_1) ) { + if ( $indentation_1->get_recoverable_spaces() == 0 ) { + my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; + unless ( $indentation_2->get_recoverable_spaces() == 0 ) + { + $pad_spaces = 0; + } + } + } + + # we might be able to handle a pad of -1 by removing a blank + # token + if ( $pad_spaces < 0 ) { + + if ( $pad_spaces == -1 ) { + if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) + { + pad_token( $ipad - 1, $pad_spaces ); + } + } + $pad_spaces = 0; + } + + # now apply any padding for alignment + if ( $ipad >= 0 && $pad_spaces ) { + + my $length_t = total_line_length( $ibeg, $iend ); + if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) ) + { + pad_token( $ipad, $pad_spaces ); + } + } + } + } + continue { + $iendm = $iend; + $ibegm = $ibeg; + $has_leading_op = $has_leading_op_next; + } # end of loop over lines + return; + } +} + +sub correct_lp_indentation { + + # When the -lp option is used, we need to make a last pass through + # each line to correct the indentation positions in case they differ + # from the predictions. This is necessary because perltidy uses a + # predictor/corrector method for aligning with opening parens. The + # predictor is usually good, but sometimes stumbles. The corrector + # tries to patch things up once the actual opening paren locations + # are known. + my ( $ri_first, $ri_last ) = @_; + my $do_not_pad = 0; + + # Note on flag '$do_not_pad': + # We want to avoid a situation like this, where the aligner inserts + # whitespace before the '=' to align it with a previous '=', because + # otherwise the parens might become mis-aligned in a situation like + # this, where the '=' has become aligned with the previous line, + # pushing the opening '(' forward beyond where we want it. + # + # $mkFloor::currentRoom = ''; + # $mkFloor::c_entry = $c->Entry( + # -width => '10', + # -relief => 'sunken', + # ... + # ); + # + # We leave it to the aligner to decide how to do this. + + # first remove continuation indentation if appropriate + my $max_line = @{$ri_first} - 1; + + # looking at each line of this batch.. + my ( $ibeg, $iend ); + foreach my $line ( 0 .. $max_line ) { + $ibeg = $ri_first->[$line]; + $iend = $ri_last->[$line]; + + # looking at each token in this output line.. + foreach my $i ( $ibeg .. $iend ) { + + # How many space characters to place before this token + # for special alignment. Actual padding is done in the + # continue block. + + # looking for next unvisited indentation item + my $indentation = $leading_spaces_to_go[$i]; + if ( !$indentation->get_marked() ) { + $indentation->set_marked(1); + + # looking for indentation item for which we are aligning + # with parens, braces, and brackets + next unless ( $indentation->get_align_paren() ); + + # skip closed container on this line + if ( $i > $ibeg ) { + my $im = max( $ibeg, $iprev_to_go[$i] ); + if ( $type_sequence_to_go[$im] + && $mate_index_to_go[$im] <= $iend ) + { + next; + } + } + + if ( $line == 1 && $i == $ibeg ) { + $do_not_pad = 1; + } + + # Ok, let's see what the error is and try to fix it + my $actual_pos; + my $predicted_pos = $indentation->get_spaces(); + if ( $i > $ibeg ) { + + # token is mid-line - use length to previous token + $actual_pos = total_line_length( $ibeg, $i - 1 ); + + # for mid-line token, we must check to see if all + # additional lines have continuation indentation, + # and remove it if so. Otherwise, we do not get + # good alignment. + my $closing_index = $indentation->get_closed(); + if ( $closing_index > $iend ) { + my $ibeg_next = $ri_first->[ $line + 1 ]; + if ( $ci_levels_to_go[$ibeg_next] > 0 ) { + undo_lp_ci( $line, $i, $closing_index, $ri_first, + $ri_last ); + } + } + } + elsif ( $line > 0 ) { + + # handle case where token starts a new line; + # use length of previous line + my $ibegm = $ri_first->[ $line - 1 ]; + my $iendm = $ri_last->[ $line - 1 ]; + $actual_pos = total_line_length( $ibegm, $iendm ); + + # follow -pt style + ++$actual_pos + if ( $types_to_go[ $iendm + 1 ] eq 'b' ); + } + else { + + # token is first character of first line of batch + $actual_pos = $predicted_pos; + } + + my $move_right = $actual_pos - $predicted_pos; + + # done if no error to correct (gnu2.t) + if ( $move_right == 0 ) { + $indentation->set_recoverable_spaces($move_right); + next; + } + + # if we have not seen closure for this indentation in + # this batch, we can only pass on a request to the + # vertical aligner + my $closing_index = $indentation->get_closed(); + + if ( $closing_index < 0 ) { + $indentation->set_recoverable_spaces($move_right); + next; + } + + # If necessary, look ahead to see if there is really any + # leading whitespace dependent on this whitespace, and + # also find the longest line using this whitespace. + # Since it is always safe to move left if there are no + # dependents, we only need to do this if we may have + # dependent nodes or need to move right. + + my $right_margin = 0; + my $have_child = $indentation->get_have_child(); + + my %saw_indentation; + my $line_count = 1; + $saw_indentation{$indentation} = $indentation; + + if ( $have_child || $move_right > 0 ) { + $have_child = 0; + my $max_length = 0; + if ( $i == $ibeg ) { + $max_length = total_line_length( $ibeg, $iend ); + } + + # look ahead at the rest of the lines of this batch.. + foreach my $line_t ( $line + 1 .. $max_line ) { + my $ibeg_t = $ri_first->[$line_t]; + my $iend_t = $ri_last->[$line_t]; + last if ( $closing_index <= $ibeg_t ); + + # remember all different indentation objects + my $indentation_t = $leading_spaces_to_go[$ibeg_t]; + $saw_indentation{$indentation_t} = $indentation_t; + $line_count++; + + # remember longest line in the group + my $length_t = total_line_length( $ibeg_t, $iend_t ); + if ( $length_t > $max_length ) { + $max_length = $length_t; + } + } + $right_margin = maximum_line_length($ibeg) - $max_length; + if ( $right_margin < 0 ) { $right_margin = 0 } + } + + my $first_line_comma_count = + grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; + my $comma_count = $indentation->get_comma_count(); + my $arrow_count = $indentation->get_arrow_count(); + + # This is a simple approximate test for vertical alignment: + # if we broke just after an opening paren, brace, bracket, + # and there are 2 or more commas in the first line, + # and there are no '=>'s, + # then we are probably vertically aligned. We could set + # an exact flag in sub scan_list, but this is good + # enough. + my $indentation_count = keys %saw_indentation; + my $is_vertically_aligned = + ( $i == $ibeg + && $first_line_comma_count > 1 + && $indentation_count == 1 + && ( $arrow_count == 0 || $arrow_count == $line_count ) ); + + # Make the move if possible .. + if ( + + # we can always move left + $move_right < 0 + + # but we should only move right if we are sure it will + # not spoil vertical alignment + || ( $comma_count == 0 ) + || ( $comma_count > 0 && !$is_vertically_aligned ) + ) + { + my $move = + ( $move_right <= $right_margin ) + ? $move_right + : $right_margin; + + foreach ( keys %saw_indentation ) { + $saw_indentation{$_} + ->permanently_decrease_available_spaces( -$move ); + } + } + + # Otherwise, record what we want and the vertical aligner + # will try to recover it. + else { + $indentation->set_recoverable_spaces($move_right); + } + } + } + } + return $do_not_pad; +} + +# flush is called to output any tokens in the pipeline, so that +# an alternate source of lines can be written in the correct order + +sub flush { + my $self = shift; + destroy_one_line_block(); + $self->output_line_to_go(); + Perl::Tidy::VerticalAligner::flush(); + return; +} + +sub reset_block_text_accumulator { + + # save text after 'if' and 'elsif' to append after 'else' + if ($accumulating_text_for_block) { + + if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { + push @{$rleading_block_if_elsif_text}, $leading_block_text; + } + } + $accumulating_text_for_block = ""; + $leading_block_text = ""; + $leading_block_text_level = 0; + $leading_block_text_length_exceeded = 0; + $leading_block_text_line_number = 0; + $leading_block_text_line_length = 0; + return; +} + +sub set_block_text_accumulator { + my $i = shift; + $accumulating_text_for_block = $tokens_to_go[$i]; + if ( $accumulating_text_for_block !~ /^els/ ) { + $rleading_block_if_elsif_text = []; + } + $leading_block_text = ""; + $leading_block_text_level = $levels_to_go[$i]; + $leading_block_text_line_number = get_output_line_number(); + ##$vertical_aligner_object->get_output_line_number(); + $leading_block_text_length_exceeded = 0; + + # this will contain the column number of the last character + # of the closing side comment + $leading_block_text_line_length = + length($csc_last_label) + + length($accumulating_text_for_block) + + length( $rOpts->{'closing-side-comment-prefix'} ) + + $leading_block_text_level * $rOpts_indent_columns + 3; + return; +} + +sub accumulate_block_text { + my $i = shift; + + # accumulate leading text for -csc, ignoring any side comments + if ( $accumulating_text_for_block + && !$leading_block_text_length_exceeded + && $types_to_go[$i] ne '#' ) + { + + my $added_length = $token_lengths_to_go[$i]; + $added_length += 1 if $i == 0; + my $new_line_length = $leading_block_text_line_length + $added_length; + + # we can add this text if we don't exceed some limits.. + if ( + + # we must not have already exceeded the text length limit + length($leading_block_text) < + $rOpts_closing_side_comment_maximum_text + + # and either: + # the new total line length must be below the line length limit + # or the new length must be below the text length limit + # (ie, we may allow one token to exceed the text length limit) + && ( + $new_line_length < + maximum_line_length_for_level($leading_block_text_level) + + || length($leading_block_text) + $added_length < + $rOpts_closing_side_comment_maximum_text + ) + + # UNLESS: we are adding a closing paren before the brace we seek. + # This is an attempt to avoid situations where the ... to be + # added are longer than the omitted right paren, as in: + + # foreach my $item (@a_rather_long_variable_name_here) { + # &whatever; + # } ## end foreach my $item (@a_rather_long_variable_name_here... + + || ( + $tokens_to_go[$i] eq ')' + && ( + ( + $i + 1 <= $max_index_to_go + && $block_type_to_go[ $i + 1 ] eq + $accumulating_text_for_block + ) + || ( $i + 2 <= $max_index_to_go + && $block_type_to_go[ $i + 2 ] eq + $accumulating_text_for_block ) + ) + ) + ) + { + + # add an extra space at each newline + if ( $i == 0 ) { $leading_block_text .= ' ' } + + # add the token text + $leading_block_text .= $tokens_to_go[$i]; + $leading_block_text_line_length = $new_line_length; + } + + # show that text was truncated if necessary + elsif ( $types_to_go[$i] ne 'b' ) { + $leading_block_text_length_exceeded = 1; + $leading_block_text .= '...'; + } + } + return; +} + +{ + my %is_if_elsif_else_unless_while_until_for_foreach; + + BEGIN { + + # These block types may have text between the keyword and opening + # curly. Note: 'else' does not, but must be included to allow trailing + # if/elsif text to be appended. + # patch for SWITCH/CASE: added 'case' and 'when' + my @q = + qw(if elsif else unless while until for foreach case when catch); + @is_if_elsif_else_unless_while_until_for_foreach{@q} = + (1) x scalar(@q); + } + + sub accumulate_csc_text { + + # called once per output buffer when -csc is used. Accumulates + # the text placed after certain closing block braces. + # Defines and returns the following for this buffer: + + my $block_leading_text = ""; # the leading text of the last '}' + my $rblock_leading_if_elsif_text; + my $i_block_leading_text = + -1; # index of token owning block_leading_text + my $block_line_count = 100; # how many lines the block spans + my $terminal_type = 'b'; # type of last nonblank token + my $i_terminal = 0; # index of last nonblank token + my $terminal_block_type = ""; + + # update most recent statement label + $csc_last_label = "" unless ($csc_last_label); + if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } + my $block_label = $csc_last_label; + + # Loop over all tokens of this batch + for my $i ( 0 .. $max_index_to_go ) { + my $type = $types_to_go[$i]; + my $block_type = $block_type_to_go[$i]; + my $token = $tokens_to_go[$i]; + + # remember last nonblank token type + if ( $type ne '#' && $type ne 'b' ) { + $terminal_type = $type; + $terminal_block_type = $block_type; + $i_terminal = $i; + } + + my $type_sequence = $type_sequence_to_go[$i]; + if ( $block_type && $type_sequence ) { + + if ( $token eq '}' ) { + + # restore any leading text saved when we entered this block + if ( defined( $block_leading_text{$type_sequence} ) ) { + ( $block_leading_text, $rblock_leading_if_elsif_text ) + = @{ $block_leading_text{$type_sequence} }; + $i_block_leading_text = $i; + delete $block_leading_text{$type_sequence}; + $rleading_block_if_elsif_text = + $rblock_leading_if_elsif_text; + } + + if ( defined( $csc_block_label{$type_sequence} ) ) { + $block_label = $csc_block_label{$type_sequence}; + delete $csc_block_label{$type_sequence}; + } + + # if we run into a '}' then we probably started accumulating + # at something like a trailing 'if' clause..no harm done. + if ( $accumulating_text_for_block + && $levels_to_go[$i] <= $leading_block_text_level ) + { + my $lev = $levels_to_go[$i]; + reset_block_text_accumulator(); + } + + if ( defined( $block_opening_line_number{$type_sequence} ) ) + { + my $output_line_number = get_output_line_number(); + ##$vertical_aligner_object->get_output_line_number(); + $block_line_count = + $output_line_number - + $block_opening_line_number{$type_sequence} + 1; + delete $block_opening_line_number{$type_sequence}; + } + else { + + # Error: block opening line undefined for this line.. + # This shouldn't be possible, but it is not a + # significant problem. + } + } + + elsif ( $token eq '{' ) { + + my $line_number = get_output_line_number(); + ##$vertical_aligner_object->get_output_line_number(); + $block_opening_line_number{$type_sequence} = $line_number; + + # set a label for this block, except for + # a bare block which already has the label + # A label can only be used on the next { + if ( $block_type =~ /:$/ ) { $csc_last_label = "" } + $csc_block_label{$type_sequence} = $csc_last_label; + $csc_last_label = ""; + + if ( $accumulating_text_for_block + && $levels_to_go[$i] == $leading_block_text_level ) + { + + if ( $accumulating_text_for_block eq $block_type ) { + + # save any leading text before we enter this block + $block_leading_text{$type_sequence} = [ + $leading_block_text, + $rleading_block_if_elsif_text + ]; + $block_opening_line_number{$type_sequence} = + $leading_block_text_line_number; + reset_block_text_accumulator(); + } + else { + + # shouldn't happen, but not a serious error. + # We were accumulating -csc text for block type + # $accumulating_text_for_block and unexpectedly + # encountered a '{' for block type $block_type. + } + } + } + } + + if ( $type eq 'k' + && $csc_new_statement_ok + && $is_if_elsif_else_unless_while_until_for_foreach{$token} + && $token =~ /$closing_side_comment_list_pattern/o ) + { + set_block_text_accumulator($i); + } + else { + + # note: ignoring type 'q' because of tricks being played + # with 'q' for hanging side comments + if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { + $csc_new_statement_ok = + ( $block_type || $type eq 'J' || $type eq ';' ); + } + if ( $type eq ';' + && $accumulating_text_for_block + && $levels_to_go[$i] == $leading_block_text_level ) + { + reset_block_text_accumulator(); + } + else { + accumulate_block_text($i); + } + } + } + + # Treat an 'else' block specially by adding preceding 'if' and + # 'elsif' text. Otherwise, the 'end else' is not helpful, + # especially for cuddled-else formatting. + if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { + $block_leading_text = + make_else_csc_text( $i_terminal, $terminal_block_type, + $block_leading_text, $rblock_leading_if_elsif_text ); + } + + # if this line ends in a label then remember it for the next pass + $csc_last_label = ""; + if ( $terminal_type eq 'J' ) { + $csc_last_label = $tokens_to_go[$i_terminal]; + } + + return ( $terminal_type, $i_terminal, $i_block_leading_text, + $block_leading_text, $block_line_count, $block_label ); + } +} + +sub make_else_csc_text { + + # create additional -csc text for an 'else' and optionally 'elsif', + # depending on the value of switch + # $rOpts_closing_side_comment_else_flag: + # + # = 0 add 'if' text to trailing else + # = 1 same as 0 plus: + # add 'if' to 'elsif's if can fit in line length + # add last 'elsif' to trailing else if can fit in one line + # = 2 same as 1 but do not check if exceed line length + # + # $rif_elsif_text = a reference to a list of all previous closing + # side comments created for this if block + # + my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; + my $csc_text = $block_leading_text; + + if ( $block_type eq 'elsif' + && $rOpts_closing_side_comment_else_flag == 0 ) + { + return $csc_text; + } + + my $count = @{$rif_elsif_text}; + return $csc_text unless ($count); + + my $if_text = '[ if' . $rif_elsif_text->[0]; + + # always show the leading 'if' text on 'else' + if ( $block_type eq 'else' ) { + $csc_text .= $if_text; + } + + # see if that's all + if ( $rOpts_closing_side_comment_else_flag == 0 ) { + return $csc_text; + } + + my $last_elsif_text = ""; + if ( $count > 1 ) { + $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; + if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } + } + + # tentatively append one more item + my $saved_text = $csc_text; + if ( $block_type eq 'else' ) { + $csc_text .= $last_elsif_text; + } + else { + $csc_text .= ' ' . $if_text; + } + + # all done if no length checks requested + if ( $rOpts_closing_side_comment_else_flag == 2 ) { + return $csc_text; + } + + # undo it if line length exceeded + my $length = + length($csc_text) + + length($block_type) + + length( $rOpts->{'closing-side-comment-prefix'} ) + + $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; + if ( $length > maximum_line_length_for_level($leading_block_text_level) ) { + $csc_text = $saved_text; + } + return $csc_text; +} + +{ # sub balance_csc_text + + my %matching_char; + + BEGIN { + %matching_char = ( + '{' => '}', + '(' => ')', + '[' => ']', + '}' => '{', + ')' => '(', + ']' => '[', + ); + } + + sub balance_csc_text { + + # Append characters to balance a closing side comment so that editors + # such as vim can correctly jump through code. + # Simple Example: + # input = ## end foreach my $foo ( sort { $b ... + # output = ## end foreach my $foo ( sort { $b ...}) + + # NOTE: This routine does not currently filter out structures within + # quoted text because the bounce algorithms in text editors do not + # necessarily do this either (a version of vim was checked and + # did not do this). + + # Some complex examples which will cause trouble for some editors: + # while ( $mask_string =~ /\{[^{]*?\}/g ) { + # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { + # if ( $1 eq '{' ) { + # test file test1/braces.pl has many such examples. + + my ($csc) = @_; + + # loop to examine characters one-by-one, RIGHT to LEFT and + # build a balancing ending, LEFT to RIGHT. + for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { + + my $char = substr( $csc, $pos, 1 ); + + # ignore everything except structural characters + next unless ( $matching_char{$char} ); + + # pop most recently appended character + my $top = chop($csc); + + # push it back plus the mate to the newest character + # unless they balance each other. + $csc = $csc . $top . $matching_char{$char} unless $top eq $char; + } + + # return the balanced string + return $csc; + } +} + +sub add_closing_side_comment { + + my $self = shift; + + # add closing side comments after closing block braces if -csc used + my $cscw_block_comment; + + #--------------------------------------------------------------- + # Step 1: loop through all tokens of this line to accumulate + # the text needed to create the closing side comments. Also see + # how the line ends. + #--------------------------------------------------------------- + + my ( $terminal_type, $i_terminal, $i_block_leading_text, + $block_leading_text, $block_line_count, $block_label ) + = accumulate_csc_text(); + + #--------------------------------------------------------------- + # Step 2: make the closing side comment if this ends a block + #--------------------------------------------------------------- + ##my $have_side_comment = $i_terminal != $max_index_to_go; + my $have_side_comment = $types_to_go[$max_index_to_go] eq '#'; + + # if this line might end in a block closure.. + if ( + $terminal_type eq '}' + + # ..and either + && ( + + # the block is long enough + ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) + + # or there is an existing comment to check + || ( $have_side_comment + && $rOpts->{'closing-side-comment-warnings'} ) + ) + + # .. and if this is one of the types of interest + && $block_type_to_go[$i_terminal] =~ + /$closing_side_comment_list_pattern/o + + # .. but not an anonymous sub + # These are not normally of interest, and their closing braces are + # often followed by commas or semicolons anyway. This also avoids + # possible erratic output due to line numbering inconsistencies + # in the cases where their closing braces terminate a line. + && $block_type_to_go[$i_terminal] ne 'sub' + + # ..and the corresponding opening brace must is not in this batch + # (because we do not need to tag one-line blocks, although this + # should also be caught with a positive -csci value) + && $mate_index_to_go[$i_terminal] < 0 + + # ..and either + && ( + + # this is the last token (line doesn't have a side comment) + !$have_side_comment + + # or the old side comment is a closing side comment + || $tokens_to_go[$max_index_to_go] =~ + /$closing_side_comment_prefix_pattern/o + ) + ) + { + + # then make the closing side comment text + if ($block_label) { $block_label .= " " } + my $token = +"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; + + # append any extra descriptive text collected above + if ( $i_block_leading_text == $i_terminal ) { + $token .= $block_leading_text; + } + + $token = balance_csc_text($token) + if $rOpts->{'closing-side-comments-balanced'}; + + $token =~ s/\s*$//; # trim any trailing whitespace + + # handle case of existing closing side comment + if ($have_side_comment) { + + # warn if requested and tokens differ significantly + if ( $rOpts->{'closing-side-comment-warnings'} ) { + my $old_csc = $tokens_to_go[$max_index_to_go]; + my $new_csc = $token; + $new_csc =~ s/\s+//g; # trim all whitespace + $old_csc =~ s/\s+//g; # trim all whitespace + $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures + $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures + $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' + my $new_trailing_dots = $1; + $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' + + # Patch to handle multiple closing side comments at + # else and elsif's. These have become too complicated + # to check, so if we see an indication of + # '[ if' or '[ # elsif', then assume they were made + # by perltidy. + if ( $block_type_to_go[$i_terminal] eq 'else' ) { + if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } + } + elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { + if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } + } + + # if old comment is contained in new comment, + # only compare the common part. + if ( length($new_csc) > length($old_csc) ) { + $new_csc = substr( $new_csc, 0, length($old_csc) ); + } + + # if the new comment is shorter and has been limited, + # only compare the common part. + if ( length($new_csc) < length($old_csc) + && $new_trailing_dots ) + { + $old_csc = substr( $old_csc, 0, length($new_csc) ); + } + + # any remaining difference? + if ( $new_csc ne $old_csc ) { + + # just leave the old comment if we are below the threshold + # for creating side comments + if ( $block_line_count < + $rOpts->{'closing-side-comment-interval'} ) + { + $token = undef; + } + + # otherwise we'll make a note of it + else { + + warning( +"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" + ); + + # save the old side comment in a new trailing block + # comment + my $timestamp = ""; + if ( $rOpts->{'timestamp'} ) { + my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; + $year += 1900; + $month += 1; + $timestamp = "$year-$month-$day"; + } + $cscw_block_comment = +"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]"; +## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; + } + } + else { + + # No differences.. we can safely delete old comment if we + # are below the threshold + if ( $block_line_count < + $rOpts->{'closing-side-comment-interval'} ) + { + $token = undef; + $self->unstore_token_to_go() + if ( $types_to_go[$max_index_to_go] eq '#' ); + $self->unstore_token_to_go() + if ( $types_to_go[$max_index_to_go] eq 'b' ); + } + } + } + + # switch to the new csc (unless we deleted it!) + $tokens_to_go[$max_index_to_go] = $token if $token; + } + + # handle case of NO existing closing side comment + else { + + # Remove any existing blank and add another below. + # This is a tricky point. A side comment needs to have the same level + # as the preceding closing brace or else the line will not get the right + # indentation. So even if we have a blank, we are going to replace it. + if ( $types_to_go[$max_index_to_go] eq 'b' ) { + unstore_token_to_go(); + } + + # insert the new side comment into the output token stream + my $type = '#'; + my $block_type = ''; + my $type_sequence = ''; + my $container_environment = + $container_environment_to_go[$max_index_to_go]; + my $level = $levels_to_go[$max_index_to_go]; + my $slevel = $nesting_depth_to_go[$max_index_to_go]; + my $no_internal_newlines = 0; + + my $ci_level = $ci_levels_to_go[$max_index_to_go]; + my $in_continued_quote = 0; + + # insert a blank token + $self->insert_new_token_to_go( ' ', 'b', $slevel, + $no_internal_newlines ); + + # then the side comment + $self->insert_new_token_to_go( $token, $type, $slevel, + $no_internal_newlines ); + } + } + return $cscw_block_comment; +} + +sub previous_nonblank_token { + my ($i) = @_; + my $name = ""; + my $im = $i - 1; + return "" if ( $im < 0 ); + if ( $types_to_go[$im] eq 'b' ) { $im--; } + return "" if ( $im < 0 ); + $name = $tokens_to_go[$im]; + + # prepend any sub name to an isolated -> to avoid unwanted alignments + # [test case is test8/penco.pl] + if ( $name eq '->' ) { + $im--; + if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { + $name = $tokens_to_go[$im] . $name; + } + } + return $name; +} + +sub send_lines_to_vertical_aligner { + + my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_; + + my $valign_batch_number = $self->increment_valign_batch_count(); + + my $cscw_block_comment; + if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ) { + $cscw_block_comment = $self->add_closing_side_comment(); + + # Add or update any closing side comment + if ( $types_to_go[$max_index_to_go] eq '#' ) { + $ri_last->[-1] = $max_index_to_go; + } + } + + my $rindentation_list = [0]; # ref to indentations for each line + + # define the array @matching_token_to_go for the output tokens + # which will be non-blank for each special token (such as =>) + # for which alignment is required. + set_vertical_alignment_markers( $ri_first, $ri_last ); + + # flush if necessary to avoid unwanted alignment + my $must_flush = 0; + if ( @{$ri_first} > 1 ) { + + # flush before a long if statement + if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { + $must_flush = 1; + } + } + if ($must_flush) { + Perl::Tidy::VerticalAligner::flush(); + } + + undo_ci( $ri_first, $ri_last ); + + set_logical_padding( $ri_first, $ri_last ); + + # loop to prepare each line for shipment + my $n_last_line = @{$ri_first} - 1; + my $in_comma_list; + for my $n ( 0 .. $n_last_line ) { + my $ibeg = $ri_first->[$n]; + my $iend = $ri_last->[$n]; + + my ( $rtokens, $rfields, $rpatterns ) = + make_alignment_patterns( $ibeg, $iend ); + + # Set flag to show how much level changes between this line + # and the next line, if we have it. + my $ljump = 0; + if ( $n < $n_last_line ) { + my $ibegp = $ri_first->[ $n + 1 ]; + $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend]; + } + + my ( $indentation, $lev, $level_end, $terminal_type, + $is_semicolon_terminated, $is_outdented_line ) + = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, + $ri_first, $ri_last, $rindentation_list, $ljump ); + + # we will allow outdenting of long lines.. + my $outdent_long_lines = ( + + # which are long quotes, if allowed + ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) + + # which are long block comments, if allowed + || ( + $types_to_go[$ibeg] eq '#' + && $rOpts->{'outdent-long-comments'} + + # but not if this is a static block comment + && !$is_static_block_comment + ) + ); + + my $level_jump = + $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; + + my $rvertical_tightness_flags = + set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, + $ri_first, $ri_last ); + + # flush an outdented line to avoid any unwanted vertical alignment + Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); + + # Set a flag at the final ':' of a ternary chain to request + # vertical alignment of the final term. Here is a + # slightly complex example: + # + # $self->{_text} = ( + # !$section ? '' + # : $type eq 'item' ? "the $section entry" + # : "the section on $section" + # ) + # . ( + # $page + # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" + # : ' elsewhere in this document' + # ); + # + my $is_terminal_ternary = 0; + if ( $tokens_to_go[$ibeg] eq ':' + || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' ) + { + my $last_leading_type = ":"; + if ( $n > 0 ) { + my $iprev = $ri_first->[ $n - 1 ]; + $last_leading_type = $types_to_go[$iprev]; + } + if ( $terminal_type ne ';' + && $n_last_line > $n + && $level_end == $lev ) + { + my $inext = $ri_first->[ $n + 1 ]; + $level_end = $levels_to_go[$inext]; + $terminal_type = $types_to_go[$inext]; + } + + $is_terminal_ternary = $last_leading_type eq ':' + && ( ( $terminal_type eq ';' && $level_end <= $lev ) + || ( $terminal_type ne ':' && $level_end < $lev ) ) + + # the terminal term must not contain any ternary terms, as in + # my $ECHO = ( + # $Is_MSWin32 ? ".\\echo$$" + # : $Is_MacOS ? ":echo$$" + # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) + # ); + && !grep { /^[\?\:]$/ } @types_to_go[ $ibeg + 1 .. $iend ]; + } + + # send this new line down the pipe + my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; + + my $rvalign_hash = {}; + $rvalign_hash->{level} = $lev; + $rvalign_hash->{level_end} = $level_end; + $rvalign_hash->{indentation} = $indentation; + $rvalign_hash->{is_forced_break} = + $forced_breakpoint_to_go[$iend] || $in_comma_list; + $rvalign_hash->{outdent_long_lines} = $outdent_long_lines; + $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary; + $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated; + $rvalign_hash->{do_not_pad} = $do_not_pad; + $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags; + $rvalign_hash->{level_jump} = $level_jump; + + $rvalign_hash->{valign_batch_number} = $valign_batch_number; + + Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields, + $rtokens, $rpatterns ); + + $in_comma_list = + $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; + + # flush an outdented line to avoid any unwanted vertical alignment + Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); + + $do_not_pad = 0; + + # Set flag indicating if this line ends in an opening + # token and is very short, so that a blank line is not + # needed if the subsequent line is a comment. + # Examples of what we are looking for: + # { + # && ( + # BEGIN { + # default { + # sub { + $last_output_short_opening_token + + # line ends in opening token + = $types_to_go[$iend] =~ /^[\{\(\[L]$/ + + # and either + && ( + # line has either single opening token + $iend == $ibeg + + # or is a single token followed by opening token. + # Note that sub identifiers have blanks like 'sub doit' + || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ ) + ) + + # and limit total to 10 character widths + && token_sequence_length( $ibeg, $iend ) <= 10; + + } # end of loop to output each line + + # remember indentation of lines containing opening containers for + # later use by sub set_adjusted_indentation + save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); + + # output any new -cscw block comment + if ($cscw_block_comment) { + Perl::Tidy::VerticalAligner::flush(); + $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); + } + return; +} + +{ # begin make_alignment_patterns + + my %block_type_map; + my %keyword_map; + + BEGIN { + + # map related block names into a common name to + # allow alignment + %block_type_map = ( + 'unless' => 'if', + 'else' => 'if', + 'elsif' => 'if', + 'when' => 'if', + 'default' => 'if', + 'case' => 'if', + 'sort' => 'map', + 'grep' => 'map', + ); + + # map certain keywords to the same 'if' class to align + # long if/elsif sequences. [elsif.pl] + %keyword_map = ( + 'unless' => 'if', + 'else' => 'if', + 'elsif' => 'if', + 'when' => 'given', + 'default' => 'given', + 'case' => 'switch', + + # treat an 'undef' similar to numbers and quotes + 'undef' => 'Q', + ); + } + + sub make_alignment_patterns { + + # Here we do some important preliminary work for the + # vertical aligner. We create three arrays for one + # output line. These arrays contain strings that can + # be tested by the vertical aligner to see if + # consecutive lines can be aligned vertically. + # + # The three arrays are indexed on the vertical + # alignment fields and are: + # @tokens - a list of any vertical alignment tokens for this line. + # These are tokens, such as '=' '&&' '#' etc which + # we want to might align vertically. These are + # decorated with various information such as + # nesting depth to prevent unwanted vertical + # alignment matches. + # @fields - the actual text of the line between the vertical alignment + # tokens. + # @patterns - a modified list of token types, one for each alignment + # field. These should normally each match before alignment is + # allowed, even when the alignment tokens match. + my ( $ibeg, $iend ) = @_; + my @tokens = (); + my @fields = (); + my @patterns = (); + my $i_start = $ibeg; + + my $depth = 0; + my @container_name = (""); + my @multiple_comma_arrows = (undef); + + my $j = 0; # field index + + $patterns[0] = ""; + for my $i ( $ibeg .. $iend ) { + + # Keep track of containers balanced on this line only. + # These are used below to prevent unwanted cross-line alignments. + # Unbalanced containers already avoid aligning across + # container boundaries. + if ( $tokens_to_go[$i] eq '(' ) { + + # if container is balanced on this line... + my $i_mate = $mate_index_to_go[$i]; + if ( $i_mate > $i && $i_mate <= $iend ) { + $depth++; + my $seqno = $type_sequence_to_go[$i]; + my $count = comma_arrow_count($seqno); + $multiple_comma_arrows[$depth] = $count && $count > 1; + + # Append the previous token name to make the container name + # more unique. This name will also be given to any commas + # within this container, and it helps avoid undesirable + # alignments of different types of containers. + my $name = previous_nonblank_token($i); + $name =~ s/^->//; + $container_name[$depth] = "+" . $name; + + # Make the container name even more unique if necessary. + # If we are not vertically aligning this opening paren, + # append a character count to avoid bad alignment because + # it usually looks bad to align commas within containers + # for which the opening parens do not align. Here + # is an example very BAD alignment of commas (because + # the atan2 functions are not all aligned): + # $XY = + # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + + # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - + # $X * atan2( $X, 1 ) - + # $Y * atan2( $Y, 1 ); + # + # On the other hand, it is usually okay to align commas if + # opening parens align, such as: + # glVertex3d( $cx + $s * $xs, $cy, $z ); + # glVertex3d( $cx, $cy + $s * $ys, $z ); + # glVertex3d( $cx - $s * $xs, $cy, $z ); + # glVertex3d( $cx, $cy - $s * $ys, $z ); + # + # To distinguish between these situations, we will + # append the length of the line from the previous matching + # token, or beginning of line, to the function name. This + # will allow the vertical aligner to reject undesirable + # matches. + + # if we are not aligning on this paren... + if ( $matching_token_to_go[$i] eq '' ) { + + # Sum length from previous alignment, or start of line. + my $len = + ( $i_start == $ibeg ) + ? total_line_length( $i_start, $i - 1 ) + : token_sequence_length( $i_start, $i - 1 ); + + # tack length onto the container name to make unique + $container_name[$depth] .= "-" . $len; + } + } + } + elsif ( $tokens_to_go[$i] eq ')' ) { + $depth-- if $depth > 0; + } + + # if we find a new synchronization token, we are done with + # a field + if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { + + my $tok = my $raw_tok = $matching_token_to_go[$i]; + + # make separators in different nesting depths unique + # by appending the nesting depth digit. + if ( $raw_tok ne '#' ) { + $tok .= "$nesting_depth_to_go[$i]"; + } + + # also decorate commas with any container name to avoid + # unwanted cross-line alignments. + if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { + if ( $container_name[$depth] ) { + $tok .= $container_name[$depth]; + } + } + + # Patch to avoid aligning leading and trailing if, unless. + # Mark trailing if, unless statements with container names. + # This makes them different from leading if, unless which + # are not so marked at present. If we ever need to name + # them too, we could use ci to distinguish them. + # Example problem to avoid: + # return ( 2, "DBERROR" ) + # if ( $retval == 2 ); + # if ( scalar @_ ) { + # my ( $a, $b, $c, $d, $e, $f ) = @_; + # } + if ( $raw_tok eq '(' ) { + my $ci = $ci_levels_to_go[$ibeg]; + if ( $container_name[$depth] =~ /^\+(if|unless)/ + && $ci ) + { + $tok .= $container_name[$depth]; + } + } + + # Decorate block braces with block types to avoid + # unwanted alignments such as the following: + # foreach ( @{$routput_array} ) { $fh->print($_) } + # eval { $fh->close() }; + if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { + my $block_type = $block_type_to_go[$i]; + + # map certain related block types to allow + # else blocks to align + $block_type = $block_type_map{$block_type} + if ( defined( $block_type_map{$block_type} ) ); + + # remove sub names to allow one-line sub braces to align + # regardless of name + #if ( $block_type =~ /^sub / ) { $block_type = 'sub' } + if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' } + + # allow all control-type blocks to align + if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } + + $tok .= $block_type; + } + + # concatenate the text of the consecutive tokens to form + # the field + push( @fields, + join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); + + # store the alignment token for this field + push( @tokens, $tok ); + + # get ready for the next batch + $i_start = $i; + $j++; + $patterns[$j] = ""; + } + + # continue accumulating tokens + # handle non-keywords.. + if ( $types_to_go[$i] ne 'k' ) { + my $type = $types_to_go[$i]; + + # Mark most things before arrows as a quote to + # get them to line up. Testfile: mixed.pl. + if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { + my $next_type = $types_to_go[ $i + 1 ]; + my $i_next_nonblank = + ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); + + if ( $types_to_go[$i_next_nonblank] eq '=>' ) { + $type = 'Q'; + + # Patch to ignore leading minus before words, + # by changing pattern 'mQ' into just 'Q', + # so that we can align things like this: + # Button => "Print letter \"~$_\"", + # -command => [ sub { print "$_[0]\n" }, $_ ], + if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" } + } + } + + # patch to make numbers and quotes align + if ( $type eq 'n' ) { $type = 'Q' } + + # patch to ignore any ! in patterns + if ( $type eq '!' ) { $type = '' } + + $patterns[$j] .= $type; + } + + # for keywords we have to use the actual text + else { + + my $tok = $tokens_to_go[$i]; + + # but map certain keywords to a common string to allow + # alignment. + $tok = $keyword_map{$tok} + if ( defined( $keyword_map{$tok} ) ); + $patterns[$j] .= $tok; + } + } + + # done with this line .. join text of tokens to make the last field + push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) ); + return ( \@tokens, \@fields, \@patterns ); + } + +} # end make_alignment_patterns + +{ # begin unmatched_indexes + + # closure to keep track of unbalanced containers. + # arrays shared by the routines in this block: + my @unmatched_opening_indexes_in_this_batch; + my @unmatched_closing_indexes_in_this_batch; + my %comma_arrow_count; + + sub is_unbalanced_batch { + return @unmatched_opening_indexes_in_this_batch + + @unmatched_closing_indexes_in_this_batch; + } + + sub comma_arrow_count { + my $seqno = shift; + return $comma_arrow_count{$seqno}; + } + + sub match_opening_and_closing_tokens { + + # Match up indexes of opening and closing braces, etc, in this batch. + # This has to be done after all tokens are stored because unstoring + # of tokens would otherwise cause trouble. + + @unmatched_opening_indexes_in_this_batch = (); + @unmatched_closing_indexes_in_this_batch = (); + %comma_arrow_count = (); + my $comma_arrow_count_contained = 0; + + foreach my $i ( 0 .. $max_index_to_go ) { + if ( $type_sequence_to_go[$i] ) { + my $token = $tokens_to_go[$i]; + if ( $token =~ /^[\(\[\{\?]$/ ) { + push @unmatched_opening_indexes_in_this_batch, $i; + } + elsif ( $token =~ /^[\)\]\}\:]$/ ) { + + my $i_mate = pop @unmatched_opening_indexes_in_this_batch; + if ( defined($i_mate) && $i_mate >= 0 ) { + if ( $type_sequence_to_go[$i_mate] == + $type_sequence_to_go[$i] ) + { + $mate_index_to_go[$i] = $i_mate; + $mate_index_to_go[$i_mate] = $i; + my $seqno = $type_sequence_to_go[$i]; + if ( $comma_arrow_count{$seqno} ) { + $comma_arrow_count_contained += + $comma_arrow_count{$seqno}; + } + } + else { + push @unmatched_opening_indexes_in_this_batch, + $i_mate; + push @unmatched_closing_indexes_in_this_batch, $i; + } + } + else { + push @unmatched_closing_indexes_in_this_batch, $i; + } + } + } + elsif ( $tokens_to_go[$i] eq '=>' ) { + if (@unmatched_opening_indexes_in_this_batch) { + my $j = $unmatched_opening_indexes_in_this_batch[-1]; + my $seqno = $type_sequence_to_go[$j]; + $comma_arrow_count{$seqno}++; + } + } + } + return $comma_arrow_count_contained; + } + + sub save_opening_indentation { + + # This should be called after each batch of tokens is output. It + # saves indentations of lines of all unmatched opening tokens. + # These will be used by sub get_opening_indentation. + + my ( $ri_first, $ri_last, $rindentation_list ) = @_; + + # we no longer need indentations of any saved indentations which + # are unmatched closing tokens in this batch, because we will + # never encounter them again. So we can delete them to keep + # the hash size down. + foreach (@unmatched_closing_indexes_in_this_batch) { + my $seqno = $type_sequence_to_go[$_]; + delete $saved_opening_indentation{$seqno}; + } + + # we need to save indentations of any unmatched opening tokens + # in this batch because we may need them in a subsequent batch. + foreach (@unmatched_opening_indexes_in_this_batch) { + my $seqno = $type_sequence_to_go[$_]; + $saved_opening_indentation{$seqno} = [ + lookup_opening_indentation( + $_, $ri_first, $ri_last, $rindentation_list + ) + ]; + } + return; + } +} # end unmatched_indexes + +sub get_opening_indentation { + + # get the indentation of the line which output the opening token + # corresponding to a given closing token in the current output batch. + # + # given: + # $i_closing - index in this line of a closing token ')' '}' or ']' + # + # $ri_first - reference to list of the first index $i for each output + # line in this batch + # $ri_last - reference to list of the last index $i for each output line + # in this batch + # $rindentation_list - reference to a list containing the indentation + # used for each line. + # + # return: + # -the indentation of the line which contained the opening token + # which matches the token at index $i_opening + # -and its offset (number of columns) from the start of the line + # + my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_; + + # first, see if the opening token is in the current batch + my $i_opening = $mate_index_to_go[$i_closing]; + my ( $indent, $offset, $is_leading, $exists ); + $exists = 1; + if ( $i_opening >= 0 ) { + + # it is..look up the indentation + ( $indent, $offset, $is_leading ) = + lookup_opening_indentation( $i_opening, $ri_first, $ri_last, + $rindentation_list ); + } + + # if not, it should have been stored in the hash by a previous batch + else { + my $seqno = $type_sequence_to_go[$i_closing]; + if ($seqno) { + if ( $saved_opening_indentation{$seqno} ) { + ( $indent, $offset, $is_leading ) = + @{ $saved_opening_indentation{$seqno} }; + } + + # some kind of serious error + # (example is badfile.t) + else { + $indent = 0; + $offset = 0; + $is_leading = 0; + $exists = 0; + } + } + + # if no sequence number it must be an unbalanced container + else { + $indent = 0; + $offset = 0; + $is_leading = 0; + $exists = 0; + } + } + return ( $indent, $offset, $is_leading, $exists ); +} + +sub lookup_opening_indentation { + + # get the indentation of the line in the current output batch + # which output a selected opening token + # + # given: + # $i_opening - index of an opening token in the current output batch + # whose line indentation we need + # $ri_first - reference to list of the first index $i for each output + # line in this batch + # $ri_last - reference to list of the last index $i for each output line + # in this batch + # $rindentation_list - reference to a list containing the indentation + # used for each line. (NOTE: the first slot in + # this list is the last returned line number, and this is + # followed by the list of indentations). + # + # return + # -the indentation of the line which contained token $i_opening + # -and its offset (number of columns) from the start of the line + + my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; + + my $nline = $rindentation_list->[0]; # line number of previous lookup + + # reset line location if necessary + $nline = 0 if ( $i_opening < $ri_start->[$nline] ); + + # find the correct line + unless ( $i_opening > $ri_last->[-1] ) { + while ( $i_opening > $ri_last->[$nline] ) { $nline++; } + } + + # error - token index is out of bounds - shouldn't happen + else { + warning( +"non-fatal program bug in lookup_opening_indentation - index out of range\n" + ); + report_definite_bug(); + $nline = $#{$ri_last}; + } + + $rindentation_list->[0] = + $nline; # save line number to start looking next call + my $ibeg = $ri_start->[$nline]; + my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; + my $is_leading = ( $ibeg == $i_opening ); + return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); +} + +{ + my %is_if_elsif_else_unless_while_until_for_foreach; + + BEGIN { + + # These block types may have text between the keyword and opening + # curly. Note: 'else' does not, but must be included to allow trailing + # if/elsif text to be appended. + # patch for SWITCH/CASE: added 'case' and 'when' + my @q = qw(if elsif else unless while until for foreach case when); + @is_if_elsif_else_unless_while_until_for_foreach{@q} = + (1) x scalar(@q); + } + + sub set_adjusted_indentation { + + # This routine has the final say regarding the actual indentation of + # a line. It starts with the basic indentation which has been + # defined for the leading token, and then takes into account any + # options that the user has set regarding special indenting and + # outdenting. + + my ( + $self, $ibeg, $iend, + $rfields, $rpatterns, $ri_first, + $ri_last, $rindentation_list, $level_jump + ) = @_; + + my $rLL = $self->{rLL}; + + # we need to know the last token of this line + my ( $terminal_type, $i_terminal ) = + terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend ); + + my $is_outdented_line = 0; + + my $is_semicolon_terminated = $terminal_type eq ';' + && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]; + + # NOTE: A future improvement would be to make it semicolon terminated + # even if it does not have a semicolon but is followed by a closing + # block brace. This would undo ci even for something like the + # following, in which the final paren does not have a semicolon because + # it is a possible weld location: + + # if ($BOLD_MATH) { + # ( + # $labels, $comment, + # join( '', '', &make_math( $mode, '', '', $_ ), '' ) + # ) + # } + # + + # MOJO: Set a flag if this lines begins with ')->' + my $leading_paren_arrow = ( + $types_to_go[$ibeg] eq '}' + && $tokens_to_go[$ibeg] eq ')' + && ( + ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' ) + || ( $ibeg < $i_terminal - 1 + && $types_to_go[ $ibeg + 1 ] eq 'b' + && $types_to_go[ $ibeg + 2 ] eq '->' ) + ) + ); + + ########################################################## + # Section 1: set a flag and a default indentation + # + # Most lines are indented according to the initial token. + # But it is common to outdent to the level just after the + # terminal token in certain cases... + # adjust_indentation flag: + # 0 - do not adjust + # 1 - outdent + # 2 - vertically align with opening token + # 3 - indent + ########################################################## + my $adjust_indentation = 0; + my $default_adjust_indentation = $adjust_indentation; + + my ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ); + + # if we are at a closing token of some type.. + if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) { + + # get the indentation of the line containing the corresponding + # opening token + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + + # First set the default behavior: + if ( + + # default behavior is to outdent closing lines + # of the form: "); }; ]; )->xxx;" + $is_semicolon_terminated + + # and 'cuddled parens' of the form: ")->pack(" + # Bug fix for RT #123749]: the types here were + # incorrectly '(' and ')'. Corrected to be '{' and '}' + || ( + $terminal_type eq '{' + && $types_to_go[$ibeg] eq '}' + && ( $nesting_depth_to_go[$iend] + 1 == + $nesting_depth_to_go[$ibeg] ) + ) + + # remove continuation indentation for any line like + # } ... { + # or without ending '{' and unbalanced, such as + # such as '}->{$operator}' + || ( + $types_to_go[$ibeg] eq '}' + + && ( $types_to_go[$iend] eq '{' + || $levels_to_go[$iend] < $levels_to_go[$ibeg] ) + ) + + # and when the next line is at a lower indentation level + # PATCH: and only if the style allows undoing continuation + # for all closing token types. We should really wait until + # the indentation of the next line is known and then make + # a decision, but that would require another pass. + || ( $level_jump < 0 && !$some_closing_token_indentation ) + + # Patch for -wn=2, multiple welded closing tokens + || ( $i_terminal > $ibeg + && $types_to_go[$iend] =~ /^[\)\}\]R]$/ ) + + ) + { + $adjust_indentation = 1; + } + + # outdent something like '),' + if ( + $terminal_type eq ',' + + # Removed this constraint for -wn + # OLD: allow just one character before the comma + # && $i_terminal == $ibeg + 1 + + # require LIST environment; otherwise, we may outdent too much - + # this can happen in calls without parentheses (overload.t); + && $container_environment_to_go[$i_terminal] eq 'LIST' + ) + { + $adjust_indentation = 1; + } + + # undo continuation indentation of a terminal closing token if + # it is the last token before a level decrease. This will allow + # a closing token to line up with its opening counterpart, and + # avoids a indentation jump larger than 1 level. + my $K_beg = $K_to_go[$ibeg]; + if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/ + && $i_terminal == $ibeg + && defined($K_beg) ) + { + my $K_next_nonblank = $self->K_next_code($K_beg); + if ( defined($K_next_nonblank) ) { + my $lev = $rLL->[$K_beg]->[_LEVEL_]; + my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_]; + $adjust_indentation = 1 if ( $level_next < $lev ); + } + + # Patch for RT #96101, in which closing brace of anonymous subs + # was not outdented. We should look ahead and see if there is + # a level decrease at the next token (i.e., a closing token), + # but right now we do not have that information. For now + # we see if we are in a list, and this works well. + # See test files 'sub*.t' for good test cases. + if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/ + && $container_environment_to_go[$i_terminal] eq 'LIST' + && !$rOpts->{'indent-closing-brace'} ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_to_go[$ibeg]; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } + } + } + + # YVES patch 1 of 2: + # Undo ci of line with leading closing eval brace, + # but not beyond the indention of the line with + # the opening brace. + if ( $block_type_to_go[$ibeg] eq 'eval' + && !$rOpts->{'line-up-parentheses'} + && !$rOpts->{'indent-closing-brace'} ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_to_go[$ibeg]; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } + } + + $default_adjust_indentation = $adjust_indentation; + + # Now modify default behavior according to user request: + # handle option to indent non-blocks of the form ); }; ]; + # But don't do special indentation to something like ')->pack(' + if ( !$block_type_to_go[$ibeg] ) { + my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] }; + if ( $cti == 1 ) { + if ( $i_terminal <= $ibeg + 1 + || $is_semicolon_terminated ) + { + $adjust_indentation = 2; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 2 ) { + if ($is_semicolon_terminated) { + $adjust_indentation = 3; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 3 ) { + $adjust_indentation = 3; + } + } + + # handle option to indent blocks + else { + if ( + $rOpts->{'indent-closing-brace'} + && ( + $i_terminal == $ibeg # isolated terminal '}' + || $is_semicolon_terminated + ) + ) # } xxxx ; + { + $adjust_indentation = 3; + } + } + } + + # if at ');', '};', '>;', and '];' of a terminal qw quote + elsif ($rpatterns->[0] =~ /^qb*;$/ + && $rfields->[0] =~ /^([\)\}\]\>]);$/ ) + { + if ( $closing_token_indentation{$1} == 0 ) { + $adjust_indentation = 1; + } + else { + $adjust_indentation = 3; + } + } + + # if line begins with a ':', align it with any + # previous line leading with corresponding ? + elsif ( $types_to_go[$ibeg] eq ':' ) { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + if ($is_leading) { $adjust_indentation = 2; } + } + + ########################################################## + # Section 2: set indentation according to flag set above + # + # Select the indentation object to define leading + # whitespace. If we are outdenting something like '} } );' + # then we want to use one level below the last token + # ($i_terminal) in order to get it to fully outdent through + # all levels. + ########################################################## + my $indentation; + my $lev; + my $level_end = $levels_to_go[$iend]; + + if ( $adjust_indentation == 0 ) { + $indentation = $leading_spaces_to_go[$ibeg]; + $lev = $levels_to_go[$ibeg]; + } + elsif ( $adjust_indentation == 1 ) { + + # Change the indentation to be that of a different token on the line + # Previously, the indentation of the terminal token was used: + # OLD CODING: + # $indentation = $reduced_spaces_to_go[$i_terminal]; + # $lev = $levels_to_go[$i_terminal]; + + # Generalization for MOJO: + # Use the lowest level indentation of the tokens on the line. + # For example, here we can use the indentation of the ending ';': + # } until ($selection > 0 and $selection < 10); # ok to use ';' + # But this will not outdent if we use the terminal indentation: + # )->then( sub { # use indentation of the ->, not the { + # Warning: reduced_spaces_to_go[] may be a reference, do not + # do numerical checks with it + + my $i_ind = $ibeg; + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + while ( $i_ind < $i_terminal ) { + $i_ind++; + if ( $levels_to_go[$i_ind] < $lev ) { + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + } + } + } + + # handle indented closing token which aligns with opening token + elsif ( $adjust_indentation == 2 ) { + + # handle option to align closing token with opening token + $lev = $levels_to_go[$ibeg]; + + # calculate spaces needed to align with opening token + my $space_count = + get_spaces($opening_indentation) + $opening_offset; + + # Indent less than the previous line. + # + # Problem: For -lp we don't exactly know what it was if there + # were recoverable spaces sent to the aligner. A good solution + # would be to force a flush of the vertical alignment buffer, so + # that we would know. For now, this rule is used for -lp: + # + # When the last line did not start with a closing token we will + # be optimistic that the aligner will recover everything wanted. + # + # This rule will prevent us from breaking a hierarchy of closing + # tokens, and in a worst case will leave a closing paren too far + # indented, but this is better than frequently leaving it not + # indented enough. + my $last_spaces = get_spaces($last_indentation_written); + if ( $last_leading_token !~ /^[\}\]\)]$/ ) { + $last_spaces += + get_recoverable_spaces($last_indentation_written); + } + + # reset the indentation to the new space count if it works + # only options are all or none: nothing in-between looks good + $lev = $levels_to_go[$ibeg]; + if ( $space_count < $last_spaces ) { + if ($rOpts_line_up_parentheses) { + my $lev = $levels_to_go[$ibeg]; + $indentation = + new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); + } + else { + $indentation = $space_count; + } + } + + # revert to default if it doesn't work + else { + $space_count = leading_spaces_to_go($ibeg); + if ( $default_adjust_indentation == 0 ) { + $indentation = $leading_spaces_to_go[$ibeg]; + } + elsif ( $default_adjust_indentation == 1 ) { + $indentation = $reduced_spaces_to_go[$i_terminal]; + $lev = $levels_to_go[$i_terminal]; + } + } + } + + # Full indentaion of closing tokens (-icb and -icp or -cti=2) + else { + + # handle -icb (indented closing code block braces) + # Updated method for indented block braces: indent one full level if + # there is no continuation indentation. This will occur for major + # structures such as sub, if, else, but not for things like map + # blocks. + # + # Note: only code blocks without continuation indentation are + # handled here (if, else, unless, ..). In the following snippet, + # the terminal brace of the sort block will have continuation + # indentation as shown so it will not be handled by the coding + # here. We would have to undo the continuation indentation to do + # this, but it probably looks ok as is. This is a possible future + # update for semicolon terminated lines. + # + # if ($sortby eq 'date' or $sortby eq 'size') { + # @files = sort { + # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} + # or $a cmp $b + # } @files; + # } + # + if ( $block_type_to_go[$ibeg] + && $ci_levels_to_go[$i_terminal] == 0 ) + { + my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); + $indentation = $spaces + $rOpts_indent_columns; + + # NOTE: for -lp we could create a new indentation object, but + # there is probably no need to do it + } + + # handle -icp and any -icb block braces which fall through above + # test such as the 'sort' block mentioned above. + else { + + # There are currently two ways to handle -icp... + # One way is to use the indentation of the previous line: + # $indentation = $last_indentation_written; + + # The other way is to use the indentation that the previous line + # would have had if it hadn't been adjusted: + $indentation = $last_unadjusted_indentation; + + # Current method: use the minimum of the two. This avoids + # inconsistent indentation. + if ( get_spaces($last_indentation_written) < + get_spaces($indentation) ) + { + $indentation = $last_indentation_written; + } + } + + # use previous indentation but use own level + # to cause list to be flushed properly + $lev = $levels_to_go[$ibeg]; + } + + # remember indentation except for multi-line quotes, which get + # no indentation + unless ( $ibeg == 0 && $starting_in_quote ) { + $last_indentation_written = $indentation; + $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg]; + $last_leading_token = $tokens_to_go[$ibeg]; + } + + # be sure lines with leading closing tokens are not outdented more + # than the line which contained the corresponding opening token. + + ############################################################# + # updated per bug report in alex_bug.pl: we must not + # mess with the indentation of closing logical braces so + # we must treat something like '} else {' as if it were + # an isolated brace my $is_isolated_block_brace = ( + # $iend == $ibeg ) && $block_type_to_go[$ibeg]; + ############################################################# + my $is_isolated_block_brace = $block_type_to_go[$ibeg] + && ( $iend == $ibeg + || $is_if_elsif_else_unless_while_until_for_foreach{ + $block_type_to_go[$ibeg] + } ); + + # only do this for a ':; which is aligned with its leading '?' + my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading; + + if ( + defined($opening_indentation) + && !$leading_paren_arrow # MOJO + && !$is_isolated_block_brace + && !$is_unaligned_colon + ) + { + if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { + $indentation = $opening_indentation; + } + } + + # remember the indentation of each line of this batch + push @{$rindentation_list}, $indentation; + + # outdent lines with certain leading tokens... + if ( + + # must be first word of this batch + $ibeg == 0 + + # and ... + && ( + + # certain leading keywords if requested + ( + $rOpts->{'outdent-keywords'} + && $types_to_go[$ibeg] eq 'k' + && $outdent_keyword{ $tokens_to_go[$ibeg] } + ) + + # or labels if requested + || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' ) + + # or static block comments if requested + || ( $types_to_go[$ibeg] eq '#' + && $rOpts->{'outdent-static-block-comments'} + && $is_static_block_comment ) + ) + ) + + { + my $space_count = leading_spaces_to_go($ibeg); + if ( $space_count > 0 ) { + $space_count -= $rOpts_continuation_indentation; + $is_outdented_line = 1; + if ( $space_count < 0 ) { $space_count = 0 } + + # do not promote a spaced static block comment to non-spaced; + # this is not normally necessary but could be for some + # unusual user inputs (such as -ci = -i) + if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) { + $space_count = 1; + } + + if ($rOpts_line_up_parentheses) { + $indentation = + new_lp_indentation_item( $space_count, $lev, 0, 0, 0 ); + } + else { + $indentation = $space_count; + } + } + } + + return ( $indentation, $lev, $level_end, $terminal_type, + $is_semicolon_terminated, $is_outdented_line ); + } +} + +sub set_vertical_tightness_flags { + + my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_; + + # Define vertical tightness controls for the nth line of a batch. + # We create an array of parameters which tell the vertical aligner + # if we should combine this line with the next line to achieve the + # desired vertical tightness. The array of parameters contains: + # + # [0] type: 1=opening non-block 2=closing non-block + # 3=opening block brace 4=closing block brace + # + # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok + # if closing: spaces of padding to use + # [2] sequence number of container + # [3] valid flag: do not append if this flag is false. Will be + # true if appropriate -vt flag is set. Otherwise, Will be + # made true only for 2 line container in parens with -lp + # + # These flags are used by sub set_leading_whitespace in + # the vertical aligner + + my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ]; + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1: + # Handle Lines 1 .. n-1 but not the last line + # For non-BLOCK tokens, we will need to examine the next line + # too, so we won't consider the last line. + #-------------------------------------------------------------- + if ( $n < $n_last_line ) { + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1a: + # Look for Type 1, last token of this line is a non-block opening token + #-------------------------------------------------------------- + my $ibeg_next = $ri_first->[ $n + 1 ]; + my $token_end = $tokens_to_go[$iend]; + my $iend_next = $ri_last->[ $n + 1 ]; + if ( + $type_sequence_to_go[$iend] + && !$block_type_to_go[$iend] + && $is_opening_token{$token_end} + && ( + $opening_vertical_tightness{$token_end} > 0 + + # allow 2-line method call to be closed up + || ( $rOpts_line_up_parentheses + && $token_end eq '(' + && $iend > $ibeg + && $types_to_go[ $iend - 1 ] ne 'b' ) + ) + ) + { + + # avoid multiple jumps in nesting depth in one line if + # requested + my $ovt = $opening_vertical_tightness{$token_end}; + my $iend_next = $ri_last->[ $n + 1 ]; + unless ( + $ovt < 2 + && ( $nesting_depth_to_go[ $iend_next + 1 ] != + $nesting_depth_to_go[$ibeg_next] ) + ) + { + + # If -vt flag has not been set, mark this as invalid + # and aligner will validate it if it sees the closing paren + # within 2 lines. + my $valid_flag = $ovt; + @{$rvertical_tightness_flags} = + ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag ); + } + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1b: + # Look for Type 2, first token of next line is a non-block closing + # token .. and be sure this line does not have a side comment + #-------------------------------------------------------------- + my $token_next = $tokens_to_go[$ibeg_next]; + if ( $type_sequence_to_go[$ibeg_next] + && !$block_type_to_go[$ibeg_next] + && $is_closing_token{$token_next} + && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen! + { + my $ovt = $opening_vertical_tightness{$token_next}; + my $cvt = $closing_vertical_tightness{$token_next}; + if ( + + # never append a trailing line like )->pack( + # because it will throw off later alignment + ( + $nesting_depth_to_go[$ibeg_next] == + $nesting_depth_to_go[ $iend_next + 1 ] + 1 + ) + && ( + $cvt == 2 + || ( + $container_environment_to_go[$ibeg_next] ne 'LIST' + && ( + $cvt == 1 + + # allow closing up 2-line method calls + || ( $rOpts_line_up_parentheses + && $token_next eq ')' ) + ) + ) + ) + ) + { + + # decide which trailing closing tokens to append.. + my $ok = 0; + if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } + else { + my $str = join( '', + @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); + + # append closing token if followed by comment or ';' + if ( $str =~ /^b?[#;]/ ) { $ok = 1 } + } + + if ($ok) { + my $valid_flag = $cvt; + @{$rvertical_tightness_flags} = ( + 2, + $tightness{$token_next} == 2 ? 0 : 1, + $type_sequence_to_go[$ibeg_next], $valid_flag, + ); + } + } + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1c: + # Implement the Opening Token Right flag (Type 2).. + # If requested, move an isolated trailing opening token to the end of + # the previous line which ended in a comma. We could do this + # in sub recombine_breakpoints but that would cause problems + # with -lp formatting. The problem is that indentation will + # quickly move far to the right in nested expressions. By + # doing it after indentation has been set, we avoid changes + # to the indentation. Actual movement of the token takes place + # in sub valign_output_step_B. + #-------------------------------------------------------------- + if ( + $opening_token_right{ $tokens_to_go[$ibeg_next] } + + # previous line is not opening + # (use -sot to combine with it) + && !$is_opening_token{$token_end} + + # previous line ended in one of these + # (add other cases if necessary; '=>' and '.' are not necessary + && !$block_type_to_go[$ibeg_next] + + # this is a line with just an opening token + && ( $iend_next == $ibeg_next + || $iend_next == $ibeg_next + 2 + && $types_to_go[$iend_next] eq '#' ) + + # looks bad if we align vertically with the wrong container + && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] + ) + { + my $valid_flag = 1; + my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; + @{$rvertical_tightness_flags} = + ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, ); + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 1d: + # Stacking of opening and closing tokens (Type 2) + #-------------------------------------------------------------- + my $stackable; + my $token_beg_next = $tokens_to_go[$ibeg_next]; + + # patch to make something like 'qw(' behave like an opening paren + # (aran.t) + if ( $types_to_go[$ibeg_next] eq 'q' ) { + if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { + $token_beg_next = $1; + } + } + + if ( $is_closing_token{$token_end} + && $is_closing_token{$token_beg_next} ) + { + $stackable = $stack_closing_token{$token_beg_next} + unless ( $block_type_to_go[$ibeg_next] ) + ; # shouldn't happen; just checking + } + elsif ($is_opening_token{$token_end} + && $is_opening_token{$token_beg_next} ) + { + $stackable = $stack_opening_token{$token_beg_next} + unless ( $block_type_to_go[$ibeg_next] ) + ; # shouldn't happen; just checking + } + + if ($stackable) { + + my $is_semicolon_terminated; + if ( $n + 1 == $n_last_line ) { + my ( $terminal_type, $i_terminal ) = terminal_type( + \@types_to_go, \@block_type_to_go, + $ibeg_next, $iend_next + ); + $is_semicolon_terminated = $terminal_type eq ';' + && $nesting_depth_to_go[$iend_next] < + $nesting_depth_to_go[$ibeg_next]; + } + + # this must be a line with just an opening token + # or end in a semicolon + if ( + $is_semicolon_terminated + || ( $iend_next == $ibeg_next + || $iend_next == $ibeg_next + 2 + && $types_to_go[$iend_next] eq '#' ) + ) + { + my $valid_flag = 1; + my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; + @{$rvertical_tightness_flags} = + ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, + ); + } + } + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 2: + # Handle type 3, opening block braces on last line of the batch + # Check for a last line with isolated opening BLOCK curly + #-------------------------------------------------------------- + elsif ($rOpts_block_brace_vertical_tightness + && $ibeg eq $iend + && $types_to_go[$iend] eq '{' + && $block_type_to_go[$iend] =~ + /$block_brace_vertical_tightness_pattern/o ) + { + @{$rvertical_tightness_flags} = + ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 ); + } + + #-------------------------------------------------------------- + # Vertical Tightness Flags Section 3: + # Handle type 4, a closing block brace on the last line of the batch Check + # for a last line with isolated closing BLOCK curly + #-------------------------------------------------------------- + elsif ($rOpts_stack_closing_block_brace + && $ibeg eq $iend + && $block_type_to_go[$iend] + && $types_to_go[$iend] eq '}' ) + { + my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; + @{$rvertical_tightness_flags} = + ( 4, $spaces, $type_sequence_to_go[$iend], 1 ); + } + + # pack in the sequence numbers of the ends of this line + $rvertical_tightness_flags->[4] = get_seqno($ibeg); + $rvertical_tightness_flags->[5] = get_seqno($iend); + return $rvertical_tightness_flags; +} + +sub get_seqno { + + # get opening and closing sequence numbers of a token for the vertical + # aligner. Assign qw quotes a value to allow qw opening and closing tokens + # to be treated somewhat like opening and closing tokens for stacking + # tokens by the vertical aligner. + my ($ii) = @_; + my $seqno = $type_sequence_to_go[$ii]; + if ( $types_to_go[$ii] eq 'q' ) { + my $SEQ_QW = -1; + if ( $ii > 0 ) { + $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ ); + } + else { + if ( !$ending_in_quote ) { + $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ ); + } + } + } + return ($seqno); +} + +{ + my %is_vertical_alignment_type; + my %is_vertical_alignment_keyword; + my %is_terminal_alignment_type; + + BEGIN { + + my @q; + + # Removed =~ from list to improve chances of alignment + # Removed // from list to improve chances of alignment (RT# 119588) + @q = qw# + = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= + { ? : => && || ~~ !~~ + #; + @is_vertical_alignment_type{@q} = (1) x scalar(@q); + + # only align these at end of line + @q = qw(&& ||); + @is_terminal_alignment_type{@q} = (1) x scalar(@q); + + # eq and ne were removed from this list to improve alignment chances + @q = qw(if unless and or err for foreach while until); + @is_vertical_alignment_keyword{@q} = (1) x scalar(@q); + } + + sub set_vertical_alignment_markers { + + # This routine takes the first step toward vertical alignment of the + # lines of output text. It looks for certain tokens which can serve as + # vertical alignment markers (such as an '='). + # + # Method: We look at each token $i in this output batch and set + # $matching_token_to_go[$i] equal to those tokens at which we would + # accept vertical alignment. + + my ( $ri_first, $ri_last ) = @_; + + # nothing to do if we aren't allowed to change whitespace + if ( !$rOpts_add_whitespace ) { + for my $i ( 0 .. $max_index_to_go ) { + $matching_token_to_go[$i] = ''; + } + return; + } + + # remember the index of last nonblank token before any sidecomment + my $i_terminal = $max_index_to_go; + if ( $types_to_go[$i_terminal] eq '#' ) { + if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) { + if ( $i_terminal > 0 ) { --$i_terminal } + } + } + + # look at each line of this batch.. + my $last_vertical_alignment_before_index; + my $vert_last_nonblank_type; + my $vert_last_nonblank_token; + my $vert_last_nonblank_block_type; + my $max_line = @{$ri_first} - 1; + + foreach my $line ( 0 .. $max_line ) { + my $ibeg = $ri_first->[$line]; + my $iend = $ri_last->[$line]; + $last_vertical_alignment_before_index = -1; + $vert_last_nonblank_type = ''; + $vert_last_nonblank_token = ''; + $vert_last_nonblank_block_type = ''; + + # look at each token in this output line.. + foreach my $i ( $ibeg .. $iend ) { + my $alignment_type = ''; + my $type = $types_to_go[$i]; + my $block_type = $block_type_to_go[$i]; + my $token = $tokens_to_go[$i]; + + # check for flag indicating that we should not align + # this token + if ( $matching_token_to_go[$i] ) { + $matching_token_to_go[$i] = ''; + next; + } + + #-------------------------------------------------------- + # First see if we want to align BEFORE this token + #-------------------------------------------------------- + + # The first possible token that we can align before + # is index 2 because: 1) it doesn't normally make sense to + # align before the first token and 2) the second + # token must be a blank if we are to align before + # the third + if ( $i < $ibeg + 2 ) { } + + # must follow a blank token + elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } + + # align a side comment -- + elsif ( $type eq '#' ) { + + unless ( + + # it is a static side comment + ( + $rOpts->{'static-side-comments'} + && $token =~ /$static_side_comment_pattern/o + ) + + # or a closing side comment + || ( $vert_last_nonblank_block_type + && $token =~ + /$closing_side_comment_prefix_pattern/o ) + ) + { + $alignment_type = $type; + } ## Example of a static side comment + } + + # otherwise, do not align two in a row to create a + # blank field + elsif ( $last_vertical_alignment_before_index == $i - 2 ) { } + + # align before one of these keywords + # (within a line, since $i>1) + elsif ( $type eq 'k' ) { + + # /^(if|unless|and|or|eq|ne)$/ + if ( $is_vertical_alignment_keyword{$token} ) { + $alignment_type = $token; + } + } + + # align before one of these types.. + # Note: add '.' after new vertical aligner is operational + elsif ( $is_vertical_alignment_type{$type} ) { + $alignment_type = $token; + + # Do not align a terminal token. Although it might + # occasionally look ok to do this, this has been found to be + # a good general rule. The main problems are: + # (1) that the terminal token (such as an = or :) might get + # moved far to the right where it is hard to see because + # nothing follows it, and + # (2) doing so may prevent other good alignments. + # Current exceptions are && and || + if ( $i == $iend || $i >= $i_terminal ) { + $alignment_type = "" + unless ( $is_terminal_alignment_type{$type} ); + } + + # Do not align leading ': (' or '. ('. This would prevent + # alignment in something like the following: + # $extra_space .= + # ( $input_line_number < 10 ) ? " " + # : ( $input_line_number < 100 ) ? " " + # : ""; + # or + # $code = + # ( $case_matters ? $accessor : " lc($accessor) " ) + # . ( $yesno ? " eq " : " ne " ) + if ( $i == $ibeg + 2 + && $types_to_go[$ibeg] =~ /^[\.\:]$/ + && $types_to_go[ $i - 1 ] eq 'b' ) + { + $alignment_type = ""; + } + + # For a paren after keyword, only align something like this: + # if ( $a ) { &a } + # elsif ( $b ) { &b } + if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) { + $alignment_type = "" + unless $vert_last_nonblank_token =~ + /^(if|unless|elsif)$/; + } + + # be sure the alignment tokens are unique + # This didn't work well: reason not determined + # if ($token ne $type) {$alignment_type .= $type} + } + + # NOTE: This is deactivated because it causes the previous + # if/elsif alignment to fail + #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) + #{ $alignment_type = $type; } + + if ($alignment_type) { + $last_vertical_alignment_before_index = $i; + } + + #-------------------------------------------------------- + # Next see if we want to align AFTER the previous nonblank + #-------------------------------------------------------- + + # We want to line up ',' and interior ';' tokens, with the added + # space AFTER these tokens. (Note: interior ';' is included + # because it may occur in short blocks). + if ( + + # we haven't already set it + !$alignment_type + + # and its not the first token of the line + && ( $i > $ibeg ) + + # and it follows a blank + && $types_to_go[ $i - 1 ] eq 'b' + + # and previous token IS one of these: + && ( $vert_last_nonblank_type =~ /^[\,\;]$/ ) + + # and it's NOT one of these + && ( $type !~ /^[b\#\)\]\}]$/ ) + + # then go ahead and align + ) + + { + $alignment_type = $vert_last_nonblank_type; + } + + #-------------------------------------------------------- + # then store the value + #-------------------------------------------------------- + $matching_token_to_go[$i] = $alignment_type; + if ( $type ne 'b' ) { + $vert_last_nonblank_type = $type; + $vert_last_nonblank_token = $token; + $vert_last_nonblank_block_type = $block_type; + } + } + } + return; + } +} + +sub terminal_type { + + # returns type of last token on this line (terminal token), as follows: + # returns # for a full-line comment + # returns ' ' for a blank line + # otherwise returns final token type + + my ( $rtype, $rblock_type, $ibeg, $iend ) = @_; + + # check for full-line comment.. + if ( $rtype->[$ibeg] eq '#' ) { + return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg]; + } + else { + + # start at end and walk backwards.. + for ( my $i = $iend ; $i >= $ibeg ; $i-- ) { + + # skip past any side comment and blanks + next if ( $rtype->[$i] eq 'b' ); + next if ( $rtype->[$i] eq '#' ); + + # found it..make sure it is a BLOCK termination, + # but hide a terminal } after sort/grep/map because it is not + # necessarily the end of the line. (terminal.t) + my $terminal_type = $rtype->[$i]; + if ( + $terminal_type eq '}' + && ( !$rblock_type->[$i] + || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) ) + ) + { + $terminal_type = 'b'; + } + return wantarray ? ( $terminal_type, $i ) : $terminal_type; + } + + # empty line + return wantarray ? ( ' ', $ibeg ) : ' '; + } +} + +{ # set_bond_strengths + + my %is_good_keyword_breakpoint; + my %is_lt_gt_le_ge; + + my %binary_bond_strength; + my %nobreak_lhs; + my %nobreak_rhs; + + my @bias_tokens; + my $delta_bias; + + sub bias_table_key { + my ( $type, $token ) = @_; + my $bias_table_key = $type; + if ( $type eq 'k' ) { + $bias_table_key = $token; + if ( $token eq 'err' ) { $bias_table_key = 'or' } + } + return $bias_table_key; + } + + sub initialize_bond_strength_hashes { + + my @q; + @q = qw(if unless while until for foreach); + @is_good_keyword_breakpoint{@q} = (1) x scalar(@q); + + @q = qw(lt gt le ge); + @is_lt_gt_le_ge{@q} = (1) x scalar(@q); + # + # The decision about where to break a line depends upon a "bond + # strength" between tokens. The LOWER the bond strength, the MORE + # likely a break. A bond strength may be any value but to simplify + # things there are several pre-defined strength levels: + + # NO_BREAK => 10000; + # VERY_STRONG => 100; + # STRONG => 2.1; + # NOMINAL => 1.1; + # WEAK => 0.8; + # VERY_WEAK => 0.55; + + # The strength values are based on trial-and-error, and need to be + # tweaked occasionally to get desired results. Some comments: + # + # 1. Only relative strengths are important. small differences + # in strengths can make big formatting differences. + # 2. Each indentation level adds one unit of bond strength. + # 3. A value of NO_BREAK makes an unbreakable bond + # 4. A value of VERY_WEAK is the strength of a ',' + # 5. Values below NOMINAL are considered ok break points. + # 6. Values above NOMINAL are considered poor break points. + # + # The bond strengths should roughly follow precedence order where + # possible. If you make changes, please check the results very + # carefully on a variety of scripts. Testing with the -extrude + # options is particularly helpful in exercising all of the rules. + + # Wherever possible, bond strengths are defined in the following + # tables. There are two main stages to setting bond strengths and + # two types of tables: + # + # The first stage involves looking at each token individually and + # defining left and right bond strengths, according to if we want + # to break to the left or right side, and how good a break point it + # is. For example tokens like =, ||, && make good break points and + # will have low strengths, but one might want to break on either + # side to put them at the end of one line or beginning of the next. + # + # The second stage involves looking at certain pairs of tokens and + # defining a bond strength for that particular pair. This second + # stage has priority. + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 1. + # Set left and right bond strengths of individual tokens. + #--------------------------------------------------------------- + + # NOTE: NO_BREAK's set in this section first are HINTS which will + # probably not be honored. Essential NO_BREAKS's should be set in + # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end + # of this subroutine. + + # Note that we are setting defaults in this section. The user + # cannot change bond strengths but can cause the left and right + # bond strengths of any token type to be swapped through the use of + # the -wba and -wbb flags. In this way the user can determine if a + # breakpoint token should appear at the end of one line or the + # beginning of the next line. + + # The hash keys in this section are token types, plus the text of + # certain keywords like 'or', 'and'. + + # no break around possible filehandle + $left_bond_strength{'Z'} = NO_BREAK; + $right_bond_strength{'Z'} = NO_BREAK; + + # never put a bare word on a new line: + # example print (STDERR, "bla"); will fail with break after ( + $left_bond_strength{'w'} = NO_BREAK; + + # blanks always have infinite strength to force breaks after + # real tokens + $right_bond_strength{'b'} = NO_BREAK; + + # try not to break on exponentation + @q = qw# ** .. ... <=> #; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = (STRONG) x scalar(@q); + + # The comma-arrow has very low precedence but not a good break point + $left_bond_strength{'=>'} = NO_BREAK; + $right_bond_strength{'=>'} = NOMINAL; + + # ok to break after label + $left_bond_strength{'J'} = NO_BREAK; + $right_bond_strength{'J'} = NOMINAL; + $left_bond_strength{'j'} = STRONG; + $right_bond_strength{'j'} = STRONG; + $left_bond_strength{'A'} = STRONG; + $right_bond_strength{'A'} = STRONG; + + $left_bond_strength{'->'} = STRONG; + $right_bond_strength{'->'} = VERY_STRONG; + + $left_bond_strength{'CORE::'} = NOMINAL; + $right_bond_strength{'CORE::'} = NO_BREAK; + + # breaking AFTER modulus operator is ok: + @q = qw< % >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q); + + # Break AFTER math operators * and / + @q = qw< * / x >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = (NOMINAL) x scalar(@q); + + # Break AFTER weakest math operators + and - + # Make them weaker than * but a bit stronger than '.' + @q = qw< + - >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q); + + # breaking BEFORE these is just ok: + @q = qw# >> << #; + @right_bond_strength{@q} = (STRONG) x scalar(@q); + @left_bond_strength{@q} = (NOMINAL) x scalar(@q); + + # breaking before the string concatenation operator seems best + # because it can be hard to see at the end of a line + $right_bond_strength{'.'} = STRONG; + $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; + + @q = qw< } ] ) R >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = (NOMINAL) x scalar(@q); + + # make these a little weaker than nominal so that they get + # favored for end-of-line characters + @q = qw< != == =~ !~ ~~ !~~ >; + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q); + + # break AFTER these + @q = qw# < > | & >= <= #; + @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q); + + # breaking either before or after a quote is ok + # but bias for breaking before a quote + $left_bond_strength{'Q'} = NOMINAL; + $right_bond_strength{'Q'} = NOMINAL + 0.02; + $left_bond_strength{'q'} = NOMINAL; + $right_bond_strength{'q'} = NOMINAL; + + # starting a line with a keyword is usually ok + $left_bond_strength{'k'} = NOMINAL; + + # we usually want to bond a keyword strongly to what immediately + # follows, rather than leaving it stranded at the end of a line + $right_bond_strength{'k'} = STRONG; + + $left_bond_strength{'G'} = NOMINAL; + $right_bond_strength{'G'} = STRONG; + + # assignment operators + @q = qw( + = **= += *= &= <<= &&= + -= /= |= >>= ||= //= + .= %= ^= + x= + ); + + # Default is to break AFTER various assignment operators + @left_bond_strength{@q} = (STRONG) x scalar(@q); + @right_bond_strength{@q} = + ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q); + + # Default is to break BEFORE '&&' and '||' and '//' + # set strength of '||' to same as '=' so that chains like + # $a = $b || $c || $d will break before the first '||' + $right_bond_strength{'||'} = NOMINAL; + $left_bond_strength{'||'} = $right_bond_strength{'='}; + + # same thing for '//' + $right_bond_strength{'//'} = NOMINAL; + $left_bond_strength{'//'} = $right_bond_strength{'='}; + + # set strength of && a little higher than || + $right_bond_strength{'&&'} = NOMINAL; + $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; + + $left_bond_strength{';'} = VERY_STRONG; + $right_bond_strength{';'} = VERY_WEAK; + $left_bond_strength{'f'} = VERY_STRONG; + + # make right strength of for ';' a little less than '=' + # to make for contents break after the ';' to avoid this: + # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += + # $number_of_fields ) + # and make it weaker than ',' and 'and' too + $right_bond_strength{'f'} = VERY_WEAK - 0.03; + + # The strengths of ?/: should be somewhere between + # an '=' and a quote (NOMINAL), + # make strength of ':' slightly less than '?' to help + # break long chains of ? : after the colons + $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; + $right_bond_strength{':'} = NO_BREAK; + $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; + $right_bond_strength{'?'} = NO_BREAK; + + $left_bond_strength{','} = VERY_STRONG; + $right_bond_strength{','} = VERY_WEAK; + + # remaining digraphs and trigraphs not defined above + @q = qw( :: <> ++ --); + @left_bond_strength{@q} = (WEAK) x scalar(@q); + @right_bond_strength{@q} = (STRONG) x scalar(@q); + + # Set bond strengths of certain keywords + # make 'or', 'err', 'and' slightly weaker than a ',' + $left_bond_strength{'and'} = VERY_WEAK - 0.01; + $left_bond_strength{'or'} = VERY_WEAK - 0.02; + $left_bond_strength{'err'} = VERY_WEAK - 0.02; + $left_bond_strength{'xor'} = NOMINAL; + $right_bond_strength{'and'} = NOMINAL; + $right_bond_strength{'or'} = NOMINAL; + $right_bond_strength{'err'} = NOMINAL; + $right_bond_strength{'xor'} = STRONG; + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 2. + # Set binary rules for bond strengths between certain token types. + #--------------------------------------------------------------- + + # We have a little problem making tables which apply to the + # container tokens. Here is a list of container tokens and + # their types: + # + # type tokens // meaning + # { {, [, ( // indent + # } }, ], ) // outdent + # [ [ // left non-structural [ (enclosing an array index) + # ] ] // right non-structural square bracket + # ( ( // left non-structural paren + # ) ) // right non-structural paren + # L { // left non-structural curly brace (enclosing a key) + # R } // right non-structural curly brace + # + # Some rules apply to token types and some to just the token + # itself. We solve the problem by combining type and token into a + # new hash key for the container types. + # + # If a rule applies to a token 'type' then we need to make rules + # for each of these 'type.token' combinations: + # Type Type.Token + # { {{, {[, {( + # [ [[ + # ( (( + # L L{ + # } }}, }], }) + # ] ]] + # ) )) + # R R} + # + # If a rule applies to a token then we need to make rules for + # these 'type.token' combinations: + # Token Type.Token + # { {{, L{ + # [ {[, [[ + # ( {(, (( + # } }}, R} + # ] }], ]] + # ) }), )) + + # allow long lines before final { in an if statement, as in: + # if (.......... + # ..........) + # { + # + # Otherwise, the line before the { tends to be too short. + + $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; + $binary_bond_strength{'(('}{'{{'} = NOMINAL; + + # break on something like '} (', but keep this stronger than a ',' + # example is in 'howe.pl' + $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; + $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; + + # keep matrix and hash indices together + # but make them a little below STRONG to allow breaking open + # something like {'some-word'}{'some-very-long-word'} at the }{ + # (bracebrk.t) + $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; + $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; + + # increase strength to the point where a break in the following + # will be after the opening paren rather than at the arrow: + # $a->$b($c); + $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; + + $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; + + $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; + $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; + + #--------------------------------------------------------------- + # Binary NO_BREAK rules + #--------------------------------------------------------------- + + # use strict requires that bare word and => not be separated + $binary_bond_strength{'C'}{'=>'} = NO_BREAK; + $binary_bond_strength{'U'}{'=>'} = NO_BREAK; + + # Never break between a bareword and a following paren because + # perl may give an error. For example, if a break is placed + # between 'to_filehandle' and its '(' the following line will + # give a syntax error [Carp.pm]: my( $no) =fileno( + # to_filehandle( $in)) ; + $binary_bond_strength{'C'}{'(('} = NO_BREAK; + $binary_bond_strength{'C'}{'{('} = NO_BREAK; + $binary_bond_strength{'U'}{'(('} = NO_BREAK; + $binary_bond_strength{'U'}{'{('} = NO_BREAK; + + # use strict requires that bare word within braces not start new + # line + $binary_bond_strength{'L{'}{'w'} = NO_BREAK; + + $binary_bond_strength{'w'}{'R}'} = NO_BREAK; + + # use strict requires that bare word and => not be separated + $binary_bond_strength{'w'}{'=>'} = NO_BREAK; + + # use strict does not allow separating type info from trailing { } + # testfile is readmail.pl + $binary_bond_strength{'t'}{'L{'} = NO_BREAK; + $binary_bond_strength{'i'}{'L{'} = NO_BREAK; + + # As a defensive measure, do not break between a '(' and a + # filehandle. In some cases, this can cause an error. For + # example, the following program works: + # my $msg="hi!\n"; + # print + # ( STDOUT + # $msg + # ); + # + # But this program fails: + # my $msg="hi!\n"; + # print + # ( + # STDOUT + # $msg + # ); + # + # This is normally only a problem with the 'extrude' option + $binary_bond_strength{'(('}{'Y'} = NO_BREAK; + $binary_bond_strength{'{('}{'Y'} = NO_BREAK; + + # never break between sub name and opening paren + $binary_bond_strength{'w'}{'(('} = NO_BREAK; + $binary_bond_strength{'w'}{'{('} = NO_BREAK; + + # keep '}' together with ';' + $binary_bond_strength{'}}'}{';'} = NO_BREAK; + + # Breaking before a ++ can cause perl to guess wrong. For + # example the following line will cause a syntax error + # with -extrude if we break between '$i' and '++' [fixstyle2] + # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); + $nobreak_lhs{'++'} = NO_BREAK; + + # Do not break before a possible file handle + $nobreak_lhs{'Z'} = NO_BREAK; + + # use strict hates bare words on any new line. For + # example, a break before the underscore here provokes the + # wrath of use strict: + # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { + $nobreak_rhs{'F'} = NO_BREAK; + $nobreak_rhs{'CORE::'} = NO_BREAK; + + #--------------------------------------------------------------- + # Bond Strength BEGIN Section 3. + # Define tables and values for applying a small bias to the above + # values. + #--------------------------------------------------------------- + # Adding a small 'bias' to strengths is a simple way to make a line + # break at the first of a sequence of identical terms. For + # example, to force long string of conditional operators to break + # with each line ending in a ':', we can add a small number to the + # bond strength of each ':' (colon.t) + @bias_tokens = qw( : && || f and or . ); # tokens which get bias + $delta_bias = 0.0001; # a very small strength level + return; + + } ## end sub initialize_bond_strength_hashes + + sub set_bond_strengths { + + # patch-its always ok to break at end of line + $nobreak_to_go[$max_index_to_go] = 0; + + # we start a new set of bias values for each line + my %bias; + @bias{@bias_tokens} = (0) x scalar(@bias_tokens); + my $code_bias = -.01; # bias for closing block braces + + my $type = 'b'; + my $token = ' '; + my $last_type; + my $last_nonblank_type = $type; + my $last_nonblank_token = $token; + my $list_str = $left_bond_strength{'?'}; + + my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, + $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, + ); + + # main loop to compute bond strengths between each pair of tokens + foreach my $i ( 0 .. $max_index_to_go ) { + $last_type = $type; + if ( $type ne 'b' ) { + $last_nonblank_type = $type; + $last_nonblank_token = $token; + } + $type = $types_to_go[$i]; + + # strength on both sides of a blank is the same + if ( $type eq 'b' && $last_type ne 'b' ) { + $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; + next; + } + + $token = $tokens_to_go[$i]; + $block_type = $block_type_to_go[$i]; + $i_next = $i + 1; + $next_type = $types_to_go[$i_next]; + $next_token = $tokens_to_go[$i_next]; + $total_nesting_depth = $nesting_depth_to_go[$i_next]; + $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); + $next_nonblank_type = $types_to_go[$i_next_nonblank]; + $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + + # We are computing the strength of the bond between the current + # token and the NEXT token. + + #--------------------------------------------------------------- + # Bond Strength Section 1: + # First Approximation. + # Use minimum of individual left and right tabulated bond + # strengths. + #--------------------------------------------------------------- + my $bsr = $right_bond_strength{$type}; + my $bsl = $left_bond_strength{$next_nonblank_type}; + + # define right bond strengths of certain keywords + if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { + $bsr = $right_bond_strength{$token}; + } + elsif ( $token eq 'ne' or $token eq 'eq' ) { + $bsr = NOMINAL; + } + + # set terminal bond strength to the nominal value + # this will cause good preceding breaks to be retained + if ( $i_next_nonblank > $max_index_to_go ) { + $bsl = NOMINAL; + } + + # define right bond strengths of certain keywords + if ( $next_nonblank_type eq 'k' + && defined( $left_bond_strength{$next_nonblank_token} ) ) + { + $bsl = $left_bond_strength{$next_nonblank_token}; + } + elsif ($next_nonblank_token eq 'ne' + or $next_nonblank_token eq 'eq' ) + { + $bsl = NOMINAL; + } + elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { + $bsl = 0.9 * NOMINAL + 0.1 * STRONG; + } + + # Use the minimum of the left and right strengths. Note: it might + # seem that we would want to keep a NO_BREAK if either token has + # this value. This didn't work, for example because in an arrow + # list, it prevents the comma from separating from the following + # bare word (which is probably quoted by its arrow). So necessary + # NO_BREAK's have to be handled as special cases in the final + # section. + if ( !defined($bsr) ) { $bsr = VERY_STRONG } + if ( !defined($bsl) ) { $bsl = VERY_STRONG } + my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; + my $bond_str_1 = $bond_str; + + #--------------------------------------------------------------- + # Bond Strength Section 2: + # Apply hardwired rules.. + #--------------------------------------------------------------- + + # Patch to put terminal or clauses on a new line: Weaken the bond + # at an || followed by die or similar keyword to make the terminal + # or clause fall on a new line, like this: + # + # my $class = shift + # || die "Cannot add broadcast: No class identifier found"; + # + # Otherwise the break will be at the previous '=' since the || and + # = have the same starting strength and the or is biased, like + # this: + # + # my $class = + # shift || die "Cannot add broadcast: No class identifier found"; + # + # In any case if the user places a break at either the = or the || + # it should remain there. + if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { + if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) { + if ( $want_break_before{$token} && $i > 0 ) { + $bond_strength_to_go[ $i - 1 ] -= $delta_bias; + } + else { + $bond_str -= $delta_bias; + } + } + } + + # good to break after end of code blocks + if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { + + $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; + $code_bias += $delta_bias; + } + + if ( $type eq 'k' ) { + + # allow certain control keywords to stand out + if ( $next_nonblank_type eq 'k' + && $is_last_next_redo_return{$token} ) + { + $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; + } + + # Don't break after keyword my. This is a quick fix for a + # rare problem with perl. An example is this line from file + # Container.pm: + + # foreach my $question( Debian::DebConf::ConfigDb::gettree( + # $this->{'question'} ) ) + + if ( $token eq 'my' ) { + $bond_str = NO_BREAK; + } + + } + + # good to break before 'if', 'unless', etc + if ( $is_if_brace_follower{$next_nonblank_token} ) { + $bond_str = VERY_WEAK; + } + + if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { + + # FIXME: needs more testing + if ( $is_keyword_returning_list{$next_nonblank_token} ) { + $bond_str = $list_str if ( $bond_str > $list_str ); + } + + # keywords like 'unless', 'if', etc, within statements + # make good breaks + if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { + $bond_str = VERY_WEAK / 1.05; + } + } + + # try not to break before a comma-arrow + elsif ( $next_nonblank_type eq '=>' ) { + if ( $bond_str < STRONG ) { $bond_str = STRONG } + } + + #--------------------------------------------------------------- + # Additional hardwired NOBREAK rules + #--------------------------------------------------------------- + + # map1.t -- correct for a quirk in perl + if ( $token eq '(' + && $next_nonblank_type eq 'i' + && $last_nonblank_type eq 'k' + && $is_sort_map_grep{$last_nonblank_token} ) + + # /^(sort|map|grep)$/ ) + { + $bond_str = NO_BREAK; + } + + # extrude.t: do not break before paren at: + # -l pid_filename( + if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { + $bond_str = NO_BREAK; + } + + # in older version of perl, use strict can cause problems with + # breaks before bare words following opening parens. For example, + # this will fail under older versions if a break is made between + # '(' and 'MAIL': use strict; open( MAIL, "a long filename or + # command"); close MAIL; + if ( $type eq '{' ) { + + if ( $token eq '(' && $next_nonblank_type eq 'w' ) { + + # but it's fine to break if the word is followed by a '=>' + # or if it is obviously a sub call + my $i_next_next_nonblank = $i_next_nonblank + 1; + my $next_next_type = $types_to_go[$i_next_next_nonblank]; + if ( $next_next_type eq 'b' + && $i_next_nonblank < $max_index_to_go ) + { + $i_next_next_nonblank++; + $next_next_type = $types_to_go[$i_next_next_nonblank]; + } + + # We'll check for an old breakpoint and keep a leading + # bareword if it was that way in the input file. + # Presumably it was ok that way. For example, the + # following would remain unchanged: + # + # @months = ( + # January, February, March, April, + # May, June, July, August, + # September, October, November, December, + # ); + # + # This should be sufficient: + if ( + !$old_breakpoint_to_go[$i] + && ( $next_next_type eq ',' + || $next_next_type eq '}' ) + ) + { + $bond_str = NO_BREAK; + } + } + } + + # Do not break between a possible filehandle and a ? or / and do + # not introduce a break after it if there is no blank + # (extrude.t) + elsif ( $type eq 'Z' ) { + + # don't break.. + if ( + + # if there is no blank and we do not want one. Examples: + # print $x++ # do not break after $x + # print HTML"HELLO" # break ok after HTML + ( + $next_type ne 'b' + && defined( $want_left_space{$next_type} ) + && $want_left_space{$next_type} == WS_NO + ) + + # or we might be followed by the start of a quote + || $next_nonblank_type =~ /^[\/\?]$/ + ) + { + $bond_str = NO_BREAK; + } + } + + # Breaking before a ? before a quote can cause trouble if + # they are not separated by a blank. + # Example: a syntax error occurs if you break before the ? here + # my$logic=join$all?' && ':' || ',@regexps; + # From: Professional_Perl_Programming_Code/multifind.pl + if ( $next_nonblank_type eq '?' ) { + $bond_str = NO_BREAK + if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); + } + + # Breaking before a . followed by a number + # can cause trouble if there is no intervening space + # Example: a syntax error occurs if you break before the .2 here + # $str .= pack($endian.2, ensurrogate($ord)); + # From: perl58/Unicode.pm + elsif ( $next_nonblank_type eq '.' ) { + $bond_str = NO_BREAK + if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); + } + + my $bond_str_2 = $bond_str; + + #--------------------------------------------------------------- + # End of hardwired rules + #--------------------------------------------------------------- + + #--------------------------------------------------------------- + # Bond Strength Section 3: + # Apply table rules. These have priority over the above + # hardwired rules. + #--------------------------------------------------------------- + + my $tabulated_bond_str; + my $ltype = $type; + my $rtype = $next_nonblank_type; + if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token } + if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) { + $rtype = $next_nonblank_type . $next_nonblank_token; + } + + if ( $binary_bond_strength{$ltype}{$rtype} ) { + $bond_str = $binary_bond_strength{$ltype}{$rtype}; + $tabulated_bond_str = $bond_str; + } + + if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { + $bond_str = NO_BREAK; + $tabulated_bond_str = $bond_str; + } + my $bond_str_3 = $bond_str; + + # If the hardwired rules conflict with the tabulated bond + # strength then there is an inconsistency that should be fixed + FORMATTER_DEBUG_FLAG_BOND_TABLES + && $tabulated_bond_str + && $bond_str_1 + && $bond_str_1 != $bond_str_2 + && $bond_str_2 != $tabulated_bond_str + && do { + print STDERR +"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; + }; + + #----------------------------------------------------------------- + # Bond Strength Section 4: + # Modify strengths of certain tokens which often occur in sequence + # by adding a small bias to each one in turn so that the breaks + # occur from left to right. + # + # Note that we only changing strengths by small amounts here, + # and usually increasing, so we should not be altering any NO_BREAKs. + # Other routines which check for NO_BREAKs will use a tolerance + # of one to avoid any problem. + #----------------------------------------------------------------- + + # The bias tables use special keys + my $left_key = bias_table_key( $type, $token ); + my $right_key = + bias_table_key( $next_nonblank_type, $next_nonblank_token ); + + # add any bias set by sub scan_list at old comma break points. + if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] } + + # bias left token + elsif ( defined( $bias{$left_key} ) ) { + if ( !$want_break_before{$left_key} ) { + $bias{$left_key} += $delta_bias; + $bond_str += $bias{$left_key}; + } + } + + # bias right token + if ( defined( $bias{$right_key} ) ) { + if ( $want_break_before{$right_key} ) { + + # for leading '.' align all but 'short' quotes; the idea + # is to not place something like "\n" on a single line. + if ( $right_key eq '.' ) { + unless ( + $last_nonblank_type eq '.' + && ( + length($token) <= + $rOpts_short_concatenation_item_length ) + && ( !$is_closing_token{$token} ) + ) + { + $bias{$right_key} += $delta_bias; + } + } + else { + $bias{$right_key} += $delta_bias; + } + $bond_str += $bias{$right_key}; + } + } + my $bond_str_4 = $bond_str; + + #--------------------------------------------------------------- + # Bond Strength Section 5: + # Fifth Approximation. + # Take nesting depth into account by adding the nesting depth + # to the bond strength. + #--------------------------------------------------------------- + my $strength; + + if ( defined($bond_str) && !$nobreak_to_go[$i] ) { + if ( $total_nesting_depth > 0 ) { + $strength = $bond_str + $total_nesting_depth; + } + else { + $strength = $bond_str; + } + } + else { + $strength = NO_BREAK; + } + + #--------------------------------------------------------------- + # Bond Strength Section 6: + # Sixth Approximation. Welds. + #--------------------------------------------------------------- + + # Do not allow a break within welds, + if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK } + + # But encourage breaking after opening welded tokens + elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) { + $strength -= 1; + } + + # always break after side comment + if ( $type eq '#' ) { $strength = 0 } + + $bond_strength_to_go[$i] = $strength; + + FORMATTER_DEBUG_FLAG_BOND && do { + my $str = substr( $token, 0, 15 ); + $str .= ' ' x ( 16 - length($str) ); + print STDOUT +"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; + }; + } ## end main loop + return; + } ## end sub set_bond_strengths +} + +sub pad_array_to_go { + + # to simplify coding in scan_list and set_bond_strengths, it helps + # to create some extra blank tokens at the end of the arrays + $tokens_to_go[ $max_index_to_go + 1 ] = ''; + $tokens_to_go[ $max_index_to_go + 2 ] = ''; + $types_to_go[ $max_index_to_go + 1 ] = 'b'; + $types_to_go[ $max_index_to_go + 2 ] = 'b'; + $nesting_depth_to_go[ $max_index_to_go + 1 ] = + $nesting_depth_to_go[$max_index_to_go]; + + # /^[R\}\)\]]$/ + if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { + if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { + + # shouldn't happen: + unless ( get_saw_brace_error() ) { + warning( +"Program bug in scan_list: hit nesting error which should have been caught\n" + ); + report_definite_bug(); + } + } + else { + $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; + } + } + + # /^[L\{\(\[]$/ + elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { + $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; + } + return; +} + +{ # begin scan_list + + my ( + $block_type, $current_depth, + $depth, $i, + $i_last_nonblank_token, $last_colon_sequence_number, + $last_nonblank_token, $last_nonblank_type, + $last_nonblank_block_type, $last_old_breakpoint_count, + $minimum_depth, $next_nonblank_block_type, + $next_nonblank_token, $next_nonblank_type, + $old_breakpoint_count, $starting_breakpoint_count, + $starting_depth, $token, + $type, $type_sequence, + ); + + my ( + @breakpoint_stack, @breakpoint_undo_stack, + @comma_index, @container_type, + @identifier_count_stack, @index_before_arrow, + @interrupted_list, @item_count_stack, + @last_comma_index, @last_dot_index, + @last_nonblank_type, @old_breakpoint_count_stack, + @opening_structure_index_stack, @rfor_semicolon_list, + @has_old_logical_breakpoints, @rand_or_list, + @i_equals, + ); + + # routine to define essential variables when we go 'up' to + # a new depth + sub check_for_new_minimum_depth { + my $depth = shift; + if ( $depth < $minimum_depth ) { + + $minimum_depth = $depth; + + # these arrays need not retain values between calls + $breakpoint_stack[$depth] = $starting_breakpoint_count; + $container_type[$depth] = ""; + $identifier_count_stack[$depth] = 0; + $index_before_arrow[$depth] = -1; + $interrupted_list[$depth] = 1; + $item_count_stack[$depth] = 0; + $last_nonblank_type[$depth] = ""; + $opening_structure_index_stack[$depth] = -1; + + $breakpoint_undo_stack[$depth] = undef; + $comma_index[$depth] = undef; + $last_comma_index[$depth] = undef; + $last_dot_index[$depth] = undef; + $old_breakpoint_count_stack[$depth] = undef; + $has_old_logical_breakpoints[$depth] = 0; + $rand_or_list[$depth] = []; + $rfor_semicolon_list[$depth] = []; + $i_equals[$depth] = -1; + + # these arrays must retain values between calls + if ( !defined( $has_broken_sublist[$depth] ) ) { + $dont_align[$depth] = 0; + $has_broken_sublist[$depth] = 0; + $want_comma_break[$depth] = 0; + } + } + return; + } + + # routine to decide which commas to break at within a container; + # returns: + # $bp_count = number of comma breakpoints set + # $do_not_break_apart = a flag indicating if container need not + # be broken open + sub set_comma_breakpoints { + + my $dd = shift; + my $bp_count = 0; + my $do_not_break_apart = 0; + + # anything to do? + if ( $item_count_stack[$dd] ) { + + # handle commas not in containers... + if ( $dont_align[$dd] ) { + do_uncontained_comma_breaks($dd); + } + + # handle commas within containers... + else { + my $fbc = $forced_breakpoint_count; + + # always open comma lists not preceded by keywords, + # barewords, identifiers (that is, anything that doesn't + # look like a function call) + my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; + + set_comma_breakpoints_do( + $dd, + $opening_structure_index_stack[$dd], + $i, + $item_count_stack[$dd], + $identifier_count_stack[$dd], + $comma_index[$dd], + $next_nonblank_type, + $container_type[$dd], + $interrupted_list[$dd], + \$do_not_break_apart, + $must_break_open, + ); + $bp_count = $forced_breakpoint_count - $fbc; + $do_not_break_apart = 0 if $must_break_open; + } + } + return ( $bp_count, $do_not_break_apart ); + } + + sub do_uncontained_comma_breaks { + + # Handle commas not in containers... + # This is a catch-all routine for commas that we + # don't know what to do with because the don't fall + # within containers. We will bias the bond strength + # to break at commas which ended lines in the input + # file. This usually works better than just trying + # to put as many items on a line as possible. A + # downside is that if the input file is garbage it + # won't work very well. However, the user can always + # prevent following the old breakpoints with the + # -iob flag. + my $dd = shift; + my $bias = -.01; + my $old_comma_break_count = 0; + foreach my $ii ( @{ $comma_index[$dd] } ) { + if ( $old_breakpoint_to_go[$ii] ) { + $old_comma_break_count++; + $bond_strength_to_go[$ii] = $bias; + + # reduce bias magnitude to force breaks in order + $bias *= 0.99; + } + } + + # Also put a break before the first comma if + # (1) there was a break there in the input, and + # (2) there was exactly one old break before the first comma break + # (3) OLD: there are multiple old comma breaks + # (3) NEW: there are one or more old comma breaks (see return example) + # + # For example, we will follow the user and break after + # 'print' in this snippet: + # print + # "conformability (Not the same dimension)\n", + # "\t", $have, " is ", text_unit($hu), "\n", + # "\t", $want, " is ", text_unit($wu), "\n", + # ; + # + # Another example, just one comma, where we will break after + # the return: + # return + # $x * cos($a) - $y * sin($a), + # $x * sin($a) + $y * cos($a); + + # Breaking a print statement: + # print SAVEOUT + # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", + # ( $? & 128 ) ? " -- core dumped" : "", "\n"; + # + # But we will not force a break after the opening paren here + # (causes a blinker): + # $heap->{stream}->set_output_filter( + # poe::filter::reference->new('myotherfreezer') ), + # ; + # + my $i_first_comma = $comma_index[$dd]->[0]; + if ( $old_breakpoint_to_go[$i_first_comma] ) { + my $level_comma = $levels_to_go[$i_first_comma]; + my $ibreak = -1; + my $obp_count = 0; + for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) { + if ( $old_breakpoint_to_go[$ii] ) { + $obp_count++; + last if ( $obp_count > 1 ); + $ibreak = $ii + if ( $levels_to_go[$ii] == $level_comma ); + } + } + + # Changed rule from multiple old commas to just one here: + if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) + { + # Do not to break before an opening token because + # it can lead to "blinkers". + my $ibreakm = $ibreak; + $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' ); + if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ ) + { + set_forced_breakpoint($ibreak); + } + } + } + return; + } + + my %is_logical_container; + + BEGIN { + my @q = qw# if elsif unless while and or err not && | || ? : ! #; + @is_logical_container{@q} = (1) x scalar(@q); + } + + sub set_for_semicolon_breakpoints { + my $dd = shift; + foreach ( @{ $rfor_semicolon_list[$dd] } ) { + set_forced_breakpoint($_); + } + return; + } + + sub set_logical_breakpoints { + my $dd = shift; + if ( + $item_count_stack[$dd] == 0 + && $is_logical_container{ $container_type[$dd] } + + || $has_old_logical_breakpoints[$dd] + ) + { + + # Look for breaks in this order: + # 0 1 2 3 + # or and || && + foreach my $i ( 0 .. 3 ) { + if ( $rand_or_list[$dd][$i] ) { + foreach ( @{ $rand_or_list[$dd][$i] } ) { + set_forced_breakpoint($_); + } + + # break at any 'if' and 'unless' too + foreach ( @{ $rand_or_list[$dd][4] } ) { + set_forced_breakpoint($_); + } + $rand_or_list[$dd] = []; + last; + } + } + } + return; + } + + sub is_unbreakable_container { + + # never break a container of one of these types + # because bad things can happen (map1.t) + my $dd = shift; + return $is_sort_map_grep{ $container_type[$dd] }; + } + + sub scan_list { + + # This routine is responsible for setting line breaks for all lists, + # so that hierarchical structure can be displayed and so that list + # items can be vertically aligned. The output of this routine is + # stored in the array @forced_breakpoint_to_go, which is used to set + # final breakpoints. + + $starting_depth = $nesting_depth_to_go[0]; + + $block_type = ' '; + $current_depth = $starting_depth; + $i = -1; + $last_colon_sequence_number = -1; + $last_nonblank_token = ';'; + $last_nonblank_type = ';'; + $last_nonblank_block_type = ' '; + $last_old_breakpoint_count = 0; + $minimum_depth = $current_depth + 1; # forces update in check below + $old_breakpoint_count = 0; + $starting_breakpoint_count = $forced_breakpoint_count; + $token = ';'; + $type = ';'; + $type_sequence = ''; + + my $total_depth_variation = 0; + my $i_old_assignment_break; + my $depth_last = $starting_depth; + + check_for_new_minimum_depth($current_depth); + + my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0; + my $want_previous_breakpoint = -1; + + my $saw_good_breakpoint; + my $i_line_end = -1; + my $i_line_start = -1; + + # loop over all tokens in this batch + while ( ++$i <= $max_index_to_go ) { + if ( $type ne 'b' ) { + $i_last_nonblank_token = $i - 1; + $last_nonblank_type = $type; + $last_nonblank_token = $token; + $last_nonblank_block_type = $block_type; + } ## end if ( $type ne 'b' ) + $type = $types_to_go[$i]; + $block_type = $block_type_to_go[$i]; + $token = $tokens_to_go[$i]; + $type_sequence = $type_sequence_to_go[$i]; + my $next_type = $types_to_go[ $i + 1 ]; + my $next_token = $tokens_to_go[ $i + 1 ]; + my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); + $next_nonblank_type = $types_to_go[$i_next_nonblank]; + $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; + + # set break if flag was set + if ( $want_previous_breakpoint >= 0 ) { + set_forced_breakpoint($want_previous_breakpoint); + $want_previous_breakpoint = -1; + } + + $last_old_breakpoint_count = $old_breakpoint_count; + if ( $old_breakpoint_to_go[$i] ) { + $i_line_end = $i; + $i_line_start = $i_next_nonblank; + + $old_breakpoint_count++; + + # Break before certain keywords if user broke there and + # this is a 'safe' break point. The idea is to retain + # any preferred breaks for sequential list operations, + # like a schwartzian transform. + if ($rOpts_break_at_old_keyword_breakpoints) { + if ( + $next_nonblank_type eq 'k' + && $is_keyword_returning_list{$next_nonblank_token} + && ( $type =~ /^[=\)\]\}Riw]$/ + || $type eq 'k' + && $is_keyword_returning_list{$token} ) + ) + { + + # we actually have to set this break next time through + # the loop because if we are at a closing token (such + # as '}') which forms a one-line block, this break might + # get undone. + $want_previous_breakpoint = $i; + } ## end if ( $next_nonblank_type...) + } ## end if ($rOpts_break_at_old_keyword_breakpoints) + + # Break before attributes if user broke there + if ($rOpts_break_at_old_attribute_breakpoints) { + if ( $next_nonblank_type eq 'A' ) { + $want_previous_breakpoint = $i; + } + } + + # remember an = break as possible good break point + if ( $is_assignment{$type} ) { + $i_old_assignment_break = $i; + } + elsif ( $is_assignment{$next_nonblank_type} ) { + $i_old_assignment_break = $i_next_nonblank; + } + } ## end if ( $old_breakpoint_to_go...) + + next if ( $type eq 'b' ); + $depth = $nesting_depth_to_go[ $i + 1 ]; + + $total_depth_variation += abs( $depth - $depth_last ); + $depth_last = $depth; + + # safety check - be sure we always break after a comment + # Shouldn't happen .. an error here probably means that the + # nobreak flag did not get turned off correctly during + # formatting. + if ( $type eq '#' ) { + if ( $i != $max_index_to_go ) { + warning( +"Non-fatal program bug: backup logic needed to break after a comment\n" + ); + report_definite_bug(); + $nobreak_to_go[$i] = 0; + set_forced_breakpoint($i); + } ## end if ( $i != $max_index_to_go) + } ## end if ( $type eq '#' ) + + # Force breakpoints at certain tokens in long lines. + # Note that such breakpoints will be undone later if these tokens + # are fully contained within parens on a line. + if ( + + # break before a keyword within a line + $type eq 'k' + && $i > 0 + + # if one of these keywords: + && $token =~ /^(if|unless|while|until|for)$/ + + # but do not break at something like '1 while' + && ( $last_nonblank_type ne 'n' || $i > 2 ) + + # and let keywords follow a closing 'do' brace + && $last_nonblank_block_type ne 'do' + + && ( + $is_long_line + + # or container is broken (by side-comment, etc) + || ( $next_nonblank_token eq '(' + && $mate_index_to_go[$i_next_nonblank] < $i ) + ) + ) + { + set_forced_breakpoint( $i - 1 ); + } ## end if ( $type eq 'k' && $i...) + + # remember locations of '||' and '&&' for possible breaks if we + # decide this is a long logical expression. + if ( $type eq '||' ) { + push @{ $rand_or_list[$depth][2] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } ## end if ( $type eq '||' ) + elsif ( $type eq '&&' ) { + push @{ $rand_or_list[$depth][3] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } ## end elsif ( $type eq '&&' ) + elsif ( $type eq 'f' ) { + push @{ $rfor_semicolon_list[$depth] }, $i; + } + elsif ( $type eq 'k' ) { + if ( $token eq 'and' ) { + push @{ $rand_or_list[$depth][1] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } ## end if ( $token eq 'and' ) + + # break immediately at 'or's which are probably not in a logical + # block -- but we will break in logical breaks below so that + # they do not add to the forced_breakpoint_count + elsif ( $token eq 'or' ) { + push @{ $rand_or_list[$depth][0] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + if ( $is_logical_container{ $container_type[$depth] } ) { + } + else { + if ($is_long_line) { set_forced_breakpoint($i) } + elsif ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + $saw_good_breakpoint = 1; + } + } ## end else [ if ( $is_logical_container...)] + } ## end elsif ( $token eq 'or' ) + elsif ( $token eq 'if' || $token eq 'unless' ) { + push @{ $rand_or_list[$depth][4] }, $i; + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + set_forced_breakpoint($i); + } + } ## end elsif ( $token eq 'if' ||...) + } ## end elsif ( $type eq 'k' ) + elsif ( $is_assignment{$type} ) { + $i_equals[$depth] = $i; + } + + if ($type_sequence) { + + # handle any postponed closing breakpoints + if ( $token =~ /^[\)\]\}\:]$/ ) { + if ( $type eq ':' ) { + $last_colon_sequence_number = $type_sequence; + + # retain break at a ':' line break + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_ternary_breakpoints ) + { + + set_forced_breakpoint($i); + + # break at previous '=' + if ( $i_equals[$depth] > 0 ) { + set_forced_breakpoint( $i_equals[$depth] ); + $i_equals[$depth] = -1; + } + } ## end if ( ( $i == $i_line_start...)) + } ## end if ( $type eq ':' ) + if ( defined( $postponed_breakpoint{$type_sequence} ) ) { + my $inc = ( $type eq ':' ) ? 0 : 1; + set_forced_breakpoint( $i - $inc ); + delete $postponed_breakpoint{$type_sequence}; + } + } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(]) + + # set breaks at ?/: if they will get separated (and are + # not a ?/: chain), or if the '?' is at the end of the + # line + elsif ( $token eq '?' ) { + my $i_colon = $mate_index_to_go[$i]; + if ( + $i_colon <= 0 # the ':' is not in this batch + || $i == 0 # this '?' is the first token of the line + || $i == + $max_index_to_go # or this '?' is the last token + ) + { + + # don't break at a '?' if preceded by ':' on + # this line of previous ?/: pair on this line. + # This is an attempt to preserve a chain of ?/: + # expressions (elsif2.t). And don't break if + # this has a side comment. + set_forced_breakpoint($i) + unless ( + $type_sequence == ( + $last_colon_sequence_number + + TYPE_SEQUENCE_INCREMENT + ) + || $tokens_to_go[$max_index_to_go] eq '#' + ); + set_closing_breakpoint($i); + } ## end if ( $i_colon <= 0 ||...) + } ## end elsif ( $token eq '?' ) + } ## end if ($type_sequence) + +#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; + + #------------------------------------------------------------ + # Handle Increasing Depth.. + # + # prepare for a new list when depth increases + # token $i is a '(','{', or '[' + #------------------------------------------------------------ + if ( $depth > $current_depth ) { + + $breakpoint_stack[$depth] = $forced_breakpoint_count; + $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; + $has_broken_sublist[$depth] = 0; + $identifier_count_stack[$depth] = 0; + $index_before_arrow[$depth] = -1; + $interrupted_list[$depth] = 0; + $item_count_stack[$depth] = 0; + $last_comma_index[$depth] = undef; + $last_dot_index[$depth] = undef; + $last_nonblank_type[$depth] = $last_nonblank_type; + $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; + $opening_structure_index_stack[$depth] = $i; + $rand_or_list[$depth] = []; + $rfor_semicolon_list[$depth] = []; + $i_equals[$depth] = -1; + $want_comma_break[$depth] = 0; + $container_type[$depth] = + ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) + ? $last_nonblank_token + : ""; + $has_old_logical_breakpoints[$depth] = 0; + + # if line ends here then signal closing token to break + if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) + { + set_closing_breakpoint($i); + } + + # Not all lists of values should be vertically aligned.. + $dont_align[$depth] = + + # code BLOCKS are handled at a higher level + ( $block_type ne "" ) + + # certain paren lists + || ( $type eq '(' ) && ( + + # it does not usually look good to align a list of + # identifiers in a parameter list, as in: + # my($var1, $var2, ...) + # (This test should probably be refined, for now I'm just + # testing for any keyword) + ( $last_nonblank_type eq 'k' ) + + # a trailing '(' usually indicates a non-list + || ( $next_nonblank_type eq '(' ) + ); + + # patch to outdent opening brace of long if/for/.. + # statements (like this one). See similar coding in + # set_continuation breaks. We have also catch it here for + # short line fragments which otherwise will not go through + # set_continuation_breaks. + if ( + $block_type + + # if we have the ')' but not its '(' in this batch.. + && ( $last_nonblank_token eq ')' ) + && $mate_index_to_go[$i_last_nonblank_token] < 0 + + # and user wants brace to left + && !$rOpts->{'opening-brace-always-on-right'} + + && ( $type eq '{' ) # should be true + && ( $token eq '{' ) # should be true + ) + { + set_forced_breakpoint( $i - 1 ); + } ## end if ( $block_type && ( ...)) + } ## end if ( $depth > $current_depth) + + #------------------------------------------------------------ + # Handle Decreasing Depth.. + # + # finish off any old list when depth decreases + # token $i is a ')','}', or ']' + #------------------------------------------------------------ + elsif ( $depth < $current_depth ) { + + check_for_new_minimum_depth($depth); + + # force all outer logical containers to break after we see on + # old breakpoint + $has_old_logical_breakpoints[$depth] ||= + $has_old_logical_breakpoints[$current_depth]; + + # Patch to break between ') {' if the paren list is broken. + # There is similar logic in set_continuation_breaks for + # non-broken lists. + if ( $token eq ')' + && $next_nonblank_block_type + && $interrupted_list[$current_depth] + && $next_nonblank_type eq '{' + && !$rOpts->{'opening-brace-always-on-right'} ) + { + set_forced_breakpoint($i); + } ## end if ( $token eq ')' && ... + +#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; + + # set breaks at commas if necessary + my ( $bp_count, $do_not_break_apart ) = + set_comma_breakpoints($current_depth); + + my $i_opening = $opening_structure_index_stack[$current_depth]; + my $saw_opening_structure = ( $i_opening >= 0 ); + + # this term is long if we had to break at interior commas.. + my $is_long_term = $bp_count > 0; + + # If this is a short container with one or more comma arrows, + # then we will mark it as a long term to open it if requested. + # $rOpts_comma_arrow_breakpoints = + # 0 - open only if comma precedes closing brace + # 1 - stable: except for one line blocks + # 2 - try to form 1 line blocks + # 3 - ignore => + # 4 - always open up if vt=0 + # 5 - stable: even for one line blocks if vt=0 + if ( !$is_long_term + && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/ + && $index_before_arrow[ $depth + 1 ] > 0 + && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } + ) + { + $is_long_term = $rOpts_comma_arrow_breakpoints == 4 + || ( $rOpts_comma_arrow_breakpoints == 0 + && $last_nonblank_token eq ',' ) + || ( $rOpts_comma_arrow_breakpoints == 5 + && $old_breakpoint_to_go[$i_opening] ); + } ## end if ( !$is_long_term &&...) + + # mark term as long if the length between opening and closing + # parens exceeds allowed line length + if ( !$is_long_term && $saw_opening_structure ) { + my $i_opening_minus = find_token_starting_list($i_opening); + + # Note: we have to allow for one extra space after a + # closing token so that we do not strand a comma or + # semicolon, hence the '>=' here (oneline.t) + # Note: we ignore left weld lengths here for best results + $is_long_term = + excess_line_length( $i_opening_minus, $i, 1 ) >= 0; + } ## end if ( !$is_long_term &&...) + + # We've set breaks after all comma-arrows. Now we have to + # undo them if this can be a one-line block + # (the only breakpoints set will be due to comma-arrows) + if ( + + # user doesn't require breaking after all comma-arrows + ( $rOpts_comma_arrow_breakpoints != 0 ) + && ( $rOpts_comma_arrow_breakpoints != 4 ) + + # and if the opening structure is in this batch + && $saw_opening_structure + + # and either on the same old line + && ( + $old_breakpoint_count_stack[$current_depth] == + $last_old_breakpoint_count + + # or user wants to form long blocks with arrows + || $rOpts_comma_arrow_breakpoints == 2 + ) + + # and we made some breakpoints between the opening and closing + && ( $breakpoint_undo_stack[$current_depth] < + $forced_breakpoint_undo_count ) + + # and this block is short enough to fit on one line + # Note: use < because need 1 more space for possible comma + && !$is_long_term + + ) + { + undo_forced_breakpoint_stack( + $breakpoint_undo_stack[$current_depth] ); + } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) + + # now see if we have any comma breakpoints left + my $has_comma_breakpoints = + ( $breakpoint_stack[$current_depth] != + $forced_breakpoint_count ); + + # update broken-sublist flag of the outer container + $has_broken_sublist[$depth] = + $has_broken_sublist[$depth] + || $has_broken_sublist[$current_depth] + || $is_long_term + || $has_comma_breakpoints; + +# Having come to the closing ')', '}', or ']', now we have to decide if we +# should 'open up' the structure by placing breaks at the opening and +# closing containers. This is a tricky decision. Here are some of the +# basic considerations: +# +# -If this is a BLOCK container, then any breakpoints will have already +# been set (and according to user preferences), so we need do nothing here. +# +# -If we have a comma-separated list for which we can align the list items, +# then we need to do so because otherwise the vertical aligner cannot +# currently do the alignment. +# +# -If this container does itself contain a container which has been broken +# open, then it should be broken open to properly show the structure. +# +# -If there is nothing to align, and no other reason to break apart, +# then do not do it. +# +# We will not break open the parens of a long but 'simple' logical expression. +# For example: +# +# This is an example of a simple logical expression and its formatting: +# +# if ( $bigwasteofspace1 && $bigwasteofspace2 +# || $bigwasteofspace3 && $bigwasteofspace4 ) +# +# Most people would prefer this than the 'spacey' version: +# +# if ( +# $bigwasteofspace1 && $bigwasteofspace2 +# || $bigwasteofspace3 && $bigwasteofspace4 +# ) +# +# To illustrate the rules for breaking logical expressions, consider: +# +# FULLY DENSE: +# if ( $opt_excl +# and ( exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc )) +# +# This is on the verge of being difficult to read. The current default is to +# open it up like this: +# +# DEFAULT: +# if ( +# $opt_excl +# and ( exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc ) +# ) +# +# This is a compromise which tries to avoid being too dense and to spacey. +# A more spaced version would be: +# +# SPACEY: +# if ( +# $opt_excl +# and ( +# exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc +# ) +# ) +# +# Some people might prefer the spacey version -- an option could be added. The +# innermost expression contains a long block '( exists $ids_... ')'. +# +# Here is how the logic goes: We will force a break at the 'or' that the +# innermost expression contains, but we will not break apart its opening and +# closing containers because (1) it contains no multi-line sub-containers itself, +# and (2) there is no alignment to be gained by breaking it open like this +# +# and ( +# exists $ids_excl_uc{$id_uc} +# or grep $id_uc =~ /$_/, @ids_excl_uc +# ) +# +# (although this looks perfectly ok and might be good for long expressions). The +# outer 'if' container, though, contains a broken sub-container, so it will be +# broken open to avoid too much density. Also, since it contains no 'or's, there +# will be a forced break at its 'and'. + + # set some flags telling something about this container.. + my $is_simple_logical_expression = 0; + if ( $item_count_stack[$current_depth] == 0 + && $saw_opening_structure + && $tokens_to_go[$i_opening] eq '(' + && $is_logical_container{ $container_type[$current_depth] } + ) + { + + # This seems to be a simple logical expression with + # no existing breakpoints. Set a flag to prevent + # opening it up. + if ( !$has_comma_breakpoints ) { + $is_simple_logical_expression = 1; + } + + # This seems to be a simple logical expression with + # breakpoints (broken sublists, for example). Break + # at all 'or's and '||'s. + else { + set_logical_breakpoints($current_depth); + } + } ## end if ( $item_count_stack...) + + if ( $is_long_term + && @{ $rfor_semicolon_list[$current_depth] } ) + { + set_for_semicolon_breakpoints($current_depth); + + # open up a long 'for' or 'foreach' container to allow + # leading term alignment unless -lp is used. + $has_comma_breakpoints = 1 + unless $rOpts_line_up_parentheses; + } ## end if ( $is_long_term && ...) + + if ( + + # breaks for code BLOCKS are handled at a higher level + !$block_type + + # we do not need to break at the top level of an 'if' + # type expression + && !$is_simple_logical_expression + + ## modification to keep ': (' containers vertically tight; + ## but probably better to let user set -vt=1 to avoid + ## inconsistency with other paren types + ## && ($container_type[$current_depth] ne ':') + + # otherwise, we require one of these reasons for breaking: + && ( + + # - this term has forced line breaks + $has_comma_breakpoints + + # - the opening container is separated from this batch + # for some reason (comment, blank line, code block) + # - this is a non-paren container spanning multiple lines + || !$saw_opening_structure + + # - this is a long block contained in another breakable + # container + || ( $is_long_term + && $container_environment_to_go[$i_opening] ne + 'BLOCK' ) + ) + ) + { + + # For -lp option, we must put a breakpoint before + # the token which has been identified as starting + # this indentation level. This is necessary for + # proper alignment. + if ( $rOpts_line_up_parentheses && $saw_opening_structure ) + { + my $item = $leading_spaces_to_go[ $i_opening + 1 ]; + if ( $i_opening + 1 < $max_index_to_go + && $types_to_go[ $i_opening + 1 ] eq 'b' ) + { + $item = $leading_spaces_to_go[ $i_opening + 2 ]; + } + if ( defined($item) ) { + my $i_start_2 = $item->get_starting_index(); + if ( + defined($i_start_2) + + # we are breaking after an opening brace, paren, + # so don't break before it too + && $i_start_2 ne $i_opening + ) + { + + # Only break for breakpoints at the same + # indentation level as the opening paren + my $test1 = $nesting_depth_to_go[$i_opening]; + my $test2 = $nesting_depth_to_go[$i_start_2]; + if ( $test2 == $test1 ) { + set_forced_breakpoint( $i_start_2 - 1 ); + } + } ## end if ( defined($i_start_2...)) + } ## end if ( defined($item) ) + } ## end if ( $rOpts_line_up_parentheses...) + + # break after opening structure. + # note: break before closing structure will be automatic + if ( $minimum_depth <= $current_depth ) { + + set_forced_breakpoint($i_opening) + unless ( $do_not_break_apart + || is_unbreakable_container($current_depth) ); + + # break at ',' of lower depth level before opening token + if ( $last_comma_index[$depth] ) { + set_forced_breakpoint( $last_comma_index[$depth] ); + } + + # break at '.' of lower depth level before opening token + if ( $last_dot_index[$depth] ) { + set_forced_breakpoint( $last_dot_index[$depth] ); + } + + # break before opening structure if preceded by another + # closing structure and a comma. This is normally + # done by the previous closing brace, but not + # if it was a one-line block. + if ( $i_opening > 2 ) { + my $i_prev = + ( $types_to_go[ $i_opening - 1 ] eq 'b' ) + ? $i_opening - 2 + : $i_opening - 1; + + if ( $types_to_go[$i_prev] eq ',' + && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ ) + { + set_forced_breakpoint($i_prev); + } + + # also break before something like ':(' or '?(' + # if appropriate. + elsif ( + $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) + { + my $token_prev = $tokens_to_go[$i_prev]; + if ( $want_break_before{$token_prev} ) { + set_forced_breakpoint($i_prev); + } + } ## end elsif ( $types_to_go[$i_prev...]) + } ## end if ( $i_opening > 2 ) + } ## end if ( $minimum_depth <=...) + + # break after comma following closing structure + if ( $next_type eq ',' ) { + set_forced_breakpoint( $i + 1 ); + } + + # break before an '=' following closing structure + if ( + $is_assignment{$next_nonblank_type} + && ( $breakpoint_stack[$current_depth] != + $forced_breakpoint_count ) + ) + { + set_forced_breakpoint($i); + } ## end if ( $is_assignment{$next_nonblank_type...}) + + # break at any comma before the opening structure Added + # for -lp, but seems to be good in general. It isn't + # obvious how far back to look; the '5' below seems to + # work well and will catch the comma in something like + # push @list, myfunc( $param, $param, .. + + my $icomma = $last_comma_index[$depth]; + if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { + unless ( $forced_breakpoint_to_go[$icomma] ) { + set_forced_breakpoint($icomma); + } + } + } # end logic to open up a container + + # Break open a logical container open if it was already open + elsif ($is_simple_logical_expression + && $has_old_logical_breakpoints[$current_depth] ) + { + set_logical_breakpoints($current_depth); + } + + # Handle long container which does not get opened up + elsif ($is_long_term) { + + # must set fake breakpoint to alert outer containers that + # they are complex + set_fake_breakpoint(); + } ## end elsif ($is_long_term) + + } ## end elsif ( $depth < $current_depth) + + #------------------------------------------------------------ + # Handle this token + #------------------------------------------------------------ + + $current_depth = $depth; + + # handle comma-arrow + if ( $type eq '=>' ) { + next if ( $last_nonblank_type eq '=>' ); + next if $rOpts_break_at_old_comma_breakpoints; + next if $rOpts_comma_arrow_breakpoints == 3; + $want_comma_break[$depth] = 1; + $index_before_arrow[$depth] = $i_last_nonblank_token; + next; + } ## end if ( $type eq '=>' ) + + elsif ( $type eq '.' ) { + $last_dot_index[$depth] = $i; + } + + # Turn off alignment if we are sure that this is not a list + # environment. To be safe, we will do this if we see certain + # non-list tokens, such as ';', and also the environment is + # not a list. Note that '=' could be in any of the = operators + # (lextest.t). We can't just use the reported environment + # because it can be incorrect in some cases. + elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) + && $container_environment_to_go[$i] ne 'LIST' ) + { + $dont_align[$depth] = 1; + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; + } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) + + # now just handle any commas + next unless ( $type eq ',' ); + + $last_dot_index[$depth] = undef; + $last_comma_index[$depth] = $i; + + # break here if this comma follows a '=>' + # but not if there is a side comment after the comma + if ( $want_comma_break[$depth] ) { + + if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { + if ($rOpts_comma_arrow_breakpoints) { + $want_comma_break[$depth] = 0; + ##$index_before_arrow[$depth] = -1; + next; + } + } + + set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); + + # break before the previous token if it looks safe + # Example of something that we will not try to break before: + # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, + # Also we don't want to break at a binary operator (like +): + # $c->createOval( + # $x + $R, $y + + # $R => $x - $R, + # $y - $R, -fill => 'black', + # ); + my $ibreak = $index_before_arrow[$depth] - 1; + if ( $ibreak > 0 + && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) + { + if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } + if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } + if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { + + # don't break pointer calls, such as the following: + # File::Spec->curdir => 1, + # (This is tokenized as adjacent 'w' tokens) + ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { + + # And don't break before a comma, as in the following: + # ( LONGER_THAN,=> 1, + # EIGHTY_CHARACTERS,=> 2, + # CAUSES_FORMATTING,=> 3, + # LIKE_THIS,=> 4, + # ); + # This example is for -tso but should be general rule + if ( $tokens_to_go[ $ibreak + 1 ] ne '->' + && $tokens_to_go[ $ibreak + 1 ] ne ',' ) + { + set_forced_breakpoint($ibreak); + } + } ## end if ( $types_to_go[$ibreak...]) + } ## end if ( $ibreak > 0 && $tokens_to_go...) + + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; + + # handle list which mixes '=>'s and ','s: + # treat any list items so far as an interrupted list + $interrupted_list[$depth] = 1; + next; + } ## end if ( $want_comma_break...) + + # break after all commas above starting depth + if ( $depth < $starting_depth && !$dont_align[$depth] ) { + set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); + next; + } + + # add this comma to the list.. + my $item_count = $item_count_stack[$depth]; + if ( $item_count == 0 ) { + + # but do not form a list with no opening structure + # for example: + + # open INFILE_COPY, ">$input_file_copy" + # or die ("very long message"); + + if ( ( $opening_structure_index_stack[$depth] < 0 ) + && $container_environment_to_go[$i] eq 'BLOCK' ) + { + $dont_align[$depth] = 1; + } + } ## end if ( $item_count == 0 ) + + $comma_index[$depth][$item_count] = $i; + ++$item_count_stack[$depth]; + if ( $last_nonblank_type =~ /^[iR\]]$/ ) { + $identifier_count_stack[$depth]++; + } + } ## end while ( ++$i <= $max_index_to_go) + + #------------------------------------------- + # end of loop over all tokens in this batch + #------------------------------------------- + + # set breaks for any unfinished lists .. + for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) { + + $interrupted_list[$dd] = 1; + $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); + set_comma_breakpoints($dd); + set_logical_breakpoints($dd) + if ( $has_old_logical_breakpoints[$dd] ); + set_for_semicolon_breakpoints($dd); + + # break open container... + my $i_opening = $opening_structure_index_stack[$dd]; + set_forced_breakpoint($i_opening) + unless ( + is_unbreakable_container($dd) + + # Avoid a break which would place an isolated ' or " + # on a line + || ( $type eq 'Q' + && $i_opening >= $max_index_to_go - 2 + && $token =~ /^['"]$/ ) + ); + } ## end for ( my $dd = $current_depth...) + + # Return a flag indicating if the input file had some good breakpoints. + # This flag will be used to force a break in a line shorter than the + # allowed line length. + if ( $has_old_logical_breakpoints[$current_depth] ) { + $saw_good_breakpoint = 1; + } + + # A complex line with one break at an = has a good breakpoint. + # This is not complex ($total_depth_variation=0): + # $res1 + # = 10; + # + # This is complex ($total_depth_variation=6): + # $res2 = + # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); + elsif ($i_old_assignment_break + && $total_depth_variation > 4 + && $old_breakpoint_count == 1 ) + { + $saw_good_breakpoint = 1; + } ## end elsif ( $i_old_assignment_break...) + + return $saw_good_breakpoint; + } ## end sub scan_list +} # end scan_list + +sub find_token_starting_list { + + # When testing to see if a block will fit on one line, some + # previous token(s) may also need to be on the line; particularly + # if this is a sub call. So we will look back at least one + # token. NOTE: This isn't perfect, but not critical, because + # if we mis-identify a block, it will be wrapped and therefore + # fixed the next time it is formatted. + my $i_opening_paren = shift; + my $i_opening_minus = $i_opening_paren; + my $im1 = $i_opening_paren - 1; + my $im2 = $i_opening_paren - 2; + my $im3 = $i_opening_paren - 3; + my $typem1 = $types_to_go[$im1]; + my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b'; + if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) { + $i_opening_minus = $i_opening_paren; + } + elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) { + $i_opening_minus = $im1 if $im1 >= 0; + + # walk back to improve length estimate + for ( my $j = $im1 ; $j >= 0 ; $j-- ) { + last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ ); + $i_opening_minus = $j; + } + if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } + } + elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 } + elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) { + $i_opening_minus = $im2; + } + return $i_opening_minus; +} + +{ # begin set_comma_breakpoints_do + + my %is_keyword_with_special_leading_term; + + BEGIN { + + # These keywords have prototypes which allow a special leading item + # followed by a list + my @q = + qw(formline grep kill map printf sprintf push chmod join pack unshift); + @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q); + } + + sub set_comma_breakpoints_do { + + # Given a list with some commas, set breakpoints at some of the + # commas, if necessary, to make it easy to read. This list is + # an example: + my ( + $depth, $i_opening_paren, $i_closing_paren, + $item_count, $identifier_count, $rcomma_index, + $next_nonblank_type, $list_type, $interrupted, + $rdo_not_break_apart, $must_break_open, + ) = @_; + + # nothing to do if no commas seen + return if ( $item_count < 1 ); + my $i_first_comma = $rcomma_index->[0]; + my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ]; + my $i_last_comma = $i_true_last_comma; + if ( $i_last_comma >= $max_index_to_go ) { + $i_last_comma = $rcomma_index->[ --$item_count - 1 ]; + return if ( $item_count < 1 ); + } + + #--------------------------------------------------------------- + # find lengths of all items in the list to calculate page layout + #--------------------------------------------------------------- + my $comma_count = $item_count; + my @item_lengths; + my @i_term_begin; + my @i_term_end; + my @i_term_comma; + my $i_prev_plus; + my @max_length = ( 0, 0 ); + my $first_term_length; + my $i = $i_opening_paren; + my $is_odd = 1; + + foreach my $j ( 0 .. $comma_count - 1 ) { + $is_odd = 1 - $is_odd; + $i_prev_plus = $i + 1; + $i = $rcomma_index->[$j]; + + my $i_term_end = + ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; + my $i_term_begin = + ( $types_to_go[$i_prev_plus] eq 'b' ) + ? $i_prev_plus + 1 + : $i_prev_plus; + push @i_term_begin, $i_term_begin; + push @i_term_end, $i_term_end; + push @i_term_comma, $i; + + # note: currently adding 2 to all lengths (for comma and space) + my $length = + 2 + token_sequence_length( $i_term_begin, $i_term_end ); + push @item_lengths, $length; + + if ( $j == 0 ) { + $first_term_length = $length; + } + else { + + if ( $length > $max_length[$is_odd] ) { + $max_length[$is_odd] = $length; + } + } + } + + # now we have to make a distinction between the comma count and item + # count, because the item count will be one greater than the comma + # count if the last item is not terminated with a comma + my $i_b = + ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) + ? $i_last_comma + 1 + : $i_last_comma; + my $i_e = + ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) + ? $i_closing_paren - 2 + : $i_closing_paren - 1; + my $i_effective_last_comma = $i_last_comma; + + my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); + + if ( $last_item_length > 0 ) { + + # add 2 to length because other lengths include a comma and a blank + $last_item_length += 2; + push @item_lengths, $last_item_length; + push @i_term_begin, $i_b + 1; + push @i_term_end, $i_e; + push @i_term_comma, undef; + + my $i_odd = $item_count % 2; + + if ( $last_item_length > $max_length[$i_odd] ) { + $max_length[$i_odd] = $last_item_length; + } + + $item_count++; + $i_effective_last_comma = $i_e + 1; + + if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { + $identifier_count++; + } + } + + #--------------------------------------------------------------- + # End of length calculations + #--------------------------------------------------------------- + + #--------------------------------------------------------------- + # Compound List Rule 1: + # Break at (almost) every comma for a list containing a broken + # sublist. This has higher priority than the Interrupted List + # Rule. + #--------------------------------------------------------------- + if ( $has_broken_sublist[$depth] ) { + + # Break at every comma except for a comma between two + # simple, small terms. This prevents long vertical + # columns of, say, just 0's. + my $small_length = 10; # 2 + actual maximum length wanted + + # We'll insert a break in long runs of small terms to + # allow alignment in uniform tables. + my $skipped_count = 0; + my $columns = table_columns_available($i_first_comma); + my $fields = int( $columns / $small_length ); + if ( $rOpts_maximum_fields_per_table + && $fields > $rOpts_maximum_fields_per_table ) + { + $fields = $rOpts_maximum_fields_per_table; + } + my $max_skipped_count = $fields - 1; + + my $is_simple_last_term = 0; + my $is_simple_next_term = 0; + foreach my $j ( 0 .. $item_count ) { + $is_simple_last_term = $is_simple_next_term; + $is_simple_next_term = 0; + if ( $j < $item_count + && $i_term_end[$j] == $i_term_begin[$j] + && $item_lengths[$j] <= $small_length ) + { + $is_simple_next_term = 1; + } + next if $j == 0; + if ( $is_simple_last_term + && $is_simple_next_term + && $skipped_count < $max_skipped_count ) + { + $skipped_count++; + } + else { + $skipped_count = 0; + my $i = $i_term_comma[ $j - 1 ]; + last unless defined $i; + set_forced_breakpoint($i); + } + } + + # always break at the last comma if this list is + # interrupted; we wouldn't want to leave a terminal '{', for + # example. + if ($interrupted) { set_forced_breakpoint($i_true_last_comma) } + return; + } + +#my ( $a, $b, $c ) = caller(); +#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count +#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; +#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; + + #--------------------------------------------------------------- + # Interrupted List Rule: + # A list is forced to use old breakpoints if it was interrupted + # by side comments or blank lines, or requested by user. + #--------------------------------------------------------------- + if ( $rOpts_break_at_old_comma_breakpoints + || $interrupted + || $i_opening_paren < 0 ) + { + copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); + return; + } + + #--------------------------------------------------------------- + # Looks like a list of items. We have to look at it and size it up. + #--------------------------------------------------------------- + + my $opening_token = $tokens_to_go[$i_opening_paren]; + my $opening_environment = + $container_environment_to_go[$i_opening_paren]; + + #------------------------------------------------------------------- + # Return if this will fit on one line + #------------------------------------------------------------------- + + my $i_opening_minus = find_token_starting_list($i_opening_paren); + return + unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0; + + #------------------------------------------------------------------- + # Now we know that this block spans multiple lines; we have to set + # at least one breakpoint -- real or fake -- as a signal to break + # open any outer containers. + #------------------------------------------------------------------- + set_fake_breakpoint(); + + # be sure we do not extend beyond the current list length + if ( $i_effective_last_comma >= $max_index_to_go ) { + $i_effective_last_comma = $max_index_to_go - 1; + } + + # Set a flag indicating if we need to break open to keep -lp + # items aligned. This is necessary if any of the list terms + # exceeds the available space after the '('. + my $need_lp_break_open = $must_break_open; + if ( $rOpts_line_up_parentheses && !$must_break_open ) { + my $columns_if_unbroken = + maximum_line_length($i_opening_minus) - + total_line_length( $i_opening_minus, $i_opening_paren ); + $need_lp_break_open = + ( $max_length[0] > $columns_if_unbroken ) + || ( $max_length[1] > $columns_if_unbroken ) + || ( $first_term_length > $columns_if_unbroken ); + } + + # Specify if the list must have an even number of fields or not. + # It is generally safest to assume an even number, because the + # list items might be a hash list. But if we can be sure that + # it is not a hash, then we can allow an odd number for more + # flexibility. + my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count + + if ( $identifier_count >= $item_count - 1 + || $is_assignment{$next_nonblank_type} + || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) + ) + { + $odd_or_even = 1; + } + + # do we have a long first term which should be + # left on a line by itself? + my $use_separate_first_term = ( + $odd_or_even == 1 # only if we can use 1 field/line + && $item_count > 3 # need several items + && $first_term_length > + 2 * $max_length[0] - 2 # need long first term + && $first_term_length > + 2 * $max_length[1] - 2 # need long first term + ); + + # or do we know from the type of list that the first term should + # be placed alone? + if ( !$use_separate_first_term ) { + if ( $is_keyword_with_special_leading_term{$list_type} ) { + $use_separate_first_term = 1; + + # should the container be broken open? + if ( $item_count < 3 ) { + if ( $i_first_comma - $i_opening_paren < 4 ) { + ${$rdo_not_break_apart} = 1; + } + } + elsif ($first_term_length < 20 + && $i_first_comma - $i_opening_paren < 4 ) + { + my $columns = table_columns_available($i_first_comma); + if ( $first_term_length < $columns ) { + ${$rdo_not_break_apart} = 1; + } + } + } + } + + # if so, + if ($use_separate_first_term) { + + # ..set a break and update starting values + $use_separate_first_term = 1; + set_forced_breakpoint($i_first_comma); + $i_opening_paren = $i_first_comma; + $i_first_comma = $rcomma_index->[1]; + $item_count--; + return if $comma_count == 1; + shift @item_lengths; + shift @i_term_begin; + shift @i_term_end; + shift @i_term_comma; + } + + # if not, update the metrics to include the first term + else { + if ( $first_term_length > $max_length[0] ) { + $max_length[0] = $first_term_length; + } + } + + # Field width parameters + my $pair_width = ( $max_length[0] + $max_length[1] ); + my $max_width = + ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; + + # Number of free columns across the page width for laying out tables + my $columns = table_columns_available($i_first_comma); + + # Estimated maximum number of fields which fit this space + # This will be our first guess + my $number_of_fields_max = + maximum_number_of_fields( $columns, $odd_or_even, $max_width, + $pair_width ); + my $number_of_fields = $number_of_fields_max; + + # Find the best-looking number of fields + # and make this our second guess if possible + my ( $number_of_fields_best, $ri_ragged_break_list, + $new_identifier_count ) + = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths, + $max_width ); + + if ( $number_of_fields_best != 0 + && $number_of_fields_best < $number_of_fields_max ) + { + $number_of_fields = $number_of_fields_best; + } + + # ---------------------------------------------------------------------- + # If we are crowded and the -lp option is being used, try to + # undo some indentation + # ---------------------------------------------------------------------- + if ( + $rOpts_line_up_parentheses + && ( + $number_of_fields == 0 + || ( $number_of_fields == 1 + && $number_of_fields != $number_of_fields_best ) + ) + ) + { + my $available_spaces = get_available_spaces_to_go($i_first_comma); + if ( $available_spaces > 0 ) { + + my $spaces_wanted = $max_width - $columns; # for 1 field + + if ( $number_of_fields_best == 0 ) { + $number_of_fields_best = + get_maximum_fields_wanted( \@item_lengths ); + } + + if ( $number_of_fields_best != 1 ) { + my $spaces_wanted_2 = + 1 + $pair_width - $columns; # for 2 fields + if ( $available_spaces > $spaces_wanted_2 ) { + $spaces_wanted = $spaces_wanted_2; + } + } + + if ( $spaces_wanted > 0 ) { + my $deleted_spaces = + reduce_lp_indentation( $i_first_comma, $spaces_wanted ); + + # redo the math + if ( $deleted_spaces > 0 ) { + $columns = table_columns_available($i_first_comma); + $number_of_fields_max = + maximum_number_of_fields( $columns, $odd_or_even, + $max_width, $pair_width ); + $number_of_fields = $number_of_fields_max; + + if ( $number_of_fields_best == 1 + && $number_of_fields >= 1 ) + { + $number_of_fields = $number_of_fields_best; + } + } + } + } + } + + # try for one column if two won't work + if ( $number_of_fields <= 0 ) { + $number_of_fields = int( $columns / $max_width ); + } + + # The user can place an upper bound on the number of fields, + # which can be useful for doing maintenance on tables + if ( $rOpts_maximum_fields_per_table + && $number_of_fields > $rOpts_maximum_fields_per_table ) + { + $number_of_fields = $rOpts_maximum_fields_per_table; + } + + # How many columns (characters) and lines would this container take + # if no additional whitespace were added? + my $packed_columns = token_sequence_length( $i_opening_paren + 1, + $i_effective_last_comma + 1 ); + if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero + my $packed_lines = 1 + int( $packed_columns / $columns ); + + # are we an item contained in an outer list? + my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; + + if ( $number_of_fields <= 0 ) { + +# #--------------------------------------------------------------- +# # We're in trouble. We can't find a single field width that works. +# # There is no simple answer here; we may have a single long list +# # item, or many. +# #--------------------------------------------------------------- +# +# In many cases, it may be best to not force a break if there is just one +# comma, because the standard continuation break logic will do a better +# job without it. +# +# In the common case that all but one of the terms can fit +# on a single line, it may look better not to break open the +# containing parens. Consider, for example +# +# $color = +# join ( '/', +# sort { $color_value{$::a} <=> $color_value{$::b}; } +# keys %colors ); +# +# which will look like this with the container broken: +# +# $color = join ( +# '/', +# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors +# ); +# +# Here is an example of this rule for a long last term: +# +# log_message( 0, 256, 128, +# "Number of routes in adj-RIB-in to be considered: $peercount" ); +# +# And here is an example with a long first term: +# +# $s = sprintf( +# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", +# $r, $pu, $ps, $cu, $cs, $tt +# ) +# if $style eq 'all'; + + my $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; + my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0; + my $long_first_term = + excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; + + # break at every comma ... + if ( + + # if requested by user or is best looking + $number_of_fields_best == 1 + + # or if this is a sublist of a larger list + || $in_hierarchical_list + + # or if multiple commas and we don't have a long first or last + # term + || ( $comma_count > 1 + && !( $long_last_term || $long_first_term ) ) + ) + { + foreach ( 0 .. $comma_count - 1 ) { + set_forced_breakpoint( $rcomma_index->[$_] ); + } + } + elsif ($long_last_term) { + + set_forced_breakpoint($i_last_comma); + ${$rdo_not_break_apart} = 1 unless $must_break_open; + } + elsif ($long_first_term) { + + set_forced_breakpoint($i_first_comma); + } + else { + + # let breaks be defined by default bond strength logic + } + return; + } + + # -------------------------------------------------------- + # We have a tentative field count that seems to work. + # How many lines will this require? + # -------------------------------------------------------- + my $formatted_lines = $item_count / ($number_of_fields); + if ( $formatted_lines != int $formatted_lines ) { + $formatted_lines = 1 + int $formatted_lines; + } + + # So far we've been trying to fill out to the right margin. But + # compact tables are easier to read, so let's see if we can use fewer + # fields without increasing the number of lines. + $number_of_fields = + compactify_table( $item_count, $number_of_fields, $formatted_lines, + $odd_or_even ); + + # How many spaces across the page will we fill? + my $columns_per_line = + ( int $number_of_fields / 2 ) * $pair_width + + ( $number_of_fields % 2 ) * $max_width; + + my $formatted_columns; + + if ( $number_of_fields > 1 ) { + $formatted_columns = + ( $pair_width * ( int( $item_count / 2 ) ) + + ( $item_count % 2 ) * $max_width ); + } + else { + $formatted_columns = $max_width * $item_count; + } + if ( $formatted_columns < $packed_columns ) { + $formatted_columns = $packed_columns; + } + + my $unused_columns = $formatted_columns - $packed_columns; + + # set some empirical parameters to help decide if we should try to + # align; high sparsity does not look good, especially with few lines + my $sparsity = ($unused_columns) / ($formatted_columns); + my $max_allowed_sparsity = + ( $item_count < 3 ) ? 0.1 + : ( $packed_lines == 1 ) ? 0.15 + : ( $packed_lines == 2 ) ? 0.4 + : 0.7; + + # Begin check for shortcut methods, which avoid treating a list + # as a table for relatively small parenthesized lists. These + # are usually easier to read if not formatted as tables. + if ( + $packed_lines <= 2 # probably can fit in 2 lines + && $item_count < 9 # doesn't have too many items + && $opening_environment eq 'BLOCK' # not a sub-container + && $opening_token eq '(' # is paren list + ) + { + + # Shortcut method 1: for -lp and just one comma: + # This is a no-brainer, just break at the comma. + if ( + $rOpts_line_up_parentheses # -lp + && $item_count == 2 # two items, one comma + && !$must_break_open + ) + { + my $i_break = $rcomma_index->[0]; + set_forced_breakpoint($i_break); + ${$rdo_not_break_apart} = 1; + set_non_alignment_flags( $comma_count, $rcomma_index ); + return; + + } + + # method 2 is for most small ragged lists which might look + # best if not displayed as a table. + if ( + ( $number_of_fields == 2 && $item_count == 3 ) + || ( + $new_identifier_count > 0 # isn't all quotes + && $sparsity > 0.15 + ) # would be fairly spaced gaps if aligned + ) + { + + my $break_count = set_ragged_breakpoints( \@i_term_comma, + $ri_ragged_break_list ); + ++$break_count if ($use_separate_first_term); + + # NOTE: we should really use the true break count here, + # which can be greater if there are large terms and + # little space, but usually this will work well enough. + unless ($must_break_open) { + + if ( $break_count <= 1 ) { + ${$rdo_not_break_apart} = 1; + } + elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) + { + ${$rdo_not_break_apart} = 1; + } + } + set_non_alignment_flags( $comma_count, $rcomma_index ); + return; + } + + } # end shortcut methods + + # debug stuff + + FORMATTER_DEBUG_FLAG_SPARSE && do { + print STDOUT +"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; + + }; + + #--------------------------------------------------------------- + # Compound List Rule 2: + # If this list is too long for one line, and it is an item of a + # larger list, then we must format it, regardless of sparsity + # (ian.t). One reason that we have to do this is to trigger + # Compound List Rule 1, above, which causes breaks at all commas of + # all outer lists. In this way, the structure will be properly + # displayed. + #--------------------------------------------------------------- + + # Decide if this list is too long for one line unless broken + my $total_columns = table_columns_available($i_opening_paren); + my $too_long = $packed_columns > $total_columns; + + # For a paren list, include the length of the token just before the + # '(' because this is likely a sub call, and we would have to + # include the sub name on the same line as the list. This is still + # imprecise, but not too bad. (steve.t) + if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { + + $too_long = excess_line_length( $i_opening_minus, + $i_effective_last_comma + 1 ) > 0; + } + + # FIXME: For an item after a '=>', try to include the length of the + # thing before the '=>'. This is crude and should be improved by + # actually looking back token by token. + if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { + my $i_opening_minus = $i_opening_paren - 4; + if ( $i_opening_minus >= 0 ) { + $too_long = excess_line_length( $i_opening_minus, + $i_effective_last_comma + 1 ) > 0; + } + } + + # Always break lists contained in '[' and '{' if too long for 1 line, + # and always break lists which are too long and part of a more complex + # structure. + my $must_break_open_container = $must_break_open + || ( $too_long + && ( $in_hierarchical_list || $opening_token ne '(' ) ); + +#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; + + #--------------------------------------------------------------- + # The main decision: + # Now decide if we will align the data into aligned columns. Do not + # attempt to align columns if this is a tiny table or it would be + # too spaced. It seems that the more packed lines we have, the + # sparser the list that can be allowed and still look ok. + #--------------------------------------------------------------- + + if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) + || ( $formatted_lines < 2 ) + || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) + ) + { + + #--------------------------------------------------------------- + # too sparse: would look ugly if aligned in a table; + #--------------------------------------------------------------- + + # use old breakpoints if this is a 'big' list + # FIXME: goal is to improve set_ragged_breakpoints so that + # this is not necessary. + if ( $packed_lines > 2 && $item_count > 10 ) { + write_logfile_entry("List sparse: using old breakpoints\n"); + copy_old_breakpoints( $i_first_comma, $i_last_comma ); + } + + # let the continuation logic handle it if 2 lines + else { + + my $break_count = set_ragged_breakpoints( \@i_term_comma, + $ri_ragged_break_list ); + ++$break_count if ($use_separate_first_term); + + unless ($must_break_open_container) { + if ( $break_count <= 1 ) { + ${$rdo_not_break_apart} = 1; + } + elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open ) + { + ${$rdo_not_break_apart} = 1; + } + } + set_non_alignment_flags( $comma_count, $rcomma_index ); + } + return; + } + + #--------------------------------------------------------------- + # go ahead and format as a table + #--------------------------------------------------------------- + write_logfile_entry( + "List: auto formatting with $number_of_fields fields/row\n"); + + my $j_first_break = + $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; + + for ( + my $j = $j_first_break ; + $j < $comma_count ; + $j += $number_of_fields + ) + { + my $i = $rcomma_index->[$j]; + set_forced_breakpoint($i); + } + return; + } +} + +sub set_non_alignment_flags { + + # set flag which indicates that these commas should not be + # aligned + my ( $comma_count, $rcomma_index ) = @_; + foreach ( 0 .. $comma_count - 1 ) { + $matching_token_to_go[ $rcomma_index->[$_] ] = 1; + } + return; +} + +sub study_list_complexity { + + # Look for complex tables which should be formatted with one term per line. + # Returns the following: + # + # \@i_ragged_break_list = list of good breakpoints to avoid lines + # which are hard to read + # $number_of_fields_best = suggested number of fields based on + # complexity; = 0 if any number may be used. + # + my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; + my $item_count = @{$ri_term_begin}; + my $complex_item_count = 0; + my $number_of_fields_best = $rOpts_maximum_fields_per_table; + my $i_max = @{$ritem_lengths} - 1; + ##my @item_complexity; + + my $i_last_last_break = -3; + my $i_last_break = -2; + my @i_ragged_break_list; + + my $definitely_complex = 30; + my $definitely_simple = 12; + my $quote_count = 0; + + for my $i ( 0 .. $i_max ) { + my $ib = $ri_term_begin->[$i]; + my $ie = $ri_term_end->[$i]; + + # define complexity: start with the actual term length + my $weighted_length = ( $ritem_lengths->[$i] - 2 ); + + ##TBD: join types here and check for variations + ##my $str=join "", @tokens_to_go[$ib..$ie]; + + my $is_quote = 0; + if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { + $is_quote = 1; + $quote_count++; + } + elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { + $quote_count++; + } + + if ( $ib eq $ie ) { + if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { + $complex_item_count++; + $weighted_length *= 2; + } + else { + } + } + else { + if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { + $complex_item_count++; + $weighted_length *= 2; + } + if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { + $weighted_length += 4; + } + } + + # add weight for extra tokens. + $weighted_length += 2 * ( $ie - $ib ); + +## my $BUB = join '', @tokens_to_go[$ib..$ie]; +## print "# COMPLEXITY:$weighted_length $BUB\n"; + +##push @item_complexity, $weighted_length; + + # now mark a ragged break after this item it if it is 'long and + # complex': + if ( $weighted_length >= $definitely_complex ) { + + # if we broke after the previous term + # then break before it too + if ( $i_last_break == $i - 1 + && $i > 1 + && $i_last_last_break != $i - 2 ) + { + + ## FIXME: don't strand a small term + pop @i_ragged_break_list; + push @i_ragged_break_list, $i - 2; + push @i_ragged_break_list, $i - 1; + } + + push @i_ragged_break_list, $i; + $i_last_last_break = $i_last_break; + $i_last_break = $i; + } + + # don't break before a small last term -- it will + # not look good on a line by itself. + elsif ($i == $i_max + && $i_last_break == $i - 1 + && $weighted_length <= $definitely_simple ) + { + pop @i_ragged_break_list; + } + } + + my $identifier_count = $i_max + 1 - $quote_count; + + # Need more tuning here.. + if ( $max_width > 12 + && $complex_item_count > $item_count / 2 + && $number_of_fields_best != 2 ) + { + $number_of_fields_best = 1; + } + + return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); +} + +sub get_maximum_fields_wanted { + + # Not all tables look good with more than one field of items. + # This routine looks at a table and decides if it should be + # formatted with just one field or not. + # This coding is still under development. + my ($ritem_lengths) = @_; + + my $number_of_fields_best = 0; + + # For just a few items, we tentatively assume just 1 field. + my $item_count = @{$ritem_lengths}; + if ( $item_count <= 5 ) { + $number_of_fields_best = 1; + } + + # For larger tables, look at it both ways and see what looks best + else { + + my $is_odd = 1; + my @max_length = ( 0, 0 ); + my @last_length_2 = ( undef, undef ); + my @first_length_2 = ( undef, undef ); + my $last_length = undef; + my $total_variation_1 = 0; + my $total_variation_2 = 0; + my @total_variation_2 = ( 0, 0 ); + foreach my $j ( 0 .. $item_count - 1 ) { + + $is_odd = 1 - $is_odd; + my $length = $ritem_lengths->[$j]; + if ( $length > $max_length[$is_odd] ) { + $max_length[$is_odd] = $length; + } + + if ( defined($last_length) ) { + my $dl = abs( $length - $last_length ); + $total_variation_1 += $dl; + } + $last_length = $length; + + my $ll = $last_length_2[$is_odd]; + if ( defined($ll) ) { + my $dl = abs( $length - $ll ); + $total_variation_2[$is_odd] += $dl; + } + else { + $first_length_2[$is_odd] = $length; + } + $last_length_2[$is_odd] = $length; + } + $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; + + my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; + unless ( $total_variation_2 < $factor * $total_variation_1 ) { + $number_of_fields_best = 1; + } + } + return ($number_of_fields_best); +} + +sub table_columns_available { + my $i_first_comma = shift; + my $columns = + maximum_line_length($i_first_comma) - + leading_spaces_to_go($i_first_comma); + + # Patch: the vertical formatter does not line up lines whose lengths + # exactly equal the available line length because of allowances + # that must be made for side comments. Therefore, the number of + # available columns is reduced by 1 character. + $columns -= 1; + return $columns; +} + +sub maximum_number_of_fields { + + # how many fields will fit in the available space? + my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; + my $max_pairs = int( $columns / $pair_width ); + my $number_of_fields = $max_pairs * 2; + if ( $odd_or_even == 1 + && $max_pairs * $pair_width + $max_width <= $columns ) + { + $number_of_fields++; + } + return $number_of_fields; +} + +sub compactify_table { + + # given a table with a certain number of fields and a certain number + # of lines, see if reducing the number of fields will make it look + # better. + my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; + if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { + my $min_fields; + + for ( + $min_fields = $number_of_fields ; + $min_fields >= $odd_or_even + && $min_fields * $formatted_lines >= $item_count ; + $min_fields -= $odd_or_even + ) + { + $number_of_fields = $min_fields; + } + } + return $number_of_fields; +} + +sub set_ragged_breakpoints { + + # Set breakpoints in a list that cannot be formatted nicely as a + # table. + my ( $ri_term_comma, $ri_ragged_break_list ) = @_; + + my $break_count = 0; + foreach ( @{$ri_ragged_break_list} ) { + my $j = $ri_term_comma->[$_]; + if ($j) { + set_forced_breakpoint($j); + $break_count++; + } + } + return $break_count; +} + +sub copy_old_breakpoints { + my ( $i_first_comma, $i_last_comma ) = @_; + for my $i ( $i_first_comma .. $i_last_comma ) { + if ( $old_breakpoint_to_go[$i] ) { + set_forced_breakpoint($i); + } + } + return; +} + +sub set_nobreaks { + my ( $i, $j ) = @_; + if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { + + FORMATTER_DEBUG_FLAG_NOBREAK && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; + }; + + @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); + } + + # shouldn't happen; non-critical error + else { + FORMATTER_DEBUG_FLAG_NOBREAK && do { + my ( $a, $b, $c ) = caller(); + print STDOUT + "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; + }; + } + return; +} + +sub set_fake_breakpoint { + + # Just bump up the breakpoint count as a signal that there are breaks. + # This is useful if we have breaks but may want to postpone deciding where + # to make them. + $forced_breakpoint_count++; + return; +} + +sub set_forced_breakpoint { + my $i = shift; + + return unless defined $i && $i >= 0; + + # no breaks between welded tokens + return if ( weld_len_right_to_go($i) ); + + # when called with certain tokens, use bond strengths to decide + # if we break before or after it + my $token = $tokens_to_go[$i]; + + if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) { + if ( $want_break_before{$token} && $i >= 0 ) { $i-- } + } + + # breaks are forced before 'if' and 'unless' + elsif ( $is_if_unless{$token} ) { $i-- } + + if ( $i >= 0 && $i <= $max_index_to_go ) { + my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; + + FORMATTER_DEBUG_FLAG_FORCE && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n"; + }; + + if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) { + $forced_breakpoint_to_go[$i_nonblank] = 1; + + if ( $i_nonblank > $index_max_forced_break ) { + $index_max_forced_break = $i_nonblank; + } + $forced_breakpoint_count++; + $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = + $i_nonblank; + + # if we break at an opening container..break at the closing + if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) { + set_closing_breakpoint($i_nonblank); + } + } + } + return; +} + +sub clear_breakpoint_undo_stack { + $forced_breakpoint_undo_count = 0; + return; +} + +sub undo_forced_breakpoint_stack { + + my $i_start = shift; + if ( $i_start < 0 ) { + $i_start = 0; + my ( $a, $b, $c ) = caller(); + warning( +"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start " + ); + } + + while ( $forced_breakpoint_undo_count > $i_start ) { + my $i = + $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; + if ( $i >= 0 && $i <= $max_index_to_go ) { + $forced_breakpoint_to_go[$i] = 0; + $forced_breakpoint_count--; + + FORMATTER_DEBUG_FLAG_UNDOBP && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; + }; + } + + # shouldn't happen, but not a critical error + else { + FORMATTER_DEBUG_FLAG_UNDOBP && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; + }; + } + } + return; +} + +{ # begin recombine_breakpoints + + my %is_amp_amp; + my %is_ternary; + my %is_math_op; + my %is_plus_minus; + my %is_mult_div; + + BEGIN { + + my @q; + @q = qw( && || ); + @is_amp_amp{@q} = (1) x scalar(@q); + + @q = qw( ? : ); + @is_ternary{@q} = (1) x scalar(@q); + + @q = qw( + - * / ); + @is_math_op{@q} = (1) x scalar(@q); + + @q = qw( + - ); + @is_plus_minus{@q} = (1) x scalar(@q); + + @q = qw( * / ); + @is_mult_div{@q} = (1) x scalar(@q); + } + + sub DUMP_BREAKPOINTS { + + # Debug routine to dump current breakpoints...not normally called + # We are given indexes to the current lines: + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + my ( $ri_beg, $ri_end, $msg ) = @_; + print STDERR "----Dumping breakpoints from: $msg----\n"; + for my $n ( 0 .. @{$ri_end} - 1 ) { + my $ibeg = $ri_beg->[$n]; + my $iend = $ri_end->[$n]; + my $text = ""; + foreach my $i ( $ibeg .. $iend ) { + $text .= $tokens_to_go[$i]; + } + print STDERR "$n ($ibeg:$iend) $text\n"; + } + print STDERR "----\n"; + return; + } + + sub unmask_phantom_semicolons { + + my ( $self, $ri_beg, $ri_end ) = @_; + + # Walk down the lines of this batch and unmask any invisible line-ending + # semicolons. They were placed by sub respace_tokens but we only now + # know if we actually need them. + + my $nmax = @{$ri_end} - 1; + foreach my $n ( 0 .. $nmax ) { + + my $i = $ri_end->[$n]; + if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) { + + $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;'; + + my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] ); + note_added_semicolon($line_number); + } + } + return; + } + + sub recombine_breakpoints { + + # sub set_continuation_breaks is very liberal in setting line breaks + # for long lines, always setting breaks at good breakpoints, even + # when that creates small lines. Sometimes small line fragments + # are produced which would look better if they were combined. + # That's the task of this routine. + # + # We are given indexes to the current lines: + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + my ( $ri_beg, $ri_end ) = @_; + + # Make a list of all good joining tokens between the lines + # n-1 and n. + my @joint; + my $nmax = @{$ri_end} - 1; + for my $n ( 1 .. $nmax ) { + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + + my ( $itok, $itokp, $itokm ); + + foreach my $itest ( $iend_1, $ibeg_2 ) { + my $type = $types_to_go[$itest]; + if ( $is_math_op{$type} + || $is_amp_amp{$type} + || $is_assignment{$type} + || $type eq ':' ) + { + $itok = $itest; + } + } + $joint[$n] = [$itok]; + } + + my $more_to_do = 1; + + # We keep looping over all of the lines of this batch + # until there are no more possible recombinations + my $nmax_last = @{$ri_end}; + my $reverse = 0; + while ($more_to_do) { + my $n_best = 0; + my $bs_best; + my $nmax = @{$ri_end} - 1; + + # Safety check for infinite loop + unless ( $nmax < $nmax_last ) { + + # Shouldn't happen because splice below decreases nmax on each + # pass. + Fault("Program bug-infinite loop in recombine breakpoints\n"); + } + $nmax_last = $nmax; + $more_to_do = 0; + my $skip_Section_3; + my $leading_amp_count = 0; + my $this_line_is_semicolon_terminated; + + # loop over all remaining lines in this batch + for my $iter ( 1 .. $nmax ) { + + # alternating sweep direction gives symmetric results + # for recombining lines which exceed the line length + # such as eval {{{{.... }}}} + my $n; + if ($reverse) { $n = 1 + $nmax - $iter; } + else { $n = $iter } + + #---------------------------------------------------------- + # If we join the current pair of lines, + # line $n-1 will become the left part of the joined line + # line $n will become the right part of the joined line + # + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # We want to decide if we should remove the line break + # between the tokens at $iend_1 and $ibeg_2 + # + # We will apply a number of ad-hoc tests to see if joining + # here will look ok. The code will just issue a 'next' + # command if the join doesn't look good. If we get through + # the gauntlet of tests, the lines will be recombined. + #---------------------------------------------------------- + # + # beginning and ending tokens of the lines we are working on + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + my $ibeg_nmax = $ri_beg->[$nmax]; + + # combined line cannot be too long + my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 ); + next if ( $excess > 0 ); + + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + + # terminal token of line 2 if any side comment is ignored: + my $iend_2t = $iend_2; + my $type_iend_2t = $type_iend_2; + + # some beginning indexes of other lines, which may not exist + my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; + my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; + my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; + + my $bs_tweak = 0; + + #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - + # $nesting_depth_to_go[$ibeg_1] ); + + FORMATTER_DEBUG_FLAG_RECOMBINE && do { + print STDERR +"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; + }; + + # If line $n is the last line, we set some flags and + # do any special checks for it + if ( $n == $nmax ) { + + # a terminal '{' should stay where it is + # unless preceded by a fat comma + next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' ); + + if ( $type_iend_2 eq '#' + && $iend_2 - $ibeg_2 >= 2 + && $types_to_go[ $iend_2 - 1 ] eq 'b' ) + { + $iend_2t = $iend_2 - 2; + $type_iend_2t = $types_to_go[$iend_2t]; + } + + $this_line_is_semicolon_terminated = $type_iend_2t eq ';'; + } + + #---------------------------------------------------------- + # Recombine Section 0: + # Examine the special token joining this line pair, if any. + # Put as many tests in this section to avoid duplicate code and + # to make formatting independent of whether breaks are to the + # left or right of an operator. + #---------------------------------------------------------- + + my ($itok) = @{ $joint[$n] }; + if ($itok) { + + # FIXME: Patch - may not be necessary + my $iend_1 = + $type_iend_1 eq 'b' + ? $iend_1 - 1 + : $iend_1; + + my $iend_2 = + $type_iend_2 eq 'b' + ? $iend_2 - 1 + : $iend_2; + ## END PATCH + + my $type = $types_to_go[$itok]; + + if ( $type eq ':' ) { + + # do not join at a colon unless it disobeys the break request + if ( $itok eq $iend_1 ) { + next unless $want_break_before{$type}; + } + else { + $leading_amp_count++; + next if $want_break_before{$type}; + } + } ## end if ':' + + # handle math operators + - * / + elsif ( $is_math_op{$type} ) { + + # Combine these lines if this line is a single + # number, or if it is a short term with same + # operator as the previous line. For example, in + # the following code we will combine all of the + # short terms $A, $B, $C, $D, $E, $F, together + # instead of leaving them one per line: + # my $time = + # $A * $B * $C * $D * $E * $F * + # ( 2. * $eps * $sigma * $area ) * + # ( 1. / $tcold**3 - 1. / $thot**3 ); + + # This can be important in math-intensive code. + + my $good_combo; + + my $itokp = min( $inext_to_go[$itok], $iend_2 ); + my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); + my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); + my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); + + # check for a number on the right + if ( $types_to_go[$itokp] eq 'n' ) { + + # ok if nothing else on right + if ( $itokp == $iend_2 ) { + $good_combo = 1; + } + else { + + # look one more token to right.. + # okay if math operator or some termination + $good_combo = + ( ( $itokpp == $iend_2 ) + && $is_math_op{ $types_to_go[$itokpp] } ) + || $types_to_go[$itokpp] =~ /^[#,;]$/; + } + } + + # check for a number on the left + if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { + + # okay if nothing else to left + if ( $itokm == $ibeg_1 ) { + $good_combo = 1; + } + + # otherwise look one more token to left + else { + + # okay if math operator, comma, or assignment + $good_combo = ( $itokmm == $ibeg_1 ) + && ( $is_math_op{ $types_to_go[$itokmm] } + || $types_to_go[$itokmm] =~ /^[,]$/ + || $is_assignment{ $types_to_go[$itokmm] } + ); + } + } + + # look for a single short token either side of the + # operator + if ( !$good_combo ) { + + # Slight adjustment factor to make results + # independent of break before or after operator in + # long summed lists. (An operator and a space make + # two spaces). + my $two = ( $itok eq $iend_1 ) ? 2 : 0; + + $good_combo = + + # numbers or id's on both sides of this joint + $types_to_go[$itokp] =~ /^[in]$/ + && $types_to_go[$itokm] =~ /^[in]$/ + + # one of the two lines must be short: + && ( + ( + # no more than 2 nonblank tokens right of + # joint + $itokpp == $iend_2 + + # short + && token_sequence_length( $itokp, $iend_2 ) + < $two + + $rOpts_short_concatenation_item_length + ) + || ( + # no more than 2 nonblank tokens left of + # joint + $itokmm == $ibeg_1 + + # short + && token_sequence_length( $ibeg_1, $itokm ) + < 2 - $two + + $rOpts_short_concatenation_item_length + ) + + ) + + # keep pure terms; don't mix +- with */ + && !( + $is_plus_minus{$type} + && ( $is_mult_div{ $types_to_go[$itokmm] } + || $is_mult_div{ $types_to_go[$itokpp] } ) + ) + && !( + $is_mult_div{$type} + && ( $is_plus_minus{ $types_to_go[$itokmm] } + || $is_plus_minus{ $types_to_go[$itokpp] } ) + ) + + ; + } + + # it is also good to combine if we can reduce to 2 lines + if ( !$good_combo ) { + + # index on other line where same token would be in a + # long chain. + my $iother = + ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; + + $good_combo = + $n == 2 + && $n == $nmax + && $types_to_go[$iother] ne $type; + } + + next unless ($good_combo); + + } ## end math + + elsif ( $is_amp_amp{$type} ) { + ##TBD + } ## end &&, || + + elsif ( $is_assignment{$type} ) { + ##TBD + } ## end assignment + } + + #---------------------------------------------------------- + # Recombine Section 1: + # Join welded nested containers immediately + #---------------------------------------------------------- + if ( weld_len_right_to_go($iend_1) + || weld_len_left_to_go($ibeg_2) ) + { + $n_best = $n; + + # Old coding alternated sweep direction: no longer needed + # $reverse = 1 - $reverse; + last; + } + $reverse = 0; + + #---------------------------------------------------------- + # Recombine Section 2: + # Examine token at $iend_1 (right end of first line of pair) + #---------------------------------------------------------- + + # an isolated '}' may join with a ';' terminated segment + if ( $type_iend_1 eq '}' ) { + + # Check for cases where combining a semicolon terminated + # statement with a previous isolated closing paren will + # allow the combined line to be outdented. This is + # generally a good move. For example, we can join up + # the last two lines here: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) + # = stat($file); + # + # to get: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) = stat($file); + # + # which makes the parens line up. + # + # Another example, from Joe Matarazzo, probably looks best + # with the 'or' clause appended to the trailing paren: + # $self->some_method( + # PARAM1 => 'foo', + # PARAM2 => 'bar' + # ) or die "Some_method didn't work"; + # + # But we do not want to do this for something like the -lp + # option where the paren is not outdentable because the + # trailing clause will be far to the right. + # + # The logic here is synchronized with the logic in sub + # sub set_adjusted_indentation, which actually does + # the outdenting. + # + $skip_Section_3 ||= $this_line_is_semicolon_terminated + + # only one token on last line + && $ibeg_1 == $iend_1 + + # must be structural paren + && $tokens_to_go[$iend_1] eq ')' + + # style must allow outdenting, + && !$closing_token_indentation{')'} + + # only leading '&&', '||', and ':' if no others seen + # (but note: our count made below could be wrong + # due to intervening comments) + && ( $leading_amp_count == 0 + || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) + + # but leading colons probably line up with a + # previous colon or question (count could be wrong). + && $type_ibeg_2 ne ':' + + # only one step in depth allowed. this line must not + # begin with a ')' itself. + && ( $nesting_depth_to_go[$iend_1] == + $nesting_depth_to_go[$iend_2] + 1 ); + + # YVES patch 2 of 2: + # Allow cuddled eval chains, like this: + # eval { + # #STUFF; + # 1; # return true + # } or do { + # #handle error + # }; + # This patch works together with a patch in + # setting adjusted indentation (where the closing eval + # brace is outdented if possible). + # The problem is that an 'eval' block has continuation + # indentation and it looks better to undo it in some + # cases. If we do not use this patch we would get: + # eval { + # #STUFF; + # 1; # return true + # } + # or do { + # #handle error + # }; + # The alternative, for uncuddled style, is to create + # a patch in set_adjusted_indentation which undoes + # the indentation of a leading line like 'or do {'. + # This doesn't work well with -icb through + if ( + $block_type_to_go[$iend_1] eq 'eval' + && !$rOpts->{'line-up-parentheses'} + && !$rOpts->{'indent-closing-brace'} + && $tokens_to_go[$iend_2] eq '{' + && ( + ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ ) + || ( $type_ibeg_2 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_2] } ) + || $is_if_unless{ $tokens_to_go[$ibeg_2] } + ) + ) + { + $skip_Section_3 ||= 1; + } + + next + unless ( + $skip_Section_3 + + # handle '.' and '?' specially below + || ( $type_ibeg_2 =~ /^[\.\?]$/ ) + ); + } + + elsif ( $type_iend_1 eq '{' ) { + + # YVES + # honor breaks at opening brace + # Added to prevent recombining something like this: + # } || eval { package main; + next if $forced_breakpoint_to_go[$iend_1]; + } + + # do not recombine lines with ending &&, ||, + elsif ( $is_amp_amp{$type_iend_1} ) { + next unless $want_break_before{$type_iend_1}; + } + + # Identify and recombine a broken ?/: chain + elsif ( $type_iend_1 eq '?' ) { + + # Do not recombine different levels + next + if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); + + # do not recombine unless next line ends in : + next unless $type_iend_2 eq ':'; + } + + # for lines ending in a comma... + elsif ( $type_iend_1 eq ',' ) { + + # Do not recombine at comma which is following the + # input bias. + # TODO: might be best to make a special flag + next if ( $old_breakpoint_to_go[$iend_1] ); + + # an isolated '},' may join with an identifier + ';' + # this is useful for the class of a 'bless' statement (bless.t) + if ( $type_ibeg_1 eq '}' + && $type_ibeg_2 eq 'i' ) + { + next + unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) + && ( $iend_2 == ( $ibeg_2 + 1 ) ) + && $this_line_is_semicolon_terminated ); + + # override breakpoint + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # but otherwise .. + else { + + # do not recombine after a comma unless this will leave + # just 1 more line + next unless ( $n + 1 >= $nmax ); + + # do not recombine if there is a change in indentation depth + next + if ( + $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); + + # do not recombine a "complex expression" after a + # comma. "complex" means no parens. + my $saw_paren; + foreach my $ii ( $ibeg_2 .. $iend_2 ) { + if ( $tokens_to_go[$ii] eq '(' ) { + $saw_paren = 1; + last; + } + } + next if $saw_paren; + } + } + + # opening paren.. + elsif ( $type_iend_1 eq '(' ) { + + # No longer doing this + } + + elsif ( $type_iend_1 eq ')' ) { + + # No longer doing this + } + + # keep a terminal for-semicolon + elsif ( $type_iend_1 eq 'f' ) { + next; + } + + # if '=' at end of line ... + elsif ( $is_assignment{$type_iend_1} ) { + + # keep break after = if it was in input stream + # this helps prevent 'blinkers' + next if $old_breakpoint_to_go[$iend_1] + + # don't strand an isolated '=' + && $iend_1 != $ibeg_1; + + my $is_short_quote = + ( $type_ibeg_2 eq 'Q' + && $ibeg_2 == $iend_2 + && token_sequence_length( $ibeg_2, $ibeg_2 ) < + $rOpts_short_concatenation_item_length ); + my $is_ternary = + ( $type_ibeg_1 eq '?' + && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); + + # always join an isolated '=', a short quote, or if this + # will put ?/: at start of adjacent lines + if ( $ibeg_1 != $iend_1 + && !$is_short_quote + && !$is_ternary ) + { + next + unless ( + ( + + # unless we can reduce this to two lines + $nmax < $n + 2 + + # or three lines, the last with a leading semicolon + || ( $nmax == $n + 2 + && $types_to_go[$ibeg_nmax] eq ';' ) + + # or the next line ends with a here doc + || $type_iend_2 eq 'h' + + # or the next line ends in an open paren or brace + # and the break hasn't been forced [dima.t] + || ( !$forced_breakpoint_to_go[$iend_1] + && $type_iend_2 eq '{' ) + ) + + # do not recombine if the two lines might align well + # this is a very approximate test for this + && ( $ibeg_3 >= 0 + && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) + ); + + if ( + + # Recombine if we can make two lines + $nmax >= $n + 2 + + # -lp users often prefer this: + # my $title = function($env, $env, $sysarea, + # "bubba Borrower Entry"); + # so we will recombine if -lp is used we have + # ending comma + && ( !$rOpts_line_up_parentheses + || $type_iend_2 ne ',' ) + ) + { + + # otherwise, scan the rhs line up to last token for + # complexity. Note that we are not counting the last + # token in case it is an opening paren. + my $tv = 0; + my $depth = $nesting_depth_to_go[$ibeg_2]; + foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 1 ); + } + $depth = $nesting_depth_to_go[$i]; + } + + # ok to recombine if no level changes before last token + if ( $tv > 0 ) { + + # otherwise, do not recombine if more than two + # level changes. + next if ( $tv > 1 ); + + # check total complexity of the two adjacent lines + # that will occur if we do this join + my $istop = + ( $n < $nmax ) + ? $ri_end->[ $n + 1 ] + : $iend_2; + foreach my $i ( $iend_2 .. $istop ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 2 ); + } + $depth = $nesting_depth_to_go[$i]; + } + + # do not recombine if total is more than 2 level changes + next if ( $tv > 2 ); + } + } + } + + unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { + $forced_breakpoint_to_go[$iend_1] = 0; + } + } + + # for keywords.. + elsif ( $type_iend_1 eq 'k' ) { + + # make major control keywords stand out + # (recombine.t) + next + if ( + + #/^(last|next|redo|return)$/ + $is_last_next_redo_return{ $tokens_to_go[$iend_1] } + + # but only if followed by multiple lines + && $n < $nmax + ); + + if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { + next + unless $want_break_before{ $tokens_to_go[$iend_1] }; + } + } + + #---------------------------------------------------------- + # Recombine Section 3: + # Examine token at $ibeg_2 (left end of second line of pair) + #---------------------------------------------------------- + + # join lines identified above as capable of + # causing an outdented line with leading closing paren + # Note that we are skipping the rest of this section + # and the rest of the loop to do the join + if ($skip_Section_3) { + $forced_breakpoint_to_go[$iend_1] = 0; + $n_best = $n; + last; + } + + # handle lines with leading &&, || + elsif ( $is_amp_amp{$type_ibeg_2} ) { + + $leading_amp_count++; + + # ok to recombine if it follows a ? or : + # and is followed by an open paren.. + my $ok = + ( $is_ternary{$type_ibeg_1} + && $tokens_to_go[$iend_2] eq '(' ) + + # or is followed by a ? or : at same depth + # + # We are looking for something like this. We can + # recombine the && line with the line above to make the + # structure more clear: + # return + # exists $G->{Attr}->{V} + # && exists $G->{Attr}->{V}->{$u} + # ? %{ $G->{Attr}->{V}->{$u} } + # : (); + # + # We should probably leave something like this alone: + # return + # exists $G->{Attr}->{E} + # && exists $G->{Attr}->{E}->{$u} + # && exists $G->{Attr}->{E}->{$u}->{$v} + # ? %{ $G->{Attr}->{E}->{$u}->{$v} } + # : (); + # so that we either have all of the &&'s (or ||'s) + # on one line, as in the first example, or break at + # each one as in the second example. However, it + # sometimes makes things worse to check for this because + # it prevents multiple recombinations. So this is not done. + || ( $ibeg_3 >= 0 + && $is_ternary{ $types_to_go[$ibeg_3] } + && $nesting_depth_to_go[$ibeg_3] == + $nesting_depth_to_go[$ibeg_2] ); + + next if !$ok && $want_break_before{$type_ibeg_2}; + $forced_breakpoint_to_go[$iend_1] = 0; + + # tweak the bond strength to give this joint priority + # over ? and : + $bs_tweak = 0.25; + } + + # Identify and recombine a broken ?/: chain + elsif ( $type_ibeg_2 eq '?' ) { + + # Do not recombine different levels + my $lev = $levels_to_go[$ibeg_2]; + next if ( $lev ne $levels_to_go[$ibeg_1] ); + + # Do not recombine a '?' if either next line or + # previous line does not start with a ':'. The reasons + # are that (1) no alignment of the ? will be possible + # and (2) the expression is somewhat complex, so the + # '?' is harder to see in the interior of the line. + my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; + my $precedes_colon = + $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; + next unless ( $follows_colon || $precedes_colon ); + + # we will always combining a ? line following a : line + if ( !$follows_colon ) { + + # ...otherwise recombine only if it looks like a chain. + # we will just look at a few nearby lines to see if + # this looks like a chain. + my $local_count = 0; + foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { + $local_count++ + if $ii >= 0 + && $types_to_go[$ii] eq ':' + && $levels_to_go[$ii] == $lev; + } + next unless ( $local_count > 1 ); + } + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # do not recombine lines with leading '.' + elsif ( $type_ibeg_2 eq '.' ) { + my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); + next + unless ( + + # ... unless there is just one and we can reduce + # this to two lines if we do. For example, this + # + # + # $bodyA .= + # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' + # + # looks better than this: + # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' + # . '$args .= $pat;' + + ( + $n == 2 + && $n == $nmax + && $type_ibeg_1 ne $type_ibeg_2 + ) + + # ... or this would strand a short quote , like this + # . "some long quote" + # . "\n"; + + || ( $types_to_go[$i_next_nonblank] eq 'Q' + && $i_next_nonblank >= $iend_2 - 1 + && $token_lengths_to_go[$i_next_nonblank] < + $rOpts_short_concatenation_item_length ) + ); + } + + # handle leading keyword.. + elsif ( $type_ibeg_2 eq 'k' ) { + + # handle leading "or" + if ( $tokens_to_go[$ibeg_2] eq 'or' ) { + next + unless ( + $this_line_is_semicolon_terminated + && ( + + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + # important: only combine a very simple or + # statement because the step below may have + # combined a trailing 'and' with this or, + # and we do not want to then combine + # everything together + && ( $iend_2 - $ibeg_2 <= 7 ) + ) + ); + + #X: RT #81854 + $forced_breakpoint_to_go[$iend_1] = 0 + unless $old_breakpoint_to_go[$iend_1]; + } + + # handle leading 'and' + elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) { + + # Decide if we will combine a single terminal 'and' + # after an 'if' or 'unless'. + + # This looks best with the 'and' on the same + # line as the 'if': + # + # $a = 1 + # if $seconds and $nu < 2; + # + # But this looks better as shown: + # + # $a = 1 + # if !$this->{Parents}{$_} + # or $this->{Parents}{$_} eq $_; + # + next + unless ( + $this_line_is_semicolon_terminated + && ( + + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } + || $tokens_to_go[$ibeg_1] eq 'or' ) + ) + ); + } + + # handle leading "if" and "unless" + elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { + + # FIXME: This is still experimental..may not be too useful + next + unless ( + $this_line_is_semicolon_terminated + + # previous line begins with 'and' or 'or' + && $type_ibeg_1 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_1] } + + ); + } + + # handle all other leading keywords + else { + + # keywords look best at start of lines, + # but combine things like "1 while" + unless ( $is_assignment{$type_iend_1} ) { + next + if ( ( $type_iend_1 ne 'k' ) + && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); + } + } + } + + # similar treatment of && and || as above for 'and' and 'or': + # NOTE: This block of code is currently bypassed because + # of a previous block but is retained for possible future use. + elsif ( $is_amp_amp{$type_ibeg_2} ) { + + # maybe looking at something like: + # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; + + next + unless ( + $this_line_is_semicolon_terminated + + # previous line begins with an 'if' or 'unless' keyword + && $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + ); + } + + # handle line with leading = or similar + elsif ( $is_assignment{$type_ibeg_2} ) { + next unless ( $n == 1 || $n == $nmax ); + next if $old_breakpoint_to_go[$iend_1]; + next + unless ( + + # unless we can reduce this to two lines + $nmax == 2 + + # or three lines, the last with a leading semicolon + || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) + + # or the next line ends with a here doc + || $type_iend_2 eq 'h' + + # or this is a short line ending in ; + || ( $n == $nmax && $this_line_is_semicolon_terminated ) + ); + $forced_breakpoint_to_go[$iend_1] = 0; + } + + #---------------------------------------------------------- + # Recombine Section 4: + # Combine the lines if we arrive here and it is possible + #---------------------------------------------------------- + + # honor hard breakpoints + next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); + + my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak; + + # Require a few extra spaces before recombining lines if we are + # at an old breakpoint unless this is a simple list or terminal + # line. The goal is to avoid oscillating between two + # quasi-stable end states. For example this snippet caused + # problems: +## my $this = +## bless { +## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" +## }, +## $type; + next + if ( $old_breakpoint_to_go[$iend_1] + && !$this_line_is_semicolon_terminated + && $n < $nmax + && $excess + 4 > 0 + && $type_iend_2 ne ',' ); + + # do not recombine if we would skip in indentation levels + if ( $n < $nmax ) { + my $if_next = $ri_beg->[ $n + 1 ]; + next + if ( + $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] + && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] + + # but an isolated 'if (' is undesirable + && !( + $n == 1 + && $iend_1 - $ibeg_1 <= 2 + && $type_ibeg_1 eq 'k' + && $tokens_to_go[$ibeg_1] eq 'if' + && $tokens_to_go[$iend_1] ne '(' + ) + ); + } + + # honor no-break's + next if ( $bs >= NO_BREAK - 1 ); + + # remember the pair with the greatest bond strength + if ( !$n_best ) { + $n_best = $n; + $bs_best = $bs; + } + else { + + if ( $bs > $bs_best ) { + $n_best = $n; + $bs_best = $bs; + } + } + } + + # recombine the pair with the greatest bond strength + if ($n_best) { + splice @{$ri_beg}, $n_best, 1; + splice @{$ri_end}, $n_best - 1, 1; + splice @joint, $n_best, 1; + + # keep going if we are still making progress + $more_to_do++; + } + } + return ( $ri_beg, $ri_end ); + } +} # end recombine_breakpoints + +sub break_all_chain_tokens { + + # scan the current breakpoints looking for breaks at certain "chain + # operators" (. : && || + etc) which often occur repeatedly in a long + # statement. If we see a break at any one, break at all similar tokens + # within the same container. + # + my ( $ri_left, $ri_right ) = @_; + + my %saw_chain_type; + my %left_chain_type; + my %right_chain_type; + my %interior_chain_type; + my $nmax = @{$ri_right} - 1; + + # scan the left and right end tokens of all lines + my $count = 0; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + my $typel = $types_to_go[$il]; + my $typer = $types_to_go[$ir]; + $typel = '+' if ( $typel eq '-' ); # treat + and - the same + $typer = '+' if ( $typer eq '-' ); + $typel = '*' if ( $typel eq '/' ); # treat * and / the same + $typer = '*' if ( $typer eq '/' ); + my $tokenl = $tokens_to_go[$il]; + my $tokenr = $tokens_to_go[$ir]; + + if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) { + next if ( $typel eq '?' ); + push @{ $left_chain_type{$typel} }, $il; + $saw_chain_type{$typel} = 1; + $count++; + } + if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) { + next if ( $typer eq '?' ); + push @{ $right_chain_type{$typer} }, $ir; + $saw_chain_type{$typer} = 1; + $count++; + } + } + return unless $count; + + # now look for any interior tokens of the same types + $count = 0; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + foreach my $i ( $il + 1 .. $ir - 1 ) { + my $type = $types_to_go[$i]; + $type = '+' if ( $type eq '-' ); + $type = '*' if ( $type eq '/' ); + if ( $saw_chain_type{$type} ) { + push @{ $interior_chain_type{$type} }, $i; + $count++; + } + } + } + return unless $count; + + # now make a list of all new break points + my @insert_list; + + # loop over all chain types + foreach my $type ( keys %saw_chain_type ) { + + # quit if just ONE continuation line with leading . For example-- + # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' + # . $contents; + last if ( $nmax == 1 && $type =~ /^[\.\+]$/ ); + + # loop over all interior chain tokens + foreach my $itest ( @{ $interior_chain_type{$type} } ) { + + # loop over all left end tokens of same type + if ( $left_chain_type{$type} ) { + next if $nobreak_to_go[ $itest - 1 ]; + foreach my $i ( @{ $left_chain_type{$type} } ) { + next unless in_same_container( $i, $itest ); + push @insert_list, $itest - 1; + + # Break at matching ? if this : is at a different level. + # For example, the ? before $THRf_DEAD in the following + # should get a break if its : gets a break. + # + # my $flags = + # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE + # : ( $_ & 4 ) ? $THRf_R_DETACHED + # : $THRf_R_JOINABLE; + if ( $type eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( $i_question > 0 ) { + push @insert_list, $i_question - 1; + } + } + last; + } + } + + # loop over all right end tokens of same type + if ( $right_chain_type{$type} ) { + next if $nobreak_to_go[$itest]; + foreach my $i ( @{ $right_chain_type{$type} } ) { + next unless in_same_container( $i, $itest ); + push @insert_list, $itest; + + # break at matching ? if this : is at a different level + if ( $type eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( $i_question >= 0 ) { + push @insert_list, $i_question; + } + } + last; + } + } + } + } + + # insert any new break points + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + return; +} + +sub break_equals { + + # Look for assignment operators that could use a breakpoint. + # For example, in the following snippet + # + # $HOME = $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # we could break at the = to get this, which is a little nicer: + # $HOME = + # $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # The logic here follows the logic in set_logical_padding, which + # will add the padding in the second line to improve alignment. + # + my ( $ri_left, $ri_right ) = @_; + my $nmax = @{$ri_right} - 1; + return unless ( $nmax >= 2 ); + + # scan the left ends of first two lines + my $tokbeg = ""; + my $depth_beg; + for my $n ( 1 .. 2 ) { + my $il = $ri_left->[$n]; + my $typel = $types_to_go[$il]; + my $tokenl = $tokens_to_go[$il]; + + my $has_leading_op = ( $tokenl =~ /^\w/ ) + ? $is_chain_operator{$tokenl} # + - * / : ? && || + : $is_chain_operator{$typel}; # and, or + return unless ($has_leading_op); + if ( $n > 1 ) { + return + unless ( $tokenl eq $tokbeg + && $nesting_depth_to_go[$il] eq $depth_beg ); + } + $tokbeg = $tokenl; + $depth_beg = $nesting_depth_to_go[$il]; + } + + # now look for any interior tokens of the same types + my $il = $ri_left->[0]; + my $ir = $ri_right->[0]; + + # now make a list of all new break points + my @insert_list; + for ( my $i = $ir - 1 ; $i > $il ; $i-- ) { + my $type = $types_to_go[$i]; + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + if ( $want_break_before{$type} ) { + push @insert_list, $i - 1; + } + else { + push @insert_list, $i; + } + } + } + + # Break after a 'return' followed by a chain of operators + # return ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + # To give: + # return + # ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + my $i = 0; + if ( $types_to_go[$i] eq 'k' + && $tokens_to_go[$i] eq 'return' + && $ir > $il + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + push @insert_list, $i; + } + + return unless (@insert_list); + + # One final check... + # scan second and third lines and be sure there are no assignments + # we want to avoid breaking at an = to make something like this: + # unless ( $icon = + # $html_icons{"$type-$state"} + # or $icon = $html_icons{$type} + # or $icon = $html_icons{$state} ) + for my $n ( 1 .. 2 ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + foreach my $i ( $il + 1 .. $ir ) { + my $type = $types_to_go[$i]; + return + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ); + } + } + + # ok, insert any new break point + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + return; +} + +sub insert_final_breaks { + + my ( $ri_left, $ri_right ) = @_; + + my $nmax = @{$ri_right} - 1; + + # scan the left and right end tokens of all lines + my $count = 0; + my $i_first_colon = -1; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + my $typel = $types_to_go[$il]; + my $typer = $types_to_go[$ir]; + return if ( $typel eq '?' ); + return if ( $typer eq '?' ); + if ( $typel eq ':' ) { $i_first_colon = $il; last; } + elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } + } + + # For long ternary chains, + # if the first : we see has its # ? is in the interior + # of a preceding line, then see if there are any good + # breakpoints before the ?. + if ( $i_first_colon > 0 ) { + my $i_question = $mate_index_to_go[$i_first_colon]; + if ( $i_question > 0 ) { + my @insert_list; + for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) { + my $token = $tokens_to_go[$ii]; + my $type = $types_to_go[$ii]; + + # For now, a good break is either a comma or a 'return'. + if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' ) + && in_same_container( $ii, $i_question ) ) + { + push @insert_list, $ii; + last; + } + } + + # insert any new break points + if (@insert_list) { + insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + } + } + return; +} + +sub in_same_container { + + # check to see if tokens at i1 and i2 are in the + # same container, and not separated by a comma, ? or : + # FIXME: this can be written more efficiently now + my ( $i1, $i2 ) = @_; + my $type = $types_to_go[$i1]; + my $depth = $nesting_depth_to_go[$i1]; + return unless ( $nesting_depth_to_go[$i2] == $depth ); + if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } + + ########################################################### + # This is potentially a very slow routine and not critical. + # For safety just give up for large differences. + # See test file 'infinite_loop.txt' + # TODO: replace this loop with a data structure + ########################################################### + return if ( $i2 - $i1 > 200 ); + + foreach my $i ( $i1 + 1 .. $i2 - 1 ) { + next if ( $nesting_depth_to_go[$i] > $depth ); + return if ( $nesting_depth_to_go[$i] < $depth ); + + my $tok = $tokens_to_go[$i]; + $tok = ',' if $tok eq '=>'; # treat => same as , + + # Example: we would not want to break at any of these .'s + # : "$str" + if ( $type ne ':' ) { + return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or'; + } + else { + return if ( $tok =~ /^[\,]$/ ); + } + } + return 1; +} + +sub set_continuation_breaks { + + # Define an array of indexes for inserting newline characters to + # keep the line lengths below the maximum desired length. There is + # an implied break after the last token, so it need not be included. + + # Method: + # This routine is part of series of routines which adjust line + # lengths. It is only called if a statement is longer than the + # maximum line length, or if a preliminary scanning located + # desirable break points. Sub scan_list has already looked at + # these tokens and set breakpoints (in array + # $forced_breakpoint_to_go[$i]) where it wants breaks (for example + # after commas, after opening parens, and before closing parens). + # This routine will honor these breakpoints and also add additional + # breakpoints as necessary to keep the line length below the maximum + # requested. It bases its decision on where the 'bond strength' is + # lowest. + + # Output: returns references to the arrays: + # @i_first + # @i_last + # which contain the indexes $i of the first and last tokens on each + # line. + + # In addition, the array: + # $forced_breakpoint_to_go[$i] + # may be updated to be =1 for any index $i after which there must be + # a break. This signals later routines not to undo the breakpoint. + + my $saw_good_break = shift; + my @i_first = (); # the first index to output + my @i_last = (); # the last index to output + my @i_colon_breaks = (); # needed to decide if we have to break at ?'s + if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } + + set_bond_strengths(); + + my $imin = 0; + my $imax = $max_index_to_go; + if ( $types_to_go[$imin] eq 'b' ) { $imin++ } + if ( $types_to_go[$imax] eq 'b' ) { $imax-- } + my $i_begin = $imin; # index for starting next iteration + + my $leading_spaces = leading_spaces_to_go($imin); + my $line_count = 0; + my $last_break_strength = NO_BREAK; + my $i_last_break = -1; + my $max_bias = 0.001; + my $tiny_bias = 0.0001; + my $leading_alignment_token = ""; + my $leading_alignment_type = ""; + + # see if any ?/:'s are in order + my $colons_in_order = 1; + my $last_tok = ""; + my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ]; + my $colon_count = @colon_list; + foreach (@colon_list) { + if ( $_ eq $last_tok ) { $colons_in_order = 0; last } + $last_tok = $_; + } + + # This is a sufficient but not necessary condition for colon chain + my $is_colon_chain = ( $colons_in_order && @colon_list > 2 ); + + #------------------------------------------------------- + # BEGINNING of main loop to set continuation breakpoints + # Keep iterating until we reach the end + #------------------------------------------------------- + while ( $i_begin <= $imax ) { + my $lowest_strength = NO_BREAK; + my $starting_sum = $summed_lengths_to_go[$i_begin]; + my $i_lowest = -1; + my $i_test = -1; + my $lowest_next_token = ''; + my $lowest_next_type = 'b'; + my $i_lowest_next_nonblank = -1; + + #------------------------------------------------------- + # BEGINNING of inner loop to find the best next breakpoint + #------------------------------------------------------- + for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) { + my $type = $types_to_go[$i_test]; + my $token = $tokens_to_go[$i_test]; + my $next_type = $types_to_go[ $i_test + 1 ]; + my $next_token = $tokens_to_go[ $i_test + 1 ]; + my $i_next_nonblank = $inext_to_go[$i_test]; + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; + my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; + my $strength = $bond_strength_to_go[$i_test]; + my $maximum_line_length = maximum_line_length($i_begin); + + # use old breaks as a tie-breaker. For example to + # prevent blinkers with -pbp in this code: + +##@keywords{ +## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/} +## = (); + + # At the same time try to prevent a leading * in this code + # with the default formatting: + # +## return +## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 ) +## * ( $x**( $a - 1 ) ) +## * ( ( 1 - $x )**( $b - 1 ) ); + + # reduce strength a bit to break ties at an old breakpoint ... + if ( + $old_breakpoint_to_go[$i_test] + + # which is a 'good' breakpoint, meaning ... + # we don't want to break before it + && !$want_break_before{$type} + + # and either we want to break before the next token + # or the next token is not short (i.e. not a '*', '/' etc.) + && $i_next_nonblank <= $imax + && ( $want_break_before{$next_nonblank_type} + || $token_lengths_to_go[$i_next_nonblank] > 2 + || $next_nonblank_type =~ /^[\,\(\[\{L]$/ ) + ) + { + $strength -= $tiny_bias; + } + + # otherwise increase strength a bit if this token would be at the + # maximum line length. This is necessary to avoid blinking + # in the above example when the -iob flag is added. + else { + my $len = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum; + if ( $len >= $maximum_line_length ) { + $strength += $tiny_bias; + } + } + + my $must_break = 0; + + # Force an immediate break at certain operators + # with lower level than the start of the line, + # unless we've already seen a better break. + # + ############################################## + # Note on an issue with a preceding ? + ############################################## + # We don't include a ? in the above list, but there may + # be a break at a previous ? if the line is long. + # Because of this we do not want to force a break if + # there is a previous ? on this line. For now the best way + # to do this is to not break if we have seen a lower strength + # point, which is probably a ?. + # + # Example of unwanted breaks we are avoiding at a '.' following a ? + # from pod2html using perltidy -gnu: + # ) + # ? "\n<A NAME=\"" + # . $value + # . "\">\n$text</A>\n" + # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; + if ( + ( + $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ + || ( $next_nonblank_type eq 'k' + && $next_nonblank_token =~ /^(and|or)$/ ) + ) + && ( $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_next_nonblank] ) + && ( $strength <= $lowest_strength ) + ) + { + set_forced_breakpoint($i_next_nonblank); + } + + if ( + + # Try to put a break where requested by scan_list + $forced_breakpoint_to_go[$i_test] + + # break between ) { in a continued line so that the '{' can + # be outdented + # See similar logic in scan_list which catches instances + # where a line is just something like ') {'. We have to + # be careful because the corresponding block keyword might + # not be on the first line, such as 'for' here: + # + # eval { + # for ("a") { + # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } + # } + # }; + # + || ( + $line_count + && ( $token eq ')' ) + && ( $next_nonblank_type eq '{' ) + && ($next_nonblank_block_type) + && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) + + # RT #104427: Dont break before opening sub brace because + # sub block breaks handled at higher level, unless + # it looks like the preceeding list is long and broken + && !( + $next_nonblank_block_type =~ /^sub\b/ + && ( $nesting_depth_to_go[$i_begin] == + $nesting_depth_to_go[$i_next_nonblank] ) + ) + + && !$rOpts->{'opening-brace-always-on-right'} + ) + + # There is an implied forced break at a terminal opening brace + || ( ( $type eq '{' ) && ( $i_test == $imax ) ) + ) + { + + # Forced breakpoints must sometimes be overridden, for example + # because of a side comment causing a NO_BREAK. It is easier + # to catch this here than when they are set. + if ( $strength < NO_BREAK - 1 ) { + $strength = $lowest_strength - $tiny_bias; + $must_break = 1; + } + } + + # quit if a break here would put a good terminal token on + # the next line and we already have a possible break + if ( + !$must_break + && ( $next_nonblank_type =~ /^[\;\,]$/ ) + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_next_nonblank + 1 ] - + $starting_sum + ) > $maximum_line_length + ) + ) + { + last if ( $i_lowest >= 0 ); + } + + # Avoid a break which would strand a single punctuation + # token. For example, we do not want to strand a leading + # '.' which is followed by a long quoted string. + # But note that we do want to do this with -extrude (l=1) + # so please test any changes to this code on -extrude. + if ( + !$must_break + && ( $i_test == $i_begin ) + && ( $i_test < $imax ) + && ( $token eq $type ) + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum + ) < $maximum_line_length + ) + ) + { + $i_test = min( $imax, $inext_to_go[$i_test] ); + redo; + } + + if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) + { + + # break at previous best break if it would have produced + # a leading alignment of certain common tokens, and it + # is different from the latest candidate break + last + if ($leading_alignment_type); + + # Force at least one breakpoint if old code had good + # break It is only called if a breakpoint is required or + # desired. This will probably need some adjustments + # over time. A goal is to try to be sure that, if a new + # side comment is introduced into formatted text, then + # the same breakpoints will occur. scbreak.t + last + if ( + $i_test == $imax # we are at the end + && !$forced_breakpoint_count # + && $saw_good_break # old line had good break + && $type =~ /^[#;\{]$/ # and this line ends in + # ';' or side comment + && $i_last_break < 0 # and we haven't made a break + && $i_lowest >= 0 # and we saw a possible break + && $i_lowest < $imax - 1 # (but not just before this ;) + && $strength - $lowest_strength < 0.5 * WEAK # and it's good + ); + + # Do not skip past an important break point in a short final + # segment. For example, without this check we would miss the + # break at the final / in the following code: + # + # $depth_stop = + # ( $tau * $mass_pellet * $q_0 * + # ( 1. - exp( -$t_stop / $tau ) ) - + # 4. * $pi * $factor * $k_ice * + # ( $t_melt - $t_ice ) * + # $r_pellet * + # $t_stop ) / + # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); + # + if ( $line_count > 2 + && $i_lowest < $i_test + && $i_test > $imax - 2 + && $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_lowest] + && $lowest_strength < $last_break_strength - .5 * WEAK ) + { + # Make this break for math operators for now + my $ir = $inext_to_go[$i_lowest]; + my $il = $iprev_to_go[$ir]; + last + if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ + || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ); + } + + # Update the minimum bond strength location + $lowest_strength = $strength; + $i_lowest = $i_test; + $lowest_next_token = $next_nonblank_token; + $lowest_next_type = $next_nonblank_type; + $i_lowest_next_nonblank = $i_next_nonblank; + last if $must_break; + + # set flags to remember if a break here will produce a + # leading alignment of certain common tokens + if ( $line_count > 0 + && $i_test < $imax + && ( $lowest_strength - $last_break_strength <= $max_bias ) + ) + { + my $i_last_end = $iprev_to_go[$i_begin]; + my $tok_beg = $tokens_to_go[$i_begin]; + my $type_beg = $types_to_go[$i_begin]; + if ( + + # check for leading alignment of certain tokens + ( + $tok_beg eq $next_nonblank_token + && $is_chain_operator{$tok_beg} + && ( $type_beg eq 'k' + || $type_beg eq $tok_beg ) + && $nesting_depth_to_go[$i_begin] >= + $nesting_depth_to_go[$i_next_nonblank] + ) + + || ( $tokens_to_go[$i_last_end] eq $token + && $is_chain_operator{$token} + && ( $type eq 'k' || $type eq $token ) + && $nesting_depth_to_go[$i_last_end] >= + $nesting_depth_to_go[$i_test] ) + ) + { + $leading_alignment_token = $next_nonblank_token; + $leading_alignment_type = $next_nonblank_type; + } + } + } + + my $too_long = ( $i_test >= $imax ); + if ( !$too_long ) { + my $next_length = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 2 ] - + $starting_sum; + $too_long = $next_length > $maximum_line_length; + + # To prevent blinkers we will avoid leaving a token exactly at + # the line length limit unless it is the last token or one of + # several "good" types. + # + # The following code was a blinker with -pbp before this + # modification: +## $last_nonblank_token eq '(' +## && $is_indirect_object_taker{ $paren_type +## [$paren_depth] } + # The issue causing the problem is that if the + # term [$paren_depth] gets broken across a line then + # the whitespace routine doesn't see both opening and closing + # brackets and will format like '[ $paren_depth ]'. This + # leads to an oscillation in length depending if we break + # before the closing bracket or not. + if ( !$too_long + && $i_test + 1 < $imax + && $next_nonblank_type !~ /^[,\}\]\)R]$/ ) + { + $too_long = $next_length >= $maximum_line_length; + } + } + + FORMATTER_DEBUG_FLAG_BREAK + && do { + my $ltok = $token; + my $rtok = $next_nonblank_token ? $next_nonblank_token : ""; + my $i_testp2 = $i_test + 2; + if ( $i_testp2 > $max_index_to_go + 1 ) { + $i_testp2 = $max_index_to_go + 1; + } + if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } + if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } + print STDOUT +"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; + }; + + # allow one extra terminal token after exceeding line length + # if it would strand this token. + if ( $rOpts_fuzzy_line_length + && $too_long + && $i_lowest == $i_test + && $token_lengths_to_go[$i_test] > 1 + && $next_nonblank_type =~ /^[\;\,]$/ ) + { + $too_long = 0; + } + + last + if ( + ( $i_test == $imax ) # we're done if no more tokens, + || ( + ( $i_lowest >= 0 ) # or no more space and we have a break + && $too_long + ) + ); + } + + #------------------------------------------------------- + # END of inner loop to find the best next breakpoint + # Now decide exactly where to put the breakpoint + #------------------------------------------------------- + + # it's always ok to break at imax if no other break was found + if ( $i_lowest < 0 ) { $i_lowest = $imax } + + # semi-final index calculation + my $i_next_nonblank = $inext_to_go[$i_lowest]; + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; + my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + + #------------------------------------------------------- + # ?/: rule 1 : if a break here will separate a '?' on this + # line from its closing ':', then break at the '?' instead. + #------------------------------------------------------- + foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { + next unless ( $tokens_to_go[$i] eq '?' ); + + # do not break if probable sequence of ?/: statements + next if ($is_colon_chain); + + # do not break if statement is broken by side comment + next + if ( + $tokens_to_go[$max_index_to_go] eq '#' + && terminal_type( \@types_to_go, \@block_type_to_go, 0, + $max_index_to_go ) !~ /^[\;\}]$/ + ); + + # no break needed if matching : is also on the line + next + if ( $mate_index_to_go[$i] >= 0 + && $mate_index_to_go[$i] <= $i_next_nonblank ); + + $i_lowest = $i; + if ( $want_break_before{'?'} ) { $i_lowest-- } + last; + } + + #------------------------------------------------------- + # END of inner loop to find the best next breakpoint: + # Break the line after the token with index i=$i_lowest + #------------------------------------------------------- + + # final index calculation + $i_next_nonblank = $inext_to_go[$i_lowest]; + $next_nonblank_type = $types_to_go[$i_next_nonblank]; + $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + + FORMATTER_DEBUG_FLAG_BREAK + && print STDOUT + "BREAK: best is i = $i_lowest strength = $lowest_strength\n"; + + #------------------------------------------------------- + # ?/: rule 2 : if we break at a '?', then break at its ':' + # + # Note: this rule is also in sub scan_list to handle a break + # at the start and end of a line (in case breaks are dictated + # by side comments). + #------------------------------------------------------- + if ( $next_nonblank_type eq '?' ) { + set_closing_breakpoint($i_next_nonblank); + } + elsif ( $types_to_go[$i_lowest] eq '?' ) { + set_closing_breakpoint($i_lowest); + } + + #------------------------------------------------------- + # ?/: rule 3 : if we break at a ':' then we save + # its location for further work below. We may need to go + # back and break at its '?'. + #------------------------------------------------------- + if ( $next_nonblank_type eq ':' ) { + push @i_colon_breaks, $i_next_nonblank; + } + elsif ( $types_to_go[$i_lowest] eq ':' ) { + push @i_colon_breaks, $i_lowest; + } + + # here we should set breaks for all '?'/':' pairs which are + # separated by this line + + $line_count++; + + # save this line segment, after trimming blanks at the ends + push( @i_first, + ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); + push( @i_last, + ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); + + # set a forced breakpoint at a container opening, if necessary, to + # signal a break at a closing container. Excepting '(' for now. + if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/ + && !$forced_breakpoint_to_go[$i_lowest] ) + { + set_closing_breakpoint($i_lowest); + } + + # get ready to go again + $i_begin = $i_lowest + 1; + $last_break_strength = $lowest_strength; + $i_last_break = $i_lowest; + $leading_alignment_token = ""; + $leading_alignment_type = ""; + $lowest_next_token = ''; + $lowest_next_type = 'b'; + + if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { + $i_begin++; + } + + # update indentation size + if ( $i_begin <= $imax ) { + $leading_spaces = leading_spaces_to_go($i_begin); + } + } + + #------------------------------------------------------- + # END of main loop to set continuation breakpoints + # Now go back and make any necessary corrections + #------------------------------------------------------- + + #------------------------------------------------------- + # ?/: rule 4 -- if we broke at a ':', then break at + # corresponding '?' unless this is a chain of ?: expressions + #------------------------------------------------------- + if (@i_colon_breaks) { + + # using a simple method for deciding if we are in a ?/: chain -- + # this is a chain if it has multiple ?/: pairs all in order; + # otherwise not. + # Note that if line starts in a ':' we count that above as a break + my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); + + unless ($is_chain) { + my @insert_list = (); + foreach (@i_colon_breaks) { + my $i_question = $mate_index_to_go[$_]; + if ( $i_question >= 0 ) { + if ( $want_break_before{'?'} ) { + $i_question = $iprev_to_go[$i_question]; + } + + if ( $i_question >= 0 ) { + push @insert_list, $i_question; + } + } + insert_additional_breaks( \@insert_list, \@i_first, \@i_last ); + } + } + } + return ( \@i_first, \@i_last, $colon_count ); +} + +sub insert_additional_breaks { + + # this routine will add line breaks at requested locations after + # sub set_continuation_breaks has made preliminary breaks. + + my ( $ri_break_list, $ri_first, $ri_last ) = @_; + my $i_f; + my $i_l; + my $line_number = 0; + foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) { + + $i_f = $ri_first->[$line_number]; + $i_l = $ri_last->[$line_number]; + while ( $i_break_left >= $i_l ) { + $line_number++; + + # shouldn't happen unless caller passes bad indexes + if ( $line_number >= @{$ri_last} ) { + warning( +"Non-fatal program bug: couldn't set break at $i_break_left\n" + ); + report_definite_bug(); + return; + } + $i_f = $ri_first->[$line_number]; + $i_l = $ri_last->[$line_number]; + } + + # Do not leave a blank at the end of a line; back up if necessary + if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } + + my $i_break_right = $inext_to_go[$i_break_left]; + if ( $i_break_left >= $i_f + && $i_break_left < $i_l + && $i_break_right > $i_f + && $i_break_right <= $i_l ) + { + splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) ); + splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) ); + } + } + return; +} + +sub set_closing_breakpoint { + + # set a breakpoint at a matching closing token + # at present, this is only used to break at a ':' which matches a '?' + my $i_break = shift; + + if ( $mate_index_to_go[$i_break] >= 0 ) { + + # CAUTION: infinite recursion possible here: + # set_closing_breakpoint calls set_forced_breakpoint, and + # set_forced_breakpoint call set_closing_breakpoint + # ( test files attrib.t, BasicLyx.pm.html). + # Don't reduce the '2' in the statement below + if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { + + # break before } ] and ), but sub set_forced_breakpoint will decide + # to break before or after a ? and : + my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; + set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc ); + } + } + else { + my $type_sequence = $type_sequence_to_go[$i_break]; + if ($type_sequence) { + my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; + $postponed_breakpoint{$type_sequence} = 1; + } + } + return; +} + +sub compare_indentation_levels { + + # check to see if output line tabbing agrees with input line + # this can be very useful for debugging a script which has an extra + # or missing brace + my ( $guessed_indentation_level, $structural_indentation_level ) = @_; + if ( $guessed_indentation_level ne $structural_indentation_level ) { + $last_tabbing_disagreement = $input_line_number; + + if ($in_tabbing_disagreement) { + } + else { + $tabbing_disagreement_count++; + + if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { + write_logfile_entry( +"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" + ); + } + $in_tabbing_disagreement = $input_line_number; + $first_tabbing_disagreement = $in_tabbing_disagreement + unless ($first_tabbing_disagreement); + } + } + else { + + if ($in_tabbing_disagreement) { + + if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) { + write_logfile_entry( +"End indentation disagreement from input line $in_tabbing_disagreement\n" + ); + + if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) { + write_logfile_entry( + "No further tabbing disagreements will be noted\n"); + } + } + $in_tabbing_disagreement = 0; + } + } + return; +} +1; + diff --git a/lib/Perl/Tidy/HtmlWriter.pm b/lib/Perl/Tidy/HtmlWriter.pm new file mode 100644 index 00000000..a0449873 --- /dev/null +++ b/lib/Perl/Tidy/HtmlWriter.pm @@ -0,0 +1,1465 @@ +##################################################################### +# +# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html +# +##################################################################### + +package Perl::Tidy::HtmlWriter; +use strict; +use warnings; + +use File::Basename; + +# class variables +use vars qw{ + %html_color + %html_bold + %html_italic + %token_short_names + %short_to_long_names + $rOpts + $css_filename + $css_linkname + $missing_html_entities + $missing_pod_html +}; + +# replace unsafe characters with HTML entity representation if HTML::Entities +# is available +#{ eval "use HTML::Entities"; $missing_html_entities = $@; } + +BEGIN { + if ( !eval { require HTML::Entities; 1 } ) { + $missing_html_entities = $@ ? $@ : 1; + } + if ( !eval { require Pod::Html; 1 } ) { + $missing_pod_html = $@ ? $@ : 1; + } +} + +sub new { + + my ( $class, $input_file, $html_file, $extension, $html_toc_extension, + $html_src_extension ) + = @_; + + my $html_file_opened = 0; + my $html_fh; + ( $html_fh, my $html_filename ) = + Perl::Tidy::streamhandle( $html_file, 'w' ); + unless ($html_fh) { + Perl::Tidy::Warn("can't open $html_file: $!\n"); + return; + } + $html_file_opened = 1; + + if ( !$input_file || $input_file eq '-' || ref($input_file) ) { + $input_file = "NONAME"; + } + + # write the table of contents to a string + my $toc_string; + my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' ); + + my $html_pre_fh; + my @pre_string_stack; + if ( $rOpts->{'html-pre-only'} ) { + + # pre section goes directly to the output stream + $html_pre_fh = $html_fh; + $html_pre_fh->print( <<"PRE_END"); +
+PRE_END
+    }
+    else {
+
+        # pre section go out to a temporary string
+        my $pre_string;
+        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
+        push @pre_string_stack, \$pre_string;
+    }
+
+    # pod text gets diverted if the 'pod2html' is used
+    my $html_pod_fh;
+    my $pod_string;
+    if ( $rOpts->{'pod2html'} ) {
+        if ( $rOpts->{'html-pre-only'} ) {
+            undef $rOpts->{'pod2html'};
+        }
+        else {
+            ##eval "use Pod::Html";
+            #if ($@) {
+            if ($missing_pod_html) {
+                Perl::Tidy::Warn(
+"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n"
+                );
+                undef $rOpts->{'pod2html'};
+            }
+            else {
+                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
+            }
+        }
+    }
+
+    my $toc_filename;
+    my $src_filename;
+    if ( $rOpts->{'frames'} ) {
+        unless ($extension) {
+            Perl::Tidy::Warn(
+"cannot use frames without a specified output extension; ignoring -frm\n"
+            );
+            undef $rOpts->{'frames'};
+        }
+        else {
+            $toc_filename = $input_file . $html_toc_extension . $extension;
+            $src_filename = $input_file . $html_src_extension . $extension;
+        }
+    }
+
+    # ----------------------------------------------------------
+    # Output is now directed as follows:
+    # html_toc_fh <-- table of contents items
+    # html_pre_fh <-- the 
 section of formatted code, except:
+    # html_pod_fh <-- pod goes here with the pod2html option
+    # ----------------------------------------------------------
+
+    my $title = $rOpts->{'title'};
+    unless ($title) {
+        ( $title, my $path ) = fileparse($input_file);
+    }
+    my $toc_item_count = 0;
+    my $in_toc_package = "";
+    my $last_level     = 0;
+    return bless {
+        _input_file        => $input_file,          # name of input file
+        _title             => $title,               # title, unescaped
+        _html_file         => $html_file,           # name of .html output file
+        _toc_filename      => $toc_filename,        # for frames option
+        _src_filename      => $src_filename,        # for frames option
+        _html_file_opened  => $html_file_opened,    # a flag
+        _html_fh           => $html_fh,             # the output stream
+        _html_pre_fh       => $html_pre_fh,         # pre section goes here
+        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
+        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
+        _rpod_string       => \$pod_string,         # string holding pod
+        _pod_cut_count     => 0,                    # how many =cut's?
+        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
+        _rtoc_string       => \$toc_string,         # string holding toc
+        _rtoc_item_count   => \$toc_item_count,     # how many toc items
+        _rin_toc_package   => \$in_toc_package,     # package name
+        _rtoc_name_count   => {},                   # hash to track unique names
+        _rpackage_stack    => [],                   # stack to check for package
+                                                    # name changes
+        _rlast_level       => \$last_level,         # brace indentation level
+    }, $class;
+}
+
+sub close_object {
+    my ($object) = @_;
+
+    # returns true if close works, false if not
+    # failure probably means there is no close method
+    return eval { $object->close(); 1 };
+}
+
+sub add_toc_item {
+
+    # Add an item to the html table of contents.
+    # This is called even if no table of contents is written,
+    # because we still want to put the anchors in the 
 text.
+    # We are given an anchor name and its type; types are:
+    #      'package', 'sub', '__END__', '__DATA__', 'EOF'
+    # There must be an 'EOF' call at the end to wrap things up.
+    my ( $self, $name, $type ) = @_;
+    my $html_toc_fh     = $self->{_html_toc_fh};
+    my $html_pre_fh     = $self->{_html_pre_fh};
+    my $rtoc_name_count = $self->{_rtoc_name_count};
+    my $rtoc_item_count = $self->{_rtoc_item_count};
+    my $rlast_level     = $self->{_rlast_level};
+    my $rin_toc_package = $self->{_rin_toc_package};
+    my $rpackage_stack  = $self->{_rpackage_stack};
+
+    # packages contain sublists of subs, so to avoid errors all package
+    # items are written and finished with the following routines
+    my $end_package_list = sub {
+        if ( ${$rin_toc_package} ) {
+            $html_toc_fh->print("\n\n");
+            ${$rin_toc_package} = "";
+        }
+    };
+
+    my $start_package_list = sub {
+        my ( $unique_name, $package ) = @_;
+        if ( ${$rin_toc_package} ) { $end_package_list->() }
+        $html_toc_fh->print(<package $package
+
    +EOM + ${$rin_toc_package} = $package; + }; + + # start the table of contents on the first item + unless ( ${$rtoc_item_count} ) { + + # but just quit if we hit EOF without any other entries + # in this case, there will be no toc + return if ( $type eq 'EOF' ); + $html_toc_fh->print( <<"TOC_END"); + +
      +TOC_END + } + ${$rtoc_item_count}++; + + # make a unique anchor name for this location: + # - packages get a 'package-' prefix + # - subs use their names + my $unique_name = $name; + if ( $type eq 'package' ) { $unique_name = "package-$name" } + + # append '-1', '-2', etc if necessary to make unique; this will + # be unique because subs and packages cannot have a '-' + if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { + $unique_name .= "-$count"; + } + + # - all names get terminal '-' if pod2html is used, to avoid + # conflicts with anchor names created by pod2html + if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } + + # start/stop lists of subs + if ( $type eq 'sub' ) { + my $package = $rpackage_stack->[ ${$rlast_level} ]; + unless ($package) { $package = 'main' } + + # if we're already in a package/sub list, be sure its the right + # package or else close it + if ( ${$rin_toc_package} && ${$rin_toc_package} ne $package ) { + $end_package_list->(); + } + + # start a package/sub list if necessary + unless ( ${$rin_toc_package} ) { + $start_package_list->( $unique_name, $package ); + } + } + + # now write an entry in the toc for this item + if ( $type eq 'package' ) { + $start_package_list->( $unique_name, $name ); + } + elsif ( $type eq 'sub' ) { + $html_toc_fh->print("
    • $name
    • \n"); + } + else { + $end_package_list->(); + $html_toc_fh->print("
    • $name
    • \n"); + } + + # write the anchor in the
       section
      +    $html_pre_fh->print("");
      +
      +    # end the table of contents, if any, on the end of file
      +    if ( $type eq 'EOF' ) {
      +        $html_toc_fh->print( <<"TOC_END");
      +
    + +TOC_END + } + return; +} + +BEGIN { + + # This is the official list of tokens which may be identified by the + # user. Long names are used as getopt keys. Short names are + # convenient short abbreviations for specifying input. Short names + # somewhat resemble token type characters, but are often different + # because they may only be alphanumeric, to allow command line + # input. Also, note that because of case insensitivity of html, + # this table must be in a single case only (I've chosen to use all + # lower case). + # When adding NEW_TOKENS: update this hash table + # short names => long names + %short_to_long_names = ( + 'n' => 'numeric', + 'p' => 'paren', + 'q' => 'quote', + 's' => 'structure', + 'c' => 'comment', + 'v' => 'v-string', + 'cm' => 'comma', + 'w' => 'bareword', + 'co' => 'colon', + 'pu' => 'punctuation', + 'i' => 'identifier', + 'j' => 'label', + 'h' => 'here-doc-target', + 'hh' => 'here-doc-text', + 'k' => 'keyword', + 'sc' => 'semicolon', + 'm' => 'subroutine', + 'pd' => 'pod-text', + ); + + # Now we have to map actual token types into one of the above short + # names; any token types not mapped will get 'punctuation' + # properties. + + # The values of this hash table correspond to the keys of the + # previous hash table. + # The keys of this hash table are token types and can be seen + # by running with --dump-token-types (-dtt). + + # When adding NEW_TOKENS: update this hash table + # $type => $short_name + %token_short_names = ( + '#' => 'c', + 'n' => 'n', + 'v' => 'v', + 'k' => 'k', + 'F' => 'k', + 'Q' => 'q', + 'q' => 'q', + 'J' => 'j', + 'j' => 'j', + 'h' => 'h', + 'H' => 'hh', + 'w' => 'w', + ',' => 'cm', + '=>' => 'cm', + ';' => 'sc', + ':' => 'co', + 'f' => 'sc', + '(' => 'p', + ')' => 'p', + 'M' => 'm', + 'P' => 'pd', + 'A' => 'co', + ); + + # These token types will all be called identifiers for now + # FIXME: could separate user defined modules as separate type + my @identifier = qw< i t U C Y Z G :: CORE::>; + @token_short_names{@identifier} = ('i') x scalar(@identifier); + + # These token types will be called 'structure' + my @structure = qw< { } >; + @token_short_names{@structure} = ('s') x scalar(@structure); + + # OLD NOTES: save for reference + # Any of these could be added later if it would be useful. + # For now, they will by default become punctuation + # my @list = qw< L R [ ] >; + # @token_long_names{@list} = ('non-structure') x scalar(@list); + # + # my @list = qw" + # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm + # "; + # @token_long_names{@list} = ('math') x scalar(@list); + # + # my @list = qw" & &= ~ ~= ^ ^= | |= "; + # @token_long_names{@list} = ('bit') x scalar(@list); + # + # my @list = qw" == != < > <= <=> "; + # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); + # + # my @list = qw" && || ! &&= ||= //= "; + # @token_long_names{@list} = ('logical') x scalar(@list); + # + # my @list = qw" . .= =~ !~ x x= "; + # @token_long_names{@list} = ('string-operators') x scalar(@list); + # + # # Incomplete.. + # my @list = qw" .. -> <> ... \ ? "; + # @token_long_names{@list} = ('misc-operators') x scalar(@list); + +} + +sub make_getopt_long_names { + my ( $class, $rgetopt_names ) = @_; + while ( my ( $short_name, $name ) = each %short_to_long_names ) { + push @{$rgetopt_names}, "html-color-$name=s"; + push @{$rgetopt_names}, "html-italic-$name!"; + push @{$rgetopt_names}, "html-bold-$name!"; + } + push @{$rgetopt_names}, "html-color-background=s"; + push @{$rgetopt_names}, "html-linked-style-sheet=s"; + push @{$rgetopt_names}, "nohtml-style-sheets"; + push @{$rgetopt_names}, "html-pre-only"; + push @{$rgetopt_names}, "html-line-numbers"; + push @{$rgetopt_names}, "html-entities!"; + push @{$rgetopt_names}, "stylesheet"; + push @{$rgetopt_names}, "html-table-of-contents!"; + push @{$rgetopt_names}, "pod2html!"; + push @{$rgetopt_names}, "frames!"; + push @{$rgetopt_names}, "html-toc-extension=s"; + push @{$rgetopt_names}, "html-src-extension=s"; + + # Pod::Html parameters: + push @{$rgetopt_names}, "backlink=s"; + push @{$rgetopt_names}, "cachedir=s"; + push @{$rgetopt_names}, "htmlroot=s"; + push @{$rgetopt_names}, "libpods=s"; + push @{$rgetopt_names}, "podpath=s"; + push @{$rgetopt_names}, "podroot=s"; + push @{$rgetopt_names}, "title=s"; + + # Pod::Html parameters with leading 'pod' which will be removed + # before the call to Pod::Html + push @{$rgetopt_names}, "podquiet!"; + push @{$rgetopt_names}, "podverbose!"; + push @{$rgetopt_names}, "podrecurse!"; + push @{$rgetopt_names}, "podflush"; + push @{$rgetopt_names}, "podheader!"; + push @{$rgetopt_names}, "podindex!"; + return; +} + +sub make_abbreviated_names { + + # We're appending things like this to the expansion list: + # 'hcc' => [qw(html-color-comment)], + # 'hck' => [qw(html-color-keyword)], + # etc + my ( $class, $rexpansion ) = @_; + + # abbreviations for color/bold/italic properties + while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { + ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; + ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; + ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; + ${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"]; + ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; + } + + # abbreviations for all other html options + ${$rexpansion}{"hcbg"} = ["html-color-background"]; + ${$rexpansion}{"pre"} = ["html-pre-only"]; + ${$rexpansion}{"toc"} = ["html-table-of-contents"]; + ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; + ${$rexpansion}{"nnn"} = ["html-line-numbers"]; + ${$rexpansion}{"hent"} = ["html-entities"]; + ${$rexpansion}{"nhent"} = ["nohtml-entities"]; + ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; + ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; + ${$rexpansion}{"ss"} = ["stylesheet"]; + ${$rexpansion}{"pod"} = ["pod2html"]; + ${$rexpansion}{"npod"} = ["nopod2html"]; + ${$rexpansion}{"frm"} = ["frames"]; + ${$rexpansion}{"nfrm"} = ["noframes"]; + ${$rexpansion}{"text"} = ["html-toc-extension"]; + ${$rexpansion}{"sext"} = ["html-src-extension"]; + return; +} + +sub check_options { + + # This will be called once after options have been parsed + # Note that we are defining the package variable $rOpts here: + ( my $class, $rOpts ) = @_; + + # X11 color names for default settings that seemed to look ok + # (these color names are only used for programming clarity; the hex + # numbers are actually written) + use constant ForestGreen => "#228B22"; + use constant SaddleBrown => "#8B4513"; + use constant magenta4 => "#8B008B"; + use constant IndianRed3 => "#CD5555"; + use constant DeepSkyBlue4 => "#00688B"; + use constant MediumOrchid3 => "#B452CD"; + use constant black => "#000000"; + use constant white => "#FFFFFF"; + use constant red => "#FF0000"; + + # set default color, bold, italic properties + # anything not listed here will be given the default (punctuation) color -- + # these types currently not listed and get default: ws pu s sc cm co p + # When adding NEW_TOKENS: add an entry here if you don't want defaults + + # set_default_properties( $short_name, default_color, bold?, italic? ); + set_default_properties( 'c', ForestGreen, 0, 0 ); + set_default_properties( 'pd', ForestGreen, 0, 1 ); + set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown + set_default_properties( 'q', IndianRed3, 0, 0 ); + set_default_properties( 'hh', IndianRed3, 0, 1 ); + set_default_properties( 'h', IndianRed3, 1, 0 ); + set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); + set_default_properties( 'w', black, 0, 0 ); + set_default_properties( 'n', MediumOrchid3, 0, 0 ); + set_default_properties( 'v', MediumOrchid3, 0, 0 ); + set_default_properties( 'j', IndianRed3, 1, 0 ); + set_default_properties( 'm', red, 1, 0 ); + + set_default_color( 'html-color-background', white ); + set_default_color( 'html-color-punctuation', black ); + + # setup property lookup tables for tokens based on their short names + # every token type has a short name, and will use these tables + # to do the html markup + while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { + $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; + $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; + $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; + } + + # write style sheet to STDOUT and die if requested + if ( defined( $rOpts->{'stylesheet'} ) ) { + write_style_sheet_file('-'); + Perl::Tidy::Exit(0); + } + + # make sure user gives a file name after -css + if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { + $css_linkname = $rOpts->{'html-linked-style-sheet'}; + if ( $css_linkname =~ /^-/ ) { + Perl::Tidy::Die("You must specify a valid filename after -css\n"); + } + } + + # check for conflict + if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { + $rOpts->{'nohtml-style-sheets'} = 0; + warning("You can't specify both -css and -nss; -nss ignored\n"); + } + + # write a style sheet file if necessary + if ($css_linkname) { + + # if the selected filename exists, don't write, because user may + # have done some work by hand to create it; use backup name instead + # Also, this will avoid a potential disaster in which the user + # forgets to specify the style sheet, like this: + # perltidy -html -css myfile1.pl myfile2.pl + # This would cause myfile1.pl to parsed as the style sheet by GetOpts + my $css_filename = $css_linkname; + unless ( -e $css_filename ) { + write_style_sheet_file($css_filename); + } + } + $missing_html_entities = 1 unless $rOpts->{'html-entities'}; + return; +} + +sub write_style_sheet_file { + + my $css_filename = shift; + my $fh; + unless ( $fh = IO::File->new("> $css_filename") ) { + Perl::Tidy::Die("can't open $css_filename: $!\n"); + } + write_style_sheet_data($fh); + close_object($fh); + return; +} + +sub write_style_sheet_data { + + # write the style sheet data to an open file handle + my $fh = shift; + + my $bg_color = $rOpts->{'html-color-background'}; + my $text_color = $rOpts->{'html-color-punctuation'}; + + # pre-bgcolor is new, and may not be defined + my $pre_bg_color = $rOpts->{'html-pre-color-background'}; + $pre_bg_color = $bg_color unless $pre_bg_color; + + $fh->print(<<"EOM"); +/* default style sheet generated by perltidy */ +body {background: $bg_color; color: $text_color} +pre { color: $text_color; + background: $pre_bg_color; + font-family: courier; + } + +EOM + + foreach my $short_name ( sort keys %short_to_long_names ) { + my $long_name = $short_to_long_names{$short_name}; + + my $abbrev = '.' . $short_name; + if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment + my $color = $html_color{$short_name}; + if ( !defined($color) ) { $color = $text_color } + $fh->print("$abbrev \{ color: $color;"); + + if ( $html_bold{$short_name} ) { + $fh->print(" font-weight:bold;"); + } + + if ( $html_italic{$short_name} ) { + $fh->print(" font-style:italic;"); + } + $fh->print("} /* $long_name */\n"); + } + return; +} + +sub set_default_color { + + # make sure that options hash $rOpts->{$key} contains a valid color + my ( $key, $color ) = @_; + if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } + $rOpts->{$key} = check_RGB($color); + return; +} + +sub check_RGB { + + # if color is a 6 digit hex RGB value, prepend a #, otherwise + # assume that it is a valid ascii color name + my ($color) = @_; + if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } + return $color; +} + +sub set_default_properties { + my ( $short_name, $color, $bold, $italic ) = @_; + + set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); + my $key; + $key = "html-bold-$short_to_long_names{$short_name}"; + $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; + $key = "html-italic-$short_to_long_names{$short_name}"; + $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; + return; +} + +sub pod_to_html { + + # Use Pod::Html to process the pod and make the page + # then merge the perltidy code sections into it. + # return 1 if success, 0 otherwise + my ( $self, $pod_string, $css_string, $toc_string, $rpre_string_stack ) = + @_; + my $input_file = $self->{_input_file}; + my $title = $self->{_title}; + my $success_flag = 0; + + # don't try to use pod2html if no pod + unless ($pod_string) { + return $success_flag; + } + + # Pod::Html requires a real temporary filename + my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile(); + unless ($fh_tmp) { + Perl::Tidy::Warn( + "unable to open temporary file $tmpfile; cannot use pod2html\n"); + return $success_flag; + } + + #------------------------------------------------------------------ + # Warning: a temporary file is open; we have to clean up if + # things go bad. From here on all returns should be by going to + # RETURN so that the temporary file gets unlinked. + #------------------------------------------------------------------ + + # write the pod text to the temporary file + $fh_tmp->print($pod_string); + $fh_tmp->close(); + + # Hand off the pod to pod2html. + # Note that we can use the same temporary filename for input and output + # because of the way pod2html works. + { + + my @args; + push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; + + # Flags with string args: + # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", + # "podpath=s", "podroot=s" + # Note: -css=s is handled by perltidy itself + foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) + { + if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } + } + + # Toggle switches; these have extra leading 'pod' + # "header!", "index!", "recurse!", "quiet!", "verbose!" + foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) { + my $kwd = $kw; # allows us to strip 'pod' + if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } + elsif ( defined( $rOpts->{$kw} ) ) { + $kwd =~ s/^pod//; + push @args, "--no$kwd"; + } + } + + # "flush", + my $kw = 'podflush'; + if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } + + # Must clean up if pod2html dies (it can); + # Be careful not to overwrite callers __DIE__ routine + local $SIG{__DIE__} = sub { + unlink $tmpfile if -e $tmpfile; + Perl::Tidy::Die( $_[0] ); + }; + + pod2html(@args); + } + $fh_tmp = IO::File->new( $tmpfile, 'r' ); + unless ($fh_tmp) { + + # this error shouldn't happen ... we just used this filename + Perl::Tidy::Warn( + "unable to open temporary file $tmpfile; cannot use pod2html\n"); + goto RETURN; + } + + my $html_fh = $self->{_html_fh}; + my @toc; + my $in_toc; + my $ul_level = 0; + my $no_print; + + # This routine will write the html selectively and store the toc + my $html_print = sub { + foreach (@_) { + $html_fh->print($_) unless ($no_print); + if ($in_toc) { push @toc, $_ } + } + }; + + # loop over lines of html output from pod2html and merge in + # the necessary perltidy html sections + my ( $saw_body, $saw_index, $saw_body_end ); + + my $timestamp = ""; + if ( $rOpts->{'timestamp'} ) { + my $date = localtime; + $timestamp = "on $date"; + } + while ( my $line = $fh_tmp->getline() ) { + + if ( $line =~ /^\s*\s*$/i ) { + ##my $date = localtime; + ##$html_print->("\n"); + $html_print->("\n"); + $html_print->($line); + } + + # Copy the perltidy css, if any, after tag + elsif ( $line =~ /^\s*\s*$/i ) { + $saw_body = 1; + $html_print->($css_string) if $css_string; + $html_print->($line); + + # add a top anchor and heading + $html_print->("\n"); + $title = escape_html($title); + $html_print->("

    $title

    \n"); + } + + # check for start of index, old pod2html + # before Pod::Html VERSION 1.15_02 it is delimited by comments as: + # + #
      + # ... + #
    + # + # + elsif ( $line =~ /^\s*\s*$/i ) { + $in_toc = 'INDEX'; + + # when frames are used, an extra table of contents in the + # contents panel is confusing, so don't print it + $no_print = $rOpts->{'frames'} + || !$rOpts->{'html-table-of-contents'}; + $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; + $html_print->($line); + } + + # check for start of index, new pod2html + # After Pod::Html VERSION 1.15_02 it is delimited as: + #
      + # ... + #
    + elsif ( $line =~ /^\s*/i ) { + $in_toc = 'UL'; + $ul_level = 1; + + # when frames are used, an extra table of contents in the + # contents panel is confusing, so don't print it + $no_print = $rOpts->{'frames'} + || !$rOpts->{'html-table-of-contents'}; + $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; + $html_print->($line); + } + + # Check for end of index, old pod2html + elsif ( $line =~ /^\s*\s*$/i ) { + $saw_index = 1; + $html_print->($line); + + # Copy the perltidy toc, if any, after the Pod::Html toc + if ($toc_string) { + $html_print->("
    \n") if $rOpts->{'frames'}; + $html_print->("

    Code Index:

    \n"); + ##my @toc = map { $_ .= "\n" } split /\n/, $toc_string; + my @toc = map { $_ . "\n" } split /\n/, $toc_string; + $html_print->(@toc); + } + $in_toc = ""; + $no_print = 0; + } + + # must track
      depth level for new pod2html + elsif ( $line =~ /\s*
        \s*$/i && $in_toc eq 'UL' ) { + $ul_level++; + $html_print->($line); + } + + # Check for end of index, for new pod2html + elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) { + $ul_level--; + $html_print->($line); + + # Copy the perltidy toc, if any, after the Pod::Html toc + if ( $ul_level <= 0 ) { + $saw_index = 1; + if ($toc_string) { + $html_print->("
        \n") if $rOpts->{'frames'}; + $html_print->("

        Code Index:

        \n"); + ##my @toc = map { $_ .= "\n" } split /\n/, $toc_string; + my @toc = map { $_ . "\n" } split /\n/, $toc_string; + $html_print->(@toc); + } + $in_toc = ""; + $ul_level = 0; + $no_print = 0; + } + } + + # Copy one perltidy section after each marker + elsif ( $line =~ /^(.*)(.*)$/ ) { + $line = $2; + $html_print->($1) if $1; + + # Intermingle code and pod sections if we saw multiple =cut's. + if ( $self->{_pod_cut_count} > 1 ) { + my $rpre_string = shift( @{$rpre_string_stack} ); + if ( ${$rpre_string} ) { + $html_print->('
        ');
        +                    $html_print->( ${$rpre_string} );
        +                    $html_print->('
        '); + } + else { + + # shouldn't happen: we stored a string before writing + # each marker. + Perl::Tidy::Warn( +"Problem merging html stream with pod2html; order may be wrong\n" + ); + } + $html_print->($line); + } + + # If didn't see multiple =cut lines, we'll put the pod out first + # and then the code, because it's less confusing. + else { + + # since we are not intermixing code and pod, we don't need + # or want any
        lines which separated pod and code + $html_print->($line) unless ( $line =~ /^\s*
        \s*$/i ); + } + } + + # Copy any remaining code section before the tag + elsif ( $line =~ /^\s*<\/body>\s*$/i ) { + $saw_body_end = 1; + if ( @{$rpre_string_stack} ) { + unless ( $self->{_pod_cut_count} > 1 ) { + $html_print->('
        '); + } + while ( my $rpre_string = shift( @{$rpre_string_stack} ) ) { + $html_print->('
        ');
        +                    $html_print->( ${$rpre_string} );
        +                    $html_print->('
        '); + } + } + $html_print->($line); + } + else { + $html_print->($line); + } + } + + $success_flag = 1; + unless ($saw_body) { + Perl::Tidy::Warn("Did not see in pod2html output\n"); + $success_flag = 0; + } + unless ($saw_body_end) { + Perl::Tidy::Warn("Did not see in pod2html output\n"); + $success_flag = 0; + } + unless ($saw_index) { + Perl::Tidy::Warn("Did not find INDEX END in pod2html output\n"); + $success_flag = 0; + } + + RETURN: + close_object($html_fh); + + # note that we have to unlink tmpfile before making frames + # because the tmpfile may be one of the names used for frames + if ( -e $tmpfile ) { + unless ( unlink($tmpfile) ) { + Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n"); + $success_flag = 0; + } + } + + if ( $success_flag && $rOpts->{'frames'} ) { + $self->make_frame( \@toc ); + } + return $success_flag; +} + +sub make_frame { + + # Make a frame with table of contents in the left panel + # and the text in the right panel. + # On entry: + # $html_filename contains the no-frames html output + # $rtoc is a reference to an array with the table of contents + my ( $self, $rtoc ) = @_; + my $input_file = $self->{_input_file}; + my $html_filename = $self->{_html_file}; + my $toc_filename = $self->{_toc_filename}; + my $src_filename = $self->{_src_filename}; + my $title = $self->{_title}; + $title = escape_html($title); + + # FUTURE input parameter: + my $top_basename = ""; + + # We need to produce 3 html files: + # 1. - the table of contents + # 2. - the contents (source code) itself + # 3. - the frame which contains them + + # get basenames for relative links + my ( $toc_basename, $toc_path ) = fileparse($toc_filename); + my ( $src_basename, $src_path ) = fileparse($src_filename); + + # 1. Make the table of contents panel, with appropriate changes + # to the anchor names + my $src_frame_name = 'SRC'; + my $first_anchor = + write_toc_html( $title, $toc_filename, $src_basename, $rtoc, + $src_frame_name ); + + # 2. The current .html filename is renamed to be the contents panel + rename( $html_filename, $src_filename ) + or Perl::Tidy::Die("Cannot rename $html_filename to $src_filename:$!\n"); + + # 3. Then use the original html filename for the frame + write_frame_html( + $title, $html_filename, $top_basename, + $toc_basename, $src_basename, $src_frame_name + ); + return; +} + +sub write_toc_html { + + # write a separate html table of contents file for frames + my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; + my $fh = IO::File->new( $toc_filename, 'w' ) + or Perl::Tidy::Die("Cannot open $toc_filename:$!\n"); + $fh->print(< + +$title + + +

        $title

        +EOM + + my $first_anchor = + change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); + $fh->print( join "", @{$rtoc} ); + + $fh->print(< + +EOM + + return; +} + +sub write_frame_html { + + # write an html file to be the table of contents frame + my ( + $title, $frame_filename, $top_basename, + $toc_basename, $src_basename, $src_frame_name + ) = @_; + + my $fh = IO::File->new( $frame_filename, 'w' ) + or Perl::Tidy::Die("Cannot open $toc_basename:$!\n"); + + $fh->print(< + + + +$title + +EOM + + # two left panels, one right, if master index file + if ($top_basename) { + $fh->print(< + + + + +EOM + } + + # one left panels, one right, if no master index file + else { + $fh->print(< + +EOM + } + $fh->print(< + +<body> +<p>If you see this message, you are using a non-frame-capable web client.</p> +<p>This document contains:</p> +<ul> +<li><a href="$toc_basename">A table of contents</a></li> +<li><a href="$src_basename">The source code</a></li> +</ul> +</body> + + + +EOM + return; +} + +sub change_anchor_names { + + # add a filename and target to anchors + # also return the first anchor + my ( $rlines, $filename, $target ) = @_; + my $first_anchor; + foreach my $line ( @{$rlines} ) { + + # We're looking for lines like this: + #
      • SYNOPSIS
      • + # ---- - -------- ----------------- + # $1 $4 $5 + if ( $line =~ /^(.*)]*>(.*)$/i ) { + my $pre = $1; + my $name = $4; + my $post = $5; + my $href = "$filename#$name"; + $line = "$pre$post\n"; + unless ($first_anchor) { $first_anchor = $href } + } + } + return $first_anchor; +} + +sub close_html_file { + my $self = shift; + return unless $self->{_html_file_opened}; + + my $html_fh = $self->{_html_fh}; + my $rtoc_string = $self->{_rtoc_string}; + + # There are 3 basic paths to html output... + + # --------------------------------- + # Path 1: finish up if in -pre mode + # --------------------------------- + if ( $rOpts->{'html-pre-only'} ) { + $html_fh->print( <<"PRE_END"); +
+PRE_END + close_object($html_fh); + return; + } + + # Finish the index + $self->add_toc_item( 'EOF', 'EOF' ); + + my $rpre_string_stack = $self->{_rpre_string_stack}; + + # Patch to darken the
 background color in case of pod2html and
+    # interleaved code/documentation.  Otherwise, the distinction
+    # between code and documentation is blurred.
+    if (   $rOpts->{pod2html}
+        && $self->{_pod_cut_count} >= 1
+        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
+    {
+        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
+    }
+
+    # put the css or its link into a string, if used
+    my $css_string;
+    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
+
+    # use css linked to another file
+    if ( $rOpts->{'html-linked-style-sheet'} ) {
+        $fh_css->print(
+            qq());
+    }
+
+    # use css embedded in this file
+    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
+        $fh_css->print( <<'ENDCSS');
+
+ENDCSS
+    }
+
+    # -----------------------------------------------------------
+    # path 2: use pod2html if requested
+    #         If we fail for some reason, continue on to path 3
+    # -----------------------------------------------------------
+    if ( $rOpts->{'pod2html'} ) {
+        my $rpod_string = $self->{_rpod_string};
+        $self->pod_to_html(
+            ${$rpod_string}, $css_string,
+            ${$rtoc_string}, $rpre_string_stack
+        ) && return;
+    }
+
+    # --------------------------------------------------
+    # path 3: write code in html, with pod only in italics
+    # --------------------------------------------------
+    my $input_file = $self->{_input_file};
+    my $title      = escape_html($input_file);
+    my $timestamp  = "";
+    if ( $rOpts->{'timestamp'} ) {
+        my $date = localtime;
+        $timestamp = "on $date";
+    }
+    $html_fh->print( <<"HTML_START");
+
+
+
+
+$title
+HTML_START
+
+    # output the css, if used
+    if ($css_string) {
+        $html_fh->print($css_string);
+        $html_fh->print( <<"ENDCSS");
+
+
+ENDCSS
+    }
+    else {
+
+        $html_fh->print( <<"HTML_START");
+
+{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
+HTML_START
+    }
+
+    $html_fh->print("\n");
+    $html_fh->print( <<"EOM");
+

$title

+EOM + + # copy the table of contents + if ( ${$rtoc_string} + && !$rOpts->{'frames'} + && $rOpts->{'html-table-of-contents'} ) + { + $html_fh->print( ${$rtoc_string} ); + } + + # copy the pre section(s) + my $fname_comment = $input_file; + $fname_comment =~ s/--+/-/g; # protect HTML comment tags + $html_fh->print( <<"END_PRE"); +
+ +
+END_PRE
+
+    foreach my $rpre_string ( @{$rpre_string_stack} ) {
+        $html_fh->print( ${$rpre_string} );
+    }
+
+    # and finish the html page
+    $html_fh->print( <<"HTML_END");
+
+ + +HTML_END + close_object($html_fh); + + if ( $rOpts->{'frames'} ) { + ##my @toc = map { $_ .= "\n" } split /\n/, ${$rtoc_string}; + my @toc = map { $_ . "\n" } split /\n/, ${$rtoc_string}; + $self->make_frame( \@toc ); + } + return; +} + +sub markup_tokens { + my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_; + my ( @colored_tokens, $type, $token, $level ); + my $rlast_level = $self->{_rlast_level}; + my $rpackage_stack = $self->{_rpackage_stack}; + + for ( my $j = 0 ; $j < @{$rtoken_type} ; $j++ ) { + $type = $rtoken_type->[$j]; + $token = $rtokens->[$j]; + $level = $rlevels->[$j]; + $level = 0 if ( $level < 0 ); + + #------------------------------------------------------- + # Update the package stack. The package stack is needed to keep + # the toc correct because some packages may be declared within + # blocks and go out of scope when we leave the block. + #------------------------------------------------------- + if ( $level > ${$rlast_level} ) { + unless ( $rpackage_stack->[ $level - 1 ] ) { + $rpackage_stack->[ $level - 1 ] = 'main'; + } + $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ]; + } + elsif ( $level < ${$rlast_level} ) { + my $package = $rpackage_stack->[$level]; + unless ($package) { $package = 'main' } + + # if we change packages due to a nesting change, we + # have to make an entry in the toc + if ( $package ne $rpackage_stack->[ $level + 1 ] ) { + $self->add_toc_item( $package, 'package' ); + } + } + ${$rlast_level} = $level; + + #------------------------------------------------------- + # Intercept a sub name here; split it + # into keyword 'sub' and sub name; and add an + # entry in the toc + #------------------------------------------------------- + if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) { + $token = $self->markup_html_element( $1, 'k' ); + push @colored_tokens, $token; + $token = $2; + $type = 'M'; + + # but don't include sub declarations in the toc; + # these wlll have leading token types 'i;' + my $signature = join "", @{$rtoken_type}; + unless ( $signature =~ /^i;/ ) { + my $subname = $token; + $subname =~ s/[\s\(].*$//; # remove any attributes and prototype + $self->add_toc_item( $subname, 'sub' ); + } + } + + #------------------------------------------------------- + # Intercept a package name here; split it + # into keyword 'package' and name; add to the toc, + # and update the package stack + #------------------------------------------------------- + if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) { + $token = $self->markup_html_element( $1, 'k' ); + push @colored_tokens, $token; + $token = $2; + $type = 'i'; + $self->add_toc_item( "$token", 'package' ); + $rpackage_stack->[$level] = $token; + } + + $token = $self->markup_html_element( $token, $type ); + push @colored_tokens, $token; + } + return ( \@colored_tokens ); +} + +sub markup_html_element { + my ( $self, $token, $type ) = @_; + + return $token if ( $type eq 'b' ); # skip a blank token + return $token if ( $token =~ /^\s*$/ ); # skip a blank line + $token = escape_html($token); + + # get the short abbreviation for this token type + my $short_name = $token_short_names{$type}; + if ( !defined($short_name) ) { + $short_name = "pu"; # punctuation is default + } + + # handle style sheets.. + if ( !$rOpts->{'nohtml-style-sheets'} ) { + if ( $short_name ne 'pu' ) { + $token = qq() . $token . ""; + } + } + + # handle no style sheets.. + else { + my $color = $html_color{$short_name}; + + if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) { + $token = qq() . $token . ""; + } + if ( $html_italic{$short_name} ) { $token = "$token" } + if ( $html_bold{$short_name} ) { $token = "$token" } + } + return $token; +} + +sub escape_html { + + my $token = shift; + if ($missing_html_entities) { + $token =~ s/\&/&/g; + $token =~ s/\/>/g; + $token =~ s/\"/"/g; + } + else { + HTML::Entities::encode_entities($token); + } + return $token; +} + +sub finish_formatting { + + # called after last line + my $self = shift; + $self->close_html_file(); + return; +} + +sub write_line { + + my ( $self, $line_of_tokens ) = @_; + return unless $self->{_html_file_opened}; + my $html_pre_fh = $self->{_html_pre_fh}; + my $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; + my $line_number = $line_of_tokens->{_line_number}; + chomp $input_line; + + # markup line of code.. + my $html_line; + if ( $line_type eq 'CODE' ) { + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + my $rlevels = $line_of_tokens->{_rlevels}; + + if ( $input_line =~ /(^\s*)/ ) { + $html_line = $1; + } + else { + $html_line = ""; + } + my ($rcolored_tokens) = + $self->markup_tokens( $rtokens, $rtoken_type, $rlevels ); + $html_line .= join '', @{$rcolored_tokens}; + } + + # markup line of non-code.. + else { + my $line_character; + if ( $line_type eq 'HERE' ) { $line_character = 'H' } + elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' } + elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' } + elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' } + elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' } + elsif ( $line_type eq 'END_START' ) { + $line_character = 'k'; + $self->add_toc_item( '__END__', '__END__' ); + } + elsif ( $line_type eq 'DATA_START' ) { + $line_character = 'k'; + $self->add_toc_item( '__DATA__', '__DATA__' ); + } + elsif ( $line_type =~ /^POD/ ) { + $line_character = 'P'; + if ( $rOpts->{'pod2html'} ) { + my $html_pod_fh = $self->{_html_pod_fh}; + if ( $line_type eq 'POD_START' ) { + + my $rpre_string_stack = $self->{_rpre_string_stack}; + my $rpre_string = $rpre_string_stack->[-1]; + + # if we have written any non-blank lines to the + # current pre section, start writing to a new output + # string + if ( ${$rpre_string} =~ /\S/ ) { + my $pre_string; + $html_pre_fh = + Perl::Tidy::IOScalar->new( \$pre_string, 'w' ); + $self->{_html_pre_fh} = $html_pre_fh; + push @{$rpre_string_stack}, \$pre_string; + + # leave a marker in the pod stream so we know + # where to put the pre section we just + # finished. + my $for_html = '=for html'; # don't confuse pod utils + $html_pod_fh->print(< + +EOM + } + + # otherwise, just clear the current string and start + # over + else { + ${$rpre_string} = ""; + $html_pod_fh->print("\n"); + } + } + $html_pod_fh->print( $input_line . "\n" ); + if ( $line_type eq 'POD_END' ) { + $self->{_pod_cut_count}++; + $html_pod_fh->print("\n"); + } + return; + } + } + else { $line_character = 'Q' } + $html_line = $self->markup_html_element( $input_line, $line_character ); + } + + # add the line number if requested + if ( $rOpts->{'html-line-numbers'} ) { + my $extra_space = + ( $line_number < 10 ) ? " " + : ( $line_number < 100 ) ? " " + : ( $line_number < 1000 ) ? " " + : ""; + $html_line = $extra_space . $line_number . " " . $html_line; + } + + # write the line + $html_pre_fh->print("$html_line\n"); + return; +} +1; + diff --git a/lib/Perl/Tidy/IOScalar.pm b/lib/Perl/Tidy/IOScalar.pm new file mode 100644 index 00000000..285b517a --- /dev/null +++ b/lib/Perl/Tidy/IOScalar.pm @@ -0,0 +1,86 @@ +##################################################################### +# +# This is a stripped down version of IO::Scalar +# Given a reference to a scalar, it supplies either: +# a getline method which reads lines (mode='r'), or +# a print method which reads lines (mode='w') +# +##################################################################### +package Perl::Tidy::IOScalar; +use strict; +use warnings; +use Carp; + +sub new { + my ( $package, $rscalar, $mode ) = @_; + my $ref = ref $rscalar; + if ( $ref ne 'SCALAR' ) { + confess <[1]; + if ( $mode ne 'r' ) { + confess <[2]++; + return $self->[0]->[$i]; +} + +sub print { + my ( $self, $msg ) = @_; + my $mode = $self->[1]; + if ( $mode ne 'w' ) { + confess <[0] } .= $msg; + return; +} +sub close { return } +1; + diff --git a/lib/Perl/Tidy/IOScalarArray.pm b/lib/Perl/Tidy/IOScalarArray.pm new file mode 100644 index 00000000..5cc399e8 --- /dev/null +++ b/lib/Perl/Tidy/IOScalarArray.pm @@ -0,0 +1,75 @@ +##################################################################### +# +# This is a stripped down version of IO::ScalarArray +# Given a reference to an array, it supplies either: +# a getline method which reads lines (mode='r'), or +# a print method which reads lines (mode='w') +# +# NOTE: this routine assumes that there aren't any embedded +# newlines within any of the array elements. There are no checks +# for that. +# +##################################################################### +package Perl::Tidy::IOScalarArray; +use strict; +use warnings; +use Carp; + +sub new { + my ( $package, $rarray, $mode ) = @_; + my $ref = ref $rarray; + if ( $ref ne 'ARRAY' ) { + confess <[1]; + if ( $mode ne 'r' ) { + confess <[2]++; + return $self->[0]->[$i]; +} + +sub print { + my ( $self, $msg ) = @_; + my $mode = $self->[1]; + if ( $mode ne 'w' ) { + confess <[0] }, $msg; + return; +} +sub close { return } +1; + diff --git a/lib/Perl/Tidy/IndentationItem.pm b/lib/Perl/Tidy/IndentationItem.pm new file mode 100644 index 00000000..13011c97 --- /dev/null +++ b/lib/Perl/Tidy/IndentationItem.pm @@ -0,0 +1,253 @@ +##################################################################### +# +# the Perl::Tidy::IndentationItem class supplies items which contain +# how much whitespace should be used at the start of a line +# +##################################################################### + +package Perl::Tidy::IndentationItem; +use strict; +use warnings; + +sub new { + + # Create an 'indentation_item' which describes one level of leading + # whitespace when the '-lp' indentation is used. + my ( + $class, $spaces, $level, + $ci_level, $available_spaces, $index, + $gnu_sequence_number, $align_paren, $stack_depth, + $starting_index, + ) = @_; + + my $closed = -1; + my $arrow_count = 0; + my $comma_count = 0; + my $have_child = 0; + my $want_right_spaces = 0; + my $marked = 0; + + # DEFINITIONS: + # spaces => # total leading white spaces + # level => # the indentation 'level' + # ci_level => # the 'continuation level' + # available_spaces => # how many left spaces available + # # for this level + # closed => # index where we saw closing '}' + # comma_count => # how many commas at this level? + # sequence_number => # output batch number + # index => # index in output batch list + # have_child => # any dependents? + # recoverable_spaces => # how many spaces to the right + # # we would like to move to get + # # alignment (negative if left) + # align_paren => # do we want to try to align + # # with an opening structure? + # marked => # if visited by corrector logic + # stack_depth => # indentation nesting depth + # starting_index => # first token index of this level + # arrow_count => # how many =>'s + + return bless { + _spaces => $spaces, + _level => $level, + _ci_level => $ci_level, + _available_spaces => $available_spaces, + _closed => $closed, + _comma_count => $comma_count, + _sequence_number => $gnu_sequence_number, + _index => $index, + _have_child => $have_child, + _recoverable_spaces => $want_right_spaces, + _align_paren => $align_paren, + _marked => $marked, + _stack_depth => $stack_depth, + _starting_index => $starting_index, + _arrow_count => $arrow_count, + }, $class; +} + +sub permanently_decrease_available_spaces { + + # make a permanent reduction in the available indentation spaces + # at one indentation item. NOTE: if there are child nodes, their + # total SPACES must be reduced by the caller. + + my ( $item, $spaces_needed ) = @_; + my $available_spaces = $item->get_available_spaces(); + my $deleted_spaces = + ( $available_spaces > $spaces_needed ) + ? $spaces_needed + : $available_spaces; + $item->decrease_available_spaces($deleted_spaces); + $item->decrease_SPACES($deleted_spaces); + $item->set_recoverable_spaces(0); + + return $deleted_spaces; +} + +sub tentatively_decrease_available_spaces { + + # We are asked to tentatively delete $spaces_needed of indentation + # for a indentation item. We may want to undo this later. NOTE: if + # there are child nodes, their total SPACES must be reduced by the + # caller. + my ( $item, $spaces_needed ) = @_; + my $available_spaces = $item->get_available_spaces(); + my $deleted_spaces = + ( $available_spaces > $spaces_needed ) + ? $spaces_needed + : $available_spaces; + $item->decrease_available_spaces($deleted_spaces); + $item->decrease_SPACES($deleted_spaces); + $item->increase_recoverable_spaces($deleted_spaces); + return $deleted_spaces; +} + +sub get_stack_depth { + my $self = shift; + return $self->{_stack_depth}; +} + +sub get_spaces { + my $self = shift; + return $self->{_spaces}; +} + +sub get_marked { + my $self = shift; + return $self->{_marked}; +} + +sub set_marked { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_marked} = $value; + } + return $self->{_marked}; +} + +sub get_available_spaces { + my $self = shift; + return $self->{_available_spaces}; +} + +sub decrease_SPACES { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_spaces} -= $value; + } + return $self->{_spaces}; +} + +sub decrease_available_spaces { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_available_spaces} -= $value; + } + return $self->{_available_spaces}; +} + +sub get_align_paren { + my $self = shift; + return $self->{_align_paren}; +} + +sub get_recoverable_spaces { + my $self = shift; + return $self->{_recoverable_spaces}; +} + +sub set_recoverable_spaces { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_recoverable_spaces} = $value; + } + return $self->{_recoverable_spaces}; +} + +sub increase_recoverable_spaces { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_recoverable_spaces} += $value; + } + return $self->{_recoverable_spaces}; +} + +sub get_ci_level { + my $self = shift; + return $self->{_ci_level}; +} + +sub get_level { + my $self = shift; + return $self->{_level}; +} + +sub get_sequence_number { + my $self = shift; + return $self->{_sequence_number}; +} + +sub get_index { + my $self = shift; + return $self->{_index}; +} + +sub get_starting_index { + my $self = shift; + return $self->{_starting_index}; +} + +sub set_have_child { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_have_child} = $value; + } + return $self->{_have_child}; +} + +sub get_have_child { + my $self = shift; + return $self->{_have_child}; +} + +sub set_arrow_count { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_arrow_count} = $value; + } + return $self->{_arrow_count}; +} + +sub get_arrow_count { + my $self = shift; + return $self->{_arrow_count}; +} + +sub set_comma_count { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_comma_count} = $value; + } + return $self->{_comma_count}; +} + +sub get_comma_count { + my $self = shift; + return $self->{_comma_count}; +} + +sub set_closed { + my ( $self, $value ) = @_; + if ( defined($value) ) { + $self->{_closed} = $value; + } + return $self->{_closed}; +} + +sub get_closed { + my $self = shift; + return $self->{_closed}; +} +1; diff --git a/lib/Perl/Tidy/LineBuffer.pm b/lib/Perl/Tidy/LineBuffer.pm new file mode 100644 index 00000000..5268f570 --- /dev/null +++ b/lib/Perl/Tidy/LineBuffer.pm @@ -0,0 +1,56 @@ +##################################################################### +# +# The Perl::Tidy::LineBuffer class supplies a 'get_line()' +# method for returning the next line to be parsed, as well as a +# 'peek_ahead()' method +# +# The input parameter is an object with a 'get_line()' method +# which returns the next line to be parsed +# +##################################################################### + +package Perl::Tidy::LineBuffer; +use strict; +use warnings; + +sub new { + + my ( $class, $line_source_object ) = @_; + + return bless { + _line_source_object => $line_source_object, + _rlookahead_buffer => [], + }, $class; +} + +sub peek_ahead { + my ( $self, $buffer_index ) = @_; + my $line = undef; + my $line_source_object = $self->{_line_source_object}; + my $rlookahead_buffer = $self->{_rlookahead_buffer}; + if ( $buffer_index < scalar( @{$rlookahead_buffer} ) ) { + $line = $rlookahead_buffer->[$buffer_index]; + } + else { + $line = $line_source_object->get_line(); + push( @{$rlookahead_buffer}, $line ); + } + return $line; +} + +sub get_line { + my $self = shift; + my $line = undef; + my $line_source_object = $self->{_line_source_object}; + my $rlookahead_buffer = $self->{_rlookahead_buffer}; + + if ( scalar( @{$rlookahead_buffer} ) ) { + $line = shift @{$rlookahead_buffer}; + } + else { + $line = $line_source_object->get_line(); + } + return $line; +} +1; + diff --git a/lib/Perl/Tidy/LineSink.pm b/lib/Perl/Tidy/LineSink.pm new file mode 100644 index 00000000..66f1da70 --- /dev/null +++ b/lib/Perl/Tidy/LineSink.pm @@ -0,0 +1,144 @@ +##################################################################### +# +# the Perl::Tidy::LineSink class supplies a write_line method for +# actual file writing +# +##################################################################### + +package Perl::Tidy::LineSink; +use strict; +use warnings; + +sub new { + + my ( $class, $output_file, $tee_file, $line_separator, $rOpts, + $rpending_logfile_message, $binmode ) + = @_; + my $fh = undef; + my $fh_tee = undef; + + my $output_file_open = 0; + + if ( $rOpts->{'format'} eq 'tidy' ) { + ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); + unless ($fh) { Perl::Tidy::Die("Cannot write to output stream\n"); } + $output_file_open = 1; + if ($binmode) { + if ( $rOpts->{'character-encoding'} + && $rOpts->{'character-encoding'} eq 'utf8' ) + { + if ( ref($fh) eq 'IO::File' ) { + $fh->binmode(":encoding(UTF-8)"); + } + elsif ( $output_file eq '-' ) { + binmode STDOUT, ":encoding(UTF-8)"; + } + } + + # Patch for RT 122030 + elsif ( ref($fh) eq 'IO::File' ) { $fh->binmode(); } + + elsif ( $output_file eq '-' ) { binmode STDOUT } + } + } + + # in order to check output syntax when standard output is used, + # or when it is an object, we have to make a copy of the file + if ( $output_file eq '-' || ref $output_file ) { + if ( $rOpts->{'check-syntax'} ) { + + # Turning off syntax check when standard output is used. + # The reason is that temporary files cause problems on + # on many systems. + $rOpts->{'check-syntax'} = 0; + ${$rpending_logfile_message} .= < $fh, + _fh_tee => $fh_tee, + _output_file => $output_file, + _output_file_open => $output_file_open, + _tee_flag => 0, + _tee_file => $tee_file, + _tee_file_opened => 0, + _line_separator => $line_separator, + _binmode => $binmode, + }, $class; +} + +sub write_line { + + my ( $self, $line ) = @_; + my $fh = $self->{_fh}; + + my $output_file_open = $self->{_output_file_open}; + chomp $line; + $line .= $self->{_line_separator}; + + $fh->print($line) if ( $self->{_output_file_open} ); + + if ( $self->{_tee_flag} ) { + unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } + my $fh_tee = $self->{_fh_tee}; + print $fh_tee $line; + } + return; +} + +sub tee_on { + my $self = shift; + $self->{_tee_flag} = 1; + return; +} + +sub tee_off { + my $self = shift; + $self->{_tee_flag} = 0; + return; +} + +sub really_open_tee_file { + my $self = shift; + my $tee_file = $self->{_tee_file}; + my $fh_tee; + $fh_tee = IO::File->new(">$tee_file") + or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n"); + binmode $fh_tee if $self->{_binmode}; + $self->{_tee_file_opened} = 1; + $self->{_fh_tee} = $fh_tee; + return; +} + +sub close_output_file { + my $self = shift; + + # Only close physical files, not STDOUT and other objects + my $output_file = $self->{_output_file}; + if ( $output_file ne '-' && !ref $output_file ) { + eval { $self->{_fh}->close() } if $self->{_output_file_open}; + } + $self->close_tee_file(); + return; +} + +sub close_tee_file { + my $self = shift; + + # Only close physical files, not STDOUT and other objects + if ( $self->{_tee_file_opened} ) { + my $tee_file = $self->{_tee_file}; + if ( $tee_file ne '-' && !ref $tee_file ) { + eval { $self->{_fh_tee}->close() }; + $self->{_tee_file_opened} = 0; + } + } + return; +} + +1; + diff --git a/lib/Perl/Tidy/LineSource.pm b/lib/Perl/Tidy/LineSource.pm new file mode 100644 index 00000000..abbd3e65 --- /dev/null +++ b/lib/Perl/Tidy/LineSource.pm @@ -0,0 +1,88 @@ +##################################################################### +# +# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method +# which returns the next line to be parsed +# +##################################################################### + +package Perl::Tidy::LineSource; +use strict; +use warnings; + +sub new { + + my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; + + my $input_line_ending; + if ( $rOpts->{'preserve-line-endings'} ) { + $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); + } + + ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); + return unless $fh; + + # in order to check output syntax when standard output is used, + # or when it is an object, we have to make a copy of the file + if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) + { + + # Turning off syntax check when input output is used. + # The reason is that temporary files cause problems on + # on many systems. + $rOpts->{'check-syntax'} = 0; + + ${$rpending_logfile_message} .= < $fh, + _filename => $input_file, + _input_line_ending => $input_line_ending, + _rinput_buffer => [], + _started => 0, + }, $class; +} + +sub close_input_file { + my $self = shift; + + # Only close physical files, not STDIN and other objects + my $filename = $self->{_filename}; + if ( $filename ne '-' && !ref $filename ) { + eval { $self->{_fh}->close() }; + } + return; +} + +sub get_line { + my $self = shift; + my $line = undef; + my $fh = $self->{_fh}; + my $rinput_buffer = $self->{_rinput_buffer}; + + if ( scalar( @{$rinput_buffer} ) ) { + $line = shift @{$rinput_buffer}; + } + else { + $line = $fh->getline(); + + # patch to read raw mac files under unix, dos + # see if the first line has embedded \r's + if ( $line && !$self->{_started} ) { + if ( $line =~ /[\015][^\015\012]/ ) { + + # found one -- break the line up and store in a buffer + @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line; + my $count = @{$rinput_buffer}; + $line = shift @{$rinput_buffer}; + } + $self->{_started}++; + } + } + return $line; +} +1; + diff --git a/lib/Perl/Tidy/Logger.pm b/lib/Perl/Tidy/Logger.pm new file mode 100644 index 00000000..d3a3de4b --- /dev/null +++ b/lib/Perl/Tidy/Logger.pm @@ -0,0 +1,466 @@ +##################################################################### +# +# The Perl::Tidy::Logger class writes the .LOG and .ERR files +# +##################################################################### + +package Perl::Tidy::Logger; +use strict; +use warnings; + +sub new { + + my ( $class, $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude ) = + @_; + + my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef; + + # remove any old error output file if we might write a new one + unless ( $fh_warnings || ref($warning_file) ) { + if ( -e $warning_file ) { + unlink($warning_file) + or Perl::Tidy::Die( + "couldn't unlink warning file $warning_file: $!\n"); + } + } + + my $logfile_gap = + defined( $rOpts->{'logfile-gap'} ) + ? $rOpts->{'logfile-gap'} + : 50; + if ( $logfile_gap == 0 ) { $logfile_gap = 1 } + + return bless { + _log_file => $log_file, + _logfile_gap => $logfile_gap, + _rOpts => $rOpts, + _fh_warnings => $fh_warnings, + _last_input_line_written => 0, + _at_end_of_file => 0, + _use_prefix => 1, + _block_log_output => 0, + _line_of_tokens => undef, + _output_line_number => undef, + _wrote_line_information_string => 0, + _wrote_column_headings => 0, + _warning_file => $warning_file, + _warning_count => 0, + _complaint_count => 0, + _saw_code_bug => -1, # -1=no 0=maybe 1=for sure + _saw_brace_error => 0, + _saw_extrude => $saw_extrude, + _output_array => [], + }, $class; +} + +sub get_warning_count { + my $self = shift; + return $self->{_warning_count}; +} + +sub get_use_prefix { + my $self = shift; + return $self->{_use_prefix}; +} + +sub block_log_output { + my $self = shift; + $self->{_block_log_output} = 1; + return; +} + +sub unblock_log_output { + my $self = shift; + $self->{_block_log_output} = 0; + return; +} + +sub interrupt_logfile { + my $self = shift; + $self->{_use_prefix} = 0; + $self->warning("\n"); + $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); + return; +} + +sub resume_logfile { + my $self = shift; + $self->write_logfile_entry( '#' x 60 . "\n" ); + $self->{_use_prefix} = 1; + return; +} + +sub we_are_at_the_last_line { + my $self = shift; + unless ( $self->{_wrote_line_information_string} ) { + $self->write_logfile_entry("Last line\n\n"); + } + $self->{_at_end_of_file} = 1; + return; +} + +# record some stuff in case we go down in flames +sub black_box { + my ( $self, $line_of_tokens, $output_line_number ) = @_; + my $input_line = $line_of_tokens->{_line_text}; + my $input_line_number = $line_of_tokens->{_line_number}; + + # save line information in case we have to write a logfile message + $self->{_line_of_tokens} = $line_of_tokens; + $self->{_output_line_number} = $output_line_number; + $self->{_wrote_line_information_string} = 0; + + my $last_input_line_written = $self->{_last_input_line_written}; + my $rOpts = $self->{_rOpts}; + if ( + ( + ( $input_line_number - $last_input_line_written ) >= + $self->{_logfile_gap} + ) + || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) + ) + { + my $structural_indentation_level = $line_of_tokens->{_level_0}; + $structural_indentation_level = 0 + if ( $structural_indentation_level < 0 ); + $self->{_last_input_line_written} = $input_line_number; + ( my $out_str = $input_line ) =~ s/^\s*//; + chomp $out_str; + + $out_str = ( '.' x $structural_indentation_level ) . $out_str; + + if ( length($out_str) > 35 ) { + $out_str = substr( $out_str, 0, 35 ) . " ...."; + } + $self->logfile_output( "", "$out_str\n" ); + } + return; +} + +sub write_logfile_entry { + + my ( $self, @msg ) = @_; + + # add leading >>> to avoid confusing error messages and code + $self->logfile_output( ">>>", "@msg" ); + return; +} + +sub write_column_headings { + my $self = shift; + + $self->{_wrote_column_headings} = 1; + my $routput_array = $self->{_output_array}; + push @{$routput_array}, <>>) +lines levels i k (code begins with one '.' per indent level) +------ ----- - - -------- ------------------------------------------- +EOM + return; +} + +sub make_line_information_string { + + # make columns of information when a logfile message needs to go out + my $self = shift; + my $line_of_tokens = $self->{_line_of_tokens}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $line_information_string = ""; + if ($input_line_number) { + + my $output_line_number = $self->{_output_line_number}; + my $brace_depth = $line_of_tokens->{_curly_brace_depth}; + my $paren_depth = $line_of_tokens->{_paren_depth}; + my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; + my $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + ##my $rtoken_array = $line_of_tokens->{_rtoken_array}; + + my $structural_indentation_level = $line_of_tokens->{_level_0}; + + $self->write_column_headings() unless $self->{_wrote_column_headings}; + + # keep logfile columns aligned for scripts up to 999 lines; + # for longer scripts it doesn't really matter + my $extra_space = ""; + $extra_space .= + ( $input_line_number < 10 ) ? " " + : ( $input_line_number < 100 ) ? " " + : ""; + $extra_space .= + ( $output_line_number < 10 ) ? " " + : ( $output_line_number < 100 ) ? " " + : ""; + + # there are 2 possible nesting strings: + # the original which looks like this: (0 [1 {2 + # the new one, which looks like this: {{[ + # the new one is easier to read, and shows the order, but + # could be arbitrarily long, so we use it unless it is too long + my $nesting_string = + "($paren_depth [$square_bracket_depth {$brace_depth"; + my $nesting_string_new = $line_of_tokens->{_nesting_tokens_0}; + my $ci_level = $line_of_tokens->{_ci_level_0}; + if ( $ci_level > 9 ) { $ci_level = '*' } + my $bk = ( $line_of_tokens->{_nesting_blocks_0} =~ /1$/ ) ? '1' : '0'; + + if ( length($nesting_string_new) <= 8 ) { + $nesting_string = + $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); + } + $line_information_string = +"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; + } + return $line_information_string; +} + +sub logfile_output { + my ( $self, $prompt, $msg ) = @_; + return if ( $self->{_block_log_output} ); + + my $routput_array = $self->{_output_array}; + if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { + push @{$routput_array}, "$msg"; + } + else { + my $line_information_string = $self->make_line_information_string(); + $self->{_wrote_line_information_string} = 1; + + if ($line_information_string) { + push @{$routput_array}, "$line_information_string $prompt$msg"; + } + else { + push @{$routput_array}, "$msg"; + } + } + return; +} + +sub get_saw_brace_error { + my $self = shift; + return $self->{_saw_brace_error}; +} + +sub increment_brace_error { + my $self = shift; + $self->{_saw_brace_error}++; + return; +} + +sub brace_warning { + my ( $self, $msg ) = @_; + + #use constant BRACE_WARNING_LIMIT => 10; + my $BRACE_WARNING_LIMIT = 10; + my $saw_brace_error = $self->{_saw_brace_error}; + + if ( $saw_brace_error < $BRACE_WARNING_LIMIT ) { + $self->warning($msg); + } + $saw_brace_error++; + $self->{_saw_brace_error} = $saw_brace_error; + + if ( $saw_brace_error == $BRACE_WARNING_LIMIT ) { + $self->warning("No further warnings of this type will be given\n"); + } + return; +} + +sub complain { + + # handle non-critical warning messages based on input flag + my ( $self, $msg ) = @_; + my $rOpts = $self->{_rOpts}; + + # these appear in .ERR output only if -w flag is used + if ( $rOpts->{'warning-output'} ) { + $self->warning($msg); + } + + # otherwise, they go to the .LOG file + else { + $self->{_complaint_count}++; + $self->write_logfile_entry($msg); + } + return; +} + +sub warning { + + # report errors to .ERR file (or stdout) + my ( $self, $msg ) = @_; + + #use constant WARNING_LIMIT => 50; + my $WARNING_LIMIT = 50; + + my $rOpts = $self->{_rOpts}; + unless ( $rOpts->{'quiet'} ) { + + my $warning_count = $self->{_warning_count}; + my $fh_warnings = $self->{_fh_warnings}; + if ( !$fh_warnings ) { + my $warning_file = $self->{_warning_file}; + ( $fh_warnings, my $filename ) = + Perl::Tidy::streamhandle( $warning_file, 'w' ); + $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n"); + Perl::Tidy::Warn("## Please see file $filename\n") + unless ref($warning_file); + $self->{_fh_warnings} = $fh_warnings; + $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n"); + } + + if ( $warning_count < $WARNING_LIMIT ) { + if ( $self->get_use_prefix() > 0 ) { + my $input_line_number = + Perl::Tidy::Tokenizer::get_input_line_number(); + if ( !defined($input_line_number) ) { $input_line_number = -1 } + $fh_warnings->print("$input_line_number:\t$msg"); + $self->write_logfile_entry("WARNING: $msg"); + } + else { + $fh_warnings->print($msg); + $self->write_logfile_entry($msg); + } + } + $warning_count++; + $self->{_warning_count} = $warning_count; + + if ( $warning_count == $WARNING_LIMIT ) { + $fh_warnings->print("No further warnings will be given\n"); + } + } + return; +} + +# programming bug codes: +# -1 = no bug +# 0 = maybe, not sure. +# 1 = definitely +sub report_possible_bug { + my $self = shift; + my $saw_code_bug = $self->{_saw_code_bug}; + $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug; + return; +} + +sub report_definite_bug { + my $self = shift; + $self->{_saw_code_bug} = 1; + return; +} + +sub ask_user_for_bug_report { + + my ( $self, $infile_syntax_ok, $formatter ) = @_; + my $saw_code_bug = $self->{_saw_code_bug}; + if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { + $self->warning(<{_saw_extrude} ) { + $self->warning(<warning(<get_added_semicolon_count(); + }; + if ( $added_semicolon_count > 0 ) { + $self->warning(<{_rOpts}; + my $warning_count = $self->{_warning_count}; + my $saw_code_bug = $self->{_saw_code_bug}; + + my $save_logfile = + ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) + || $saw_code_bug == 1 + || $rOpts->{'logfile'}; + my $log_file = $self->{_log_file}; + if ($warning_count) { + if ($save_logfile) { + $self->block_log_output(); # avoid echoing this to the logfile + $self->warning( + "The logfile $log_file may contain useful information\n"); + $self->unblock_log_output(); + } + + if ( $self->{_complaint_count} > 0 ) { + $self->warning( +"To see $self->{_complaint_count} non-critical warnings rerun with -w\n" + ); + } + + if ( $self->{_saw_brace_error} + && ( $self->{_logfile_gap} > 1 || !$save_logfile ) ) + { + $self->warning("To save a full .LOG file rerun with -g\n"); + } + } + $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); + + if ($save_logfile) { + my $log_file = $self->{_log_file}; + my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); + if ($fh) { + my $routput_array = $self->{_output_array}; + foreach ( @{$routput_array} ) { $fh->print($_) } + if ( $log_file ne '-' && !ref $log_file ) { + eval { $fh->close() }; + } + } + } + return; +} +1; + diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm new file mode 100644 index 00000000..2363f66a --- /dev/null +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -0,0 +1,7866 @@ +######################################################################## +# +# the Perl::Tidy::Tokenizer package is essentially a filter which +# reads lines of perl source code from a source object and provides +# corresponding tokenized lines through its get_line() method. Lines +# flow from the source_object to the caller like this: +# +# source_object --> LineBuffer_object --> Tokenizer --> calling routine +# get_line() get_line() get_line() line_of_tokens +# +# The source object can be any object with a get_line() method which +# supplies one line (a character string) perl call. +# The LineBuffer object is created by the Tokenizer. +# The Tokenizer returns a reference to a data structure 'line_of_tokens' +# containing one tokenized line for each call to its get_line() method. +# +# WARNING: This is not a real class yet. Only one tokenizer my be used. +# +######################################################################## + +package Perl::Tidy::Tokenizer; +use strict; +use warnings; + +use Perl::Tidy::LineBuffer; + +BEGIN { + + # Caution: these debug flags produce a lot of output + # They should all be 0 except when debugging small scripts + + use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0; + use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0; + use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0; + use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0; + use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0; + + my $debug_warning = sub { + print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n"; + }; + + TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT'); + TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN'); + TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE'); + TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID'); + TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE'); + +} + +use Carp; + +# PACKAGE VARIABLES for processing an entire FILE. +use vars qw{ + $tokenizer_self + + $last_nonblank_token + $last_nonblank_type + $last_nonblank_block_type + $statement_type + $in_attribute_list + $current_package + $context + + %is_constant + %is_user_function + %user_function_prototype + %is_block_function + %is_block_list_function + %saw_function_definition + + $brace_depth + $paren_depth + $square_bracket_depth + + @current_depth + @total_depth + $total_depth + @nesting_sequence_number + @current_sequence_number + @paren_type + @paren_semicolon_count + @paren_structural_type + @brace_type + @brace_structural_type + @brace_context + @brace_package + @square_bracket_type + @square_bracket_structural_type + @depth_array + @nested_ternary_flag + @nested_statement_type + @starting_line_of_current_depth +}; + +# GLOBAL CONSTANTS for routines in this package +use vars qw{ + %is_indirect_object_taker + %is_block_operator + %expecting_operator_token + %expecting_operator_types + %expecting_term_types + %expecting_term_token + %is_digraph + %is_file_test_operator + %is_trigraph + %is_tetragraph + %is_valid_token_type + %is_keyword + %is_code_block_token + %really_want_term + @opening_brace_names + @closing_brace_names + %is_keyword_taking_list + %is_q_qq_qw_qx_qr_s_y_tr_m +}; + +# possible values of operator_expected() +use constant TERM => -1; +use constant UNKNOWN => 0; +use constant OPERATOR => 1; + +# possible values of context +use constant SCALAR_CONTEXT => -1; +use constant UNKNOWN_CONTEXT => 0; +use constant LIST_CONTEXT => 1; + +# Maximum number of little messages; probably need not be changed. +use constant MAX_NAG_MESSAGES => 6; + +{ + + # methods to count instances + my $_count = 0; + sub get_count { return $_count; } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } +} + +sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; +} + +sub new { + + my ( $class, @args ) = @_; + + # Note: 'tabs' and 'indent_columns' are temporary and should be + # removed asap + my %defaults = ( + source_object => undef, + debugger_object => undef, + diagnostics_object => undef, + logger_object => undef, + starting_level => undef, + indent_columns => 4, + tabsize => 8, + look_for_hash_bang => 0, + trim_qw => 1, + look_for_autoloader => 1, + look_for_selfloader => 1, + starting_line_number => 1, + extended_syntax => 0, + ); + my %args = ( %defaults, @args ); + + # we are given an object with a get_line() method to supply source lines + my $source_object = $args{source_object}; + + # we create another object with a get_line() and peek_ahead() method + my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); + + # Tokenizer state data is as follows: + # _rhere_target_list reference to list of here-doc targets + # _here_doc_target the target string for a here document + # _here_quote_character the type of here-doc quoting (" ' ` or none) + # to determine if interpolation is done + # _quote_target character we seek if chasing a quote + # _line_start_quote line where we started looking for a long quote + # _in_here_doc flag indicating if we are in a here-doc + # _in_pod flag set if we are in pod documentation + # _in_error flag set if we saw severe error (binary in script) + # _in_data flag set if we are in __DATA__ section + # _in_end flag set if we are in __END__ section + # _in_format flag set if we are in a format description + # _in_attribute_list flag telling if we are looking for attributes + # _in_quote flag telling if we are chasing a quote + # _starting_level indentation level of first line + # _line_buffer_object object with get_line() method to supply source code + # _diagnostics_object place to write debugging information + # _unexpected_error_count error count used to limit output + # _lower_case_labels_at line numbers where lower case labels seen + # _hit_bug program bug detected + $tokenizer_self = { + _rhere_target_list => [], + _in_here_doc => 0, + _here_doc_target => "", + _here_quote_character => "", + _in_data => 0, + _in_end => 0, + _in_format => 0, + _in_error => 0, + _in_pod => 0, + _in_attribute_list => 0, + _in_quote => 0, + _quote_target => "", + _line_start_quote => -1, + _starting_level => $args{starting_level}, + _know_starting_level => defined( $args{starting_level} ), + _tabsize => $args{tabsize}, + _indent_columns => $args{indent_columns}, + _look_for_hash_bang => $args{look_for_hash_bang}, + _trim_qw => $args{trim_qw}, + _continuation_indentation => $args{continuation_indentation}, + _outdent_labels => $args{outdent_labels}, + _last_line_number => $args{starting_line_number} - 1, + _saw_perl_dash_P => 0, + _saw_perl_dash_w => 0, + _saw_use_strict => 0, + _saw_v_string => 0, + _hit_bug => 0, + _look_for_autoloader => $args{look_for_autoloader}, + _look_for_selfloader => $args{look_for_selfloader}, + _saw_autoloader => 0, + _saw_selfloader => 0, + _saw_hash_bang => 0, + _saw_end => 0, + _saw_data => 0, + _saw_negative_indentation => 0, + _started_tokenizing => 0, + _line_buffer_object => $line_buffer_object, + _debugger_object => $args{debugger_object}, + _diagnostics_object => $args{diagnostics_object}, + _logger_object => $args{logger_object}, + _unexpected_error_count => 0, + _started_looking_for_here_target_at => 0, + _nearly_matched_here_target_at => undef, + _line_text => "", + _rlower_case_labels_at => undef, + _extended_syntax => $args{extended_syntax}, + }; + + prepare_for_a_new_file(); + find_starting_indentation_level(); + + bless $tokenizer_self, $class; + + # This is not a full class yet, so die if an attempt is made to + # create more than one object. + + if ( _increment_count() > 1 ) { + confess +"Attempt to create more than 1 object in $class, which is not a true class yet\n"; + } + + return $tokenizer_self; + +} + +# interface to Perl::Tidy::Logger routines +sub warning { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->warning($msg); + } + return; +} + +sub complain { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->complain($msg); + } + return; +} + +sub write_logfile_entry { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->write_logfile_entry($msg); + } + return; +} + +sub interrupt_logfile { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->interrupt_logfile(); + } + return; +} + +sub resume_logfile { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->resume_logfile(); + } + return; +} + +sub increment_brace_error { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->increment_brace_error(); + } + return; +} + +sub report_definite_bug { + $tokenizer_self->{_hit_bug} = 1; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->report_definite_bug(); + } + return; +} + +sub brace_warning { + my $msg = shift; + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + $logger_object->brace_warning($msg); + } + return; +} + +sub get_saw_brace_error { + my $logger_object = $tokenizer_self->{_logger_object}; + if ($logger_object) { + return $logger_object->get_saw_brace_error(); + } + else { + return 0; + } +} + +# interface to Perl::Tidy::Diagnostics routines +sub write_diagnostics { + my $msg = shift; + if ( $tokenizer_self->{_diagnostics_object} ) { + $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg); + } + return; +} + +sub report_tokenization_errors { + + my $self = shift; + my $severe_error = $self->{_in_error}; + + my $level = get_indentation_level(); + if ( $level != $tokenizer_self->{_starting_level} ) { + warning("final indentation level: $level\n"); + } + + check_final_nesting_depths(); + + if ( $tokenizer_self->{_look_for_hash_bang} + && !$tokenizer_self->{_saw_hash_bang} ) + { + warning( + "hit EOF without seeing hash-bang line; maybe don't need -x?\n"); + } + + if ( $tokenizer_self->{_in_format} ) { + warning("hit EOF while in format description\n"); + } + + if ( $tokenizer_self->{_in_pod} ) { + + # Just write log entry if this is after __END__ or __DATA__ + # because this happens to often, and it is not likely to be + # a parsing error. + if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { + write_logfile_entry( +"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" + ); + } + + else { + complain( +"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n" + ); + } + + } + + if ( $tokenizer_self->{_in_here_doc} ) { + $severe_error = 1; + my $here_doc_target = $tokenizer_self->{_here_doc_target}; + my $started_looking_for_here_target_at = + $tokenizer_self->{_started_looking_for_here_target_at}; + if ($here_doc_target) { + warning( +"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n" + ); + } + else { + warning( +"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n" + ); + } + my $nearly_matched_here_target_at = + $tokenizer_self->{_nearly_matched_here_target_at}; + if ($nearly_matched_here_target_at) { + warning( +"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n" + ); + } + } + + if ( $tokenizer_self->{_in_quote} ) { + $severe_error = 1; + my $line_start_quote = $tokenizer_self->{_line_start_quote}; + my $quote_target = $tokenizer_self->{_quote_target}; + my $what = + ( $tokenizer_self->{_in_attribute_list} ) + ? "attribute list" + : "quote/pattern"; + warning( +"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n" + ); + } + + if ( $tokenizer_self->{_hit_bug} ) { + $severe_error = 1; + } + + my $logger_object = $tokenizer_self->{_logger_object}; + +# TODO: eventually may want to activate this to cause file to be output verbatim + if (0) { + + # Set the severe error for a fairly high warning count because + # some of the warnings do not harm formatting, such as duplicate + # sub names. + my $warning_count = $logger_object->{_warning_count}; + if ( $warning_count > 50 ) { + $severe_error = 1; + } + + # Brace errors are significant, so set the severe error flag at + # a low number. + my $saw_brace_error = $logger_object->{_saw_brace_error}; + if ( $saw_brace_error > 2 ) { + $severe_error = 1; + } + } + + unless ( $tokenizer_self->{_saw_perl_dash_w} ) { + if ( $] < 5.006 ) { + write_logfile_entry("Suggest including '-w parameter'\n"); + } + else { + write_logfile_entry("Suggest including 'use warnings;'\n"); + } + } + + if ( $tokenizer_self->{_saw_perl_dash_P} ) { + write_logfile_entry("Use of -P parameter for defines is discouraged\n"); + } + + unless ( $tokenizer_self->{_saw_use_strict} ) { + write_logfile_entry("Suggest including 'use strict;'\n"); + } + + # it is suggested that labels have at least one upper case character + # for legibility and to avoid code breakage as new keywords are introduced + if ( $tokenizer_self->{_rlower_case_labels_at} ) { + my @lower_case_labels_at = + @{ $tokenizer_self->{_rlower_case_labels_at} }; + write_logfile_entry( + "Suggest using upper case characters in label(s)\n"); + local $" = ')('; + write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n"); + } + return $severe_error; +} + +sub report_v_string { + + # warn if this version can't handle v-strings + my $tok = shift; + unless ( $tokenizer_self->{_saw_v_string} ) { + $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number}; + } + if ( $] < 5.006 ) { + warning( +"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n" + ); + } + return; +} + +sub get_input_line_number { + return $tokenizer_self->{_last_line_number}; +} + +# returns the next tokenized line +sub get_line { + + my $self = shift; + + # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, + # $square_bracket_depth, $paren_depth + + my $input_line = $tokenizer_self->{_line_buffer_object}->get_line(); + $tokenizer_self->{_line_text} = $input_line; + + return unless ($input_line); + + my $input_line_number = ++$tokenizer_self->{_last_line_number}; + + # Find and remove what characters terminate this line, including any + # control r + my $input_line_separator = ""; + if ( chomp($input_line) ) { $input_line_separator = $/ } + + # TODO: what other characters should be included here? + if ( $input_line =~ s/((\r|\035|\032)+)$// ) { + $input_line_separator = $2 . $input_line_separator; + } + + # for backwards compatibility we keep the line text terminated with + # a newline character + $input_line .= "\n"; + $tokenizer_self->{_line_text} = $input_line; # update + + # create a data structure describing this line which will be + # returned to the caller. + + # _line_type codes are: + # SYSTEM - system-specific code before hash-bang line + # CODE - line of perl code (including comments) + # POD_START - line starting pod, such as '=head' + # POD - pod documentation text + # POD_END - last line of pod section, '=cut' + # HERE - text of here-document + # HERE_END - last line of here-doc (target word) + # FORMAT - format section + # FORMAT_END - last line of format section, '.' + # DATA_START - __DATA__ line + # DATA - unidentified text following __DATA__ + # END_START - __END__ line + # END - unidentified text following __END__ + # ERROR - we are in big trouble, probably not a perl script + + # Other variables: + # _curly_brace_depth - depth of curly braces at start of line + # _square_bracket_depth - depth of square brackets at start of line + # _paren_depth - depth of parens at start of line + # _starting_in_quote - this line continues a multi-line quote + # (so don't trim leading blanks!) + # _ending_in_quote - this line ends in a multi-line quote + # (so don't trim trailing blanks!) + my $line_of_tokens = { + _line_type => 'EOF', + _line_text => $input_line, + _line_number => $input_line_number, + _rtoken_type => undef, + _rtokens => undef, + _rlevels => undef, + _rslevels => undef, + _rblock_type => undef, + _rcontainer_type => undef, + _rcontainer_environment => undef, + _rtype_sequence => undef, + _rnesting_tokens => undef, + _rci_levels => undef, + _rnesting_blocks => undef, + _guessed_indentation_level => 0, + _starting_in_quote => 0, # to be set by subroutine + _ending_in_quote => 0, + _curly_brace_depth => $brace_depth, + _square_bracket_depth => $square_bracket_depth, + _paren_depth => $paren_depth, + _quote_character => '', + }; + + # must print line unchanged if we are in a here document + if ( $tokenizer_self->{_in_here_doc} ) { + + $line_of_tokens->{_line_type} = 'HERE'; + my $here_doc_target = $tokenizer_self->{_here_doc_target}; + my $here_quote_character = $tokenizer_self->{_here_quote_character}; + my $candidate_target = $input_line; + chomp $candidate_target; + + # Handle <<~ targets, which are indicated here by a leading space on + # the here quote character + if ( $here_quote_character =~ /^\s/ ) { + $candidate_target =~ s/^\s*//; + } + if ( $candidate_target eq $here_doc_target ) { + $tokenizer_self->{_nearly_matched_here_target_at} = undef; + $line_of_tokens->{_line_type} = 'HERE_END'; + write_logfile_entry("Exiting HERE document $here_doc_target\n"); + + my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; + if ( @{$rhere_target_list} ) { # there can be multiple here targets + ( $here_doc_target, $here_quote_character ) = + @{ shift @{$rhere_target_list} }; + $tokenizer_self->{_here_doc_target} = $here_doc_target; + $tokenizer_self->{_here_quote_character} = + $here_quote_character; + write_logfile_entry( + "Entering HERE document $here_doc_target\n"); + $tokenizer_self->{_nearly_matched_here_target_at} = undef; + $tokenizer_self->{_started_looking_for_here_target_at} = + $input_line_number; + } + else { + $tokenizer_self->{_in_here_doc} = 0; + $tokenizer_self->{_here_doc_target} = ""; + $tokenizer_self->{_here_quote_character} = ""; + } + } + + # check for error of extra whitespace + # note for PERL6: leading whitespace is allowed + else { + $candidate_target =~ s/\s*$//; + $candidate_target =~ s/^\s*//; + if ( $candidate_target eq $here_doc_target ) { + $tokenizer_self->{_nearly_matched_here_target_at} = + $input_line_number; + } + } + return $line_of_tokens; + } + + # must print line unchanged if we are in a format section + elsif ( $tokenizer_self->{_in_format} ) { + + if ( $input_line =~ /^\.[\s#]*$/ ) { + write_logfile_entry("Exiting format section\n"); + $tokenizer_self->{_in_format} = 0; + $line_of_tokens->{_line_type} = 'FORMAT_END'; + } + else { + $line_of_tokens->{_line_type} = 'FORMAT'; + } + return $line_of_tokens; + } + + # must print line unchanged if we are in pod documentation + elsif ( $tokenizer_self->{_in_pod} ) { + + $line_of_tokens->{_line_type} = 'POD'; + if ( $input_line =~ /^=cut/ ) { + $line_of_tokens->{_line_type} = 'POD_END'; + write_logfile_entry("Exiting POD section\n"); + $tokenizer_self->{_in_pod} = 0; + } + if ( $input_line =~ /^\#\!.*perl\b/ ) { + warning( + "Hash-bang in pod can cause older versions of perl to fail! \n" + ); + } + + return $line_of_tokens; + } + + # must print line unchanged if we have seen a severe error (i.e., we + # are seeing illegal tokens and cannot continue. Syntax errors do + # not pass this route). Calling routine can decide what to do, but + # the default can be to just pass all lines as if they were after __END__ + elsif ( $tokenizer_self->{_in_error} ) { + $line_of_tokens->{_line_type} = 'ERROR'; + return $line_of_tokens; + } + + # print line unchanged if we are __DATA__ section + elsif ( $tokenizer_self->{_in_data} ) { + + # ...but look for POD + # Note that the _in_data and _in_end flags remain set + # so that we return to that state after seeing the + # end of a pod section + if ( $input_line =~ /^=(?!cut)/ ) { + $line_of_tokens->{_line_type} = 'POD_START'; + write_logfile_entry("Entering POD section\n"); + $tokenizer_self->{_in_pod} = 1; + return $line_of_tokens; + } + else { + $line_of_tokens->{_line_type} = 'DATA'; + return $line_of_tokens; + } + } + + # print line unchanged if we are in __END__ section + elsif ( $tokenizer_self->{_in_end} ) { + + # ...but look for POD + # Note that the _in_data and _in_end flags remain set + # so that we return to that state after seeing the + # end of a pod section + if ( $input_line =~ /^=(?!cut)/ ) { + $line_of_tokens->{_line_type} = 'POD_START'; + write_logfile_entry("Entering POD section\n"); + $tokenizer_self->{_in_pod} = 1; + return $line_of_tokens; + } + else { + $line_of_tokens->{_line_type} = 'END'; + return $line_of_tokens; + } + } + + # check for a hash-bang line if we haven't seen one + if ( !$tokenizer_self->{_saw_hash_bang} ) { + if ( $input_line =~ /^\#\!.*perl\b/ ) { + $tokenizer_self->{_saw_hash_bang} = $input_line_number; + + # check for -w and -P flags + if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) { + $tokenizer_self->{_saw_perl_dash_P} = 1; + } + + if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) { + $tokenizer_self->{_saw_perl_dash_w} = 1; + } + + if ( + ( $input_line_number > 1 ) + + # leave any hash bang in a BEGIN block alone + # i.e. see 'debugger-duck_type.t' + && !( + $last_nonblank_block_type + && $last_nonblank_block_type eq 'BEGIN' + ) + && ( !$tokenizer_self->{_look_for_hash_bang} ) + ) + { + + # this is helpful for VMS systems; we may have accidentally + # tokenized some DCL commands + if ( $tokenizer_self->{_started_tokenizing} ) { + warning( +"There seems to be a hash-bang after line 1; do you need to run with -x ?\n" + ); + } + else { + complain("Useless hash-bang after line 1\n"); + } + } + + # Report the leading hash-bang as a system line + # This will prevent -dac from deleting it + else { + $line_of_tokens->{_line_type} = 'SYSTEM'; + return $line_of_tokens; + } + } + } + + # wait for a hash-bang before parsing if the user invoked us with -x + if ( $tokenizer_self->{_look_for_hash_bang} + && !$tokenizer_self->{_saw_hash_bang} ) + { + $line_of_tokens->{_line_type} = 'SYSTEM'; + return $line_of_tokens; + } + + # a first line of the form ': #' will be marked as SYSTEM + # since lines of this form may be used by tcsh + if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { + $line_of_tokens->{_line_type} = 'SYSTEM'; + return $line_of_tokens; + } + + # now we know that it is ok to tokenize the line... + # the line tokenizer will modify any of these private variables: + # _rhere_target_list + # _in_data + # _in_end + # _in_format + # _in_error + # _in_pod + # _in_quote + my $ending_in_quote_last = $tokenizer_self->{_in_quote}; + tokenize_this_line($line_of_tokens); + + # Now finish defining the return structure and return it + $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote}; + + # handle severe error (binary data in script) + if ( $tokenizer_self->{_in_error} ) { + $tokenizer_self->{_in_quote} = 0; # to avoid any more messages + warning("Giving up after error\n"); + $line_of_tokens->{_line_type} = 'ERROR'; + reset_indentation_level(0); # avoid error messages + return $line_of_tokens; + } + + # handle start of pod documentation + if ( $tokenizer_self->{_in_pod} ) { + + # This gets tricky..above a __DATA__ or __END__ section, perl + # accepts '=cut' as the start of pod section. But afterwards, + # only pod utilities see it and they may ignore an =cut without + # leading =head. In any case, this isn't good. + if ( $input_line =~ /^=cut\b/ ) { + if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) { + complain("=cut while not in pod ignored\n"); + $tokenizer_self->{_in_pod} = 0; + $line_of_tokens->{_line_type} = 'POD_END'; + } + else { + $line_of_tokens->{_line_type} = 'POD_START'; + complain( +"=cut starts a pod section .. this can fool pod utilities.\n" + ); + write_logfile_entry("Entering POD section\n"); + } + } + + else { + $line_of_tokens->{_line_type} = 'POD_START'; + write_logfile_entry("Entering POD section\n"); + } + + return $line_of_tokens; + } + + # update indentation levels for log messages + if ( $input_line !~ /^\s*$/ ) { + my $rlevels = $line_of_tokens->{_rlevels}; + $line_of_tokens->{_guessed_indentation_level} = + guess_old_indentation_level($input_line); + } + + # see if this line contains here doc targets + my $rhere_target_list = $tokenizer_self->{_rhere_target_list}; + if ( @{$rhere_target_list} ) { + + my ( $here_doc_target, $here_quote_character ) = + @{ shift @{$rhere_target_list} }; + $tokenizer_self->{_in_here_doc} = 1; + $tokenizer_self->{_here_doc_target} = $here_doc_target; + $tokenizer_self->{_here_quote_character} = $here_quote_character; + write_logfile_entry("Entering HERE document $here_doc_target\n"); + $tokenizer_self->{_started_looking_for_here_target_at} = + $input_line_number; + } + + # NOTE: __END__ and __DATA__ statements are written unformatted + # because they can theoretically contain additional characters + # which are not tokenized (and cannot be read with either!). + if ( $tokenizer_self->{_in_data} ) { + $line_of_tokens->{_line_type} = 'DATA_START'; + write_logfile_entry("Starting __DATA__ section\n"); + $tokenizer_self->{_saw_data} = 1; + + # keep parsing after __DATA__ if use SelfLoader was seen + if ( $tokenizer_self->{_saw_selfloader} ) { + $tokenizer_self->{_in_data} = 0; + write_logfile_entry( + "SelfLoader seen, continuing; -nlsl deactivates\n"); + } + + return $line_of_tokens; + } + + elsif ( $tokenizer_self->{_in_end} ) { + $line_of_tokens->{_line_type} = 'END_START'; + write_logfile_entry("Starting __END__ section\n"); + $tokenizer_self->{_saw_end} = 1; + + # keep parsing after __END__ if use AutoLoader was seen + if ( $tokenizer_self->{_saw_autoloader} ) { + $tokenizer_self->{_in_end} = 0; + write_logfile_entry( + "AutoLoader seen, continuing; -nlal deactivates\n"); + } + return $line_of_tokens; + } + + # now, finally, we know that this line is type 'CODE' + $line_of_tokens->{_line_type} = 'CODE'; + + # remember if we have seen any real code + if ( !$tokenizer_self->{_started_tokenizing} + && $input_line !~ /^\s*$/ + && $input_line !~ /^\s*#/ ) + { + $tokenizer_self->{_started_tokenizing} = 1; + } + + if ( $tokenizer_self->{_debugger_object} ) { + $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens); + } + + # Note: if keyword 'format' occurs in this line code, it is still CODE + # (keyword 'format' need not start a line) + if ( $tokenizer_self->{_in_format} ) { + write_logfile_entry("Entering format section\n"); + } + + if ( $tokenizer_self->{_in_quote} + and ( $tokenizer_self->{_line_start_quote} < 0 ) ) + { + + #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { + if ( + ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ ) + { + $tokenizer_self->{_line_start_quote} = $input_line_number; + write_logfile_entry( + "Start multi-line quote or pattern ending in $quote_target\n"); + } + } + elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 ) + && !$tokenizer_self->{_in_quote} ) + { + $tokenizer_self->{_line_start_quote} = -1; + write_logfile_entry("End of multi-line quote or pattern\n"); + } + + # we are returning a line of CODE + return $line_of_tokens; +} + +sub find_starting_indentation_level { + + # We need to find the indentation level of the first line of the + # script being formatted. Often it will be zero for an entire file, + # but if we are formatting a local block of code (within an editor for + # example) it may not be zero. The user may specify this with the + # -sil=n parameter but normally doesn't so we have to guess. + # + # USES GLOBAL VARIABLES: $tokenizer_self + my $starting_level = 0; + + # use value if given as parameter + if ( $tokenizer_self->{_know_starting_level} ) { + $starting_level = $tokenizer_self->{_starting_level}; + } + + # if we know there is a hash_bang line, the level must be zero + elsif ( $tokenizer_self->{_look_for_hash_bang} ) { + $tokenizer_self->{_know_starting_level} = 1; + } + + # otherwise figure it out from the input file + else { + my $line; + my $i = 0; + + # keep looking at lines until we find a hash bang or piece of code + my $msg = ""; + while ( $line = + $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) + { + + # if first line is #! then assume starting level is zero + if ( $i == 1 && $line =~ /^\#\!/ ) { + $starting_level = 0; + last; + } + next if ( $line =~ /^\s*#/ ); # skip past comments + next if ( $line =~ /^\s*$/ ); # skip past blank lines + $starting_level = guess_old_indentation_level($line); + last; + } + $msg = "Line $i implies starting-indentation-level = $starting_level\n"; + write_logfile_entry("$msg"); + } + $tokenizer_self->{_starting_level} = $starting_level; + reset_indentation_level($starting_level); + return; +} + +sub guess_old_indentation_level { + my ($line) = @_; + + # Guess the indentation level of an input line. + # + # For the first line of code this result will define the starting + # indentation level. It will mainly be non-zero when perltidy is applied + # within an editor to a local block of code. + # + # This is an impossible task in general because we can't know what tabs + # meant for the old script and how many spaces were used for one + # indentation level in the given input script. For example it may have + # been previously formatted with -i=7 -et=3. But we can at least try to + # make sure that perltidy guesses correctly if it is applied repeatedly to + # a block of code within an editor, so that the block stays at the same + # level when perltidy is applied repeatedly. + # + # USES GLOBAL VARIABLES: $tokenizer_self + my $level = 0; + + # find leading tabs, spaces, and any statement label + my $spaces = 0; + if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) { + + # If there are leading tabs, we use the tab scheme for this run, if + # any, so that the code will remain stable when editing. + if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} } + + if ($2) { $spaces += length($2) } + + # correct for outdented labels + if ( $3 && $tokenizer_self->{'_outdent_labels'} ) { + $spaces += $tokenizer_self->{_continuation_indentation}; + } + } + + # compute indentation using the value of -i for this run. + # If -i=0 is used for this run (which is possible) it doesn't matter + # what we do here but we'll guess that the old run used 4 spaces per level. + my $indent_columns = $tokenizer_self->{_indent_columns}; + $indent_columns = 4 if ( !$indent_columns ); + $level = int( $spaces / $indent_columns ); + return ($level); +} + +# This is a currently unused debug routine +sub dump_functions { + + my $fh = *STDOUT; + foreach my $pkg ( keys %is_user_function ) { + print $fh "\nnon-constant subs in package $pkg\n"; + + foreach my $sub ( keys %{ $is_user_function{$pkg} } ) { + my $msg = ""; + if ( $is_block_list_function{$pkg}{$sub} ) { + $msg = 'block_list'; + } + + if ( $is_block_function{$pkg}{$sub} ) { + $msg = 'block'; + } + print $fh "$sub $msg\n"; + } + } + + foreach my $pkg ( keys %is_constant ) { + print $fh "\nconstants and constant subs in package $pkg\n"; + + foreach my $sub ( keys %{ $is_constant{$pkg} } ) { + print $fh "$sub\n"; + } + } + return; +} + +sub ones_count { + + # count number of 1's in a string of 1's and 0's + # example: ones_count("010101010101") gives 6 + my $str = shift; + return $str =~ tr/1/0/; +} + +sub prepare_for_a_new_file { + + # previous tokens needed to determine what to expect next + $last_nonblank_token = ';'; # the only possible starting state which + $last_nonblank_type = ';'; # will make a leading brace a code block + $last_nonblank_block_type = ''; + + # scalars for remembering statement types across multiple lines + $statement_type = ''; # '' or 'use' or 'sub..' or 'case..' + $in_attribute_list = 0; + + # scalars for remembering where we are in the file + $current_package = "main"; + $context = UNKNOWN_CONTEXT; + + # hashes used to remember function information + %is_constant = (); # user-defined constants + %is_user_function = (); # user-defined functions + %user_function_prototype = (); # their prototypes + %is_block_function = (); + %is_block_list_function = (); + %saw_function_definition = (); + + # variables used to track depths of various containers + # and report nesting errors + $paren_depth = 0; + $brace_depth = 0; + $square_bracket_depth = 0; + @current_depth[ 0 .. $#closing_brace_names ] = + (0) x scalar @closing_brace_names; + $total_depth = 0; + @total_depth = (); + @nesting_sequence_number[ 0 .. $#closing_brace_names ] = + ( 0 .. $#closing_brace_names ); + @current_sequence_number = (); + $paren_type[$paren_depth] = ''; + $paren_semicolon_count[$paren_depth] = 0; + $paren_structural_type[$brace_depth] = ''; + $brace_type[$brace_depth] = ';'; # identify opening brace as code block + $brace_structural_type[$brace_depth] = ''; + $brace_context[$brace_depth] = UNKNOWN_CONTEXT; + $brace_package[$paren_depth] = $current_package; + $square_bracket_type[$square_bracket_depth] = ''; + $square_bracket_structural_type[$square_bracket_depth] = ''; + + initialize_tokenizer_state(); + return; +} + +{ # begin tokenize_this_line + + use constant BRACE => 0; + use constant SQUARE_BRACKET => 1; + use constant PAREN => 2; + use constant QUESTION_COLON => 3; + + # TV1: scalars for processing one LINE. + # Re-initialized on each entry to sub tokenize_this_line. + my ( + $block_type, $container_type, $expecting, + $i, $i_tok, $input_line, + $input_line_number, $last_nonblank_i, $max_token_index, + $next_tok, $next_type, $peeked_ahead, + $prototype, $rhere_target_list, $rtoken_map, + $rtoken_type, $rtokens, $tok, + $type, $type_sequence, $indent_flag, + ); + + # TV2: refs to ARRAYS for processing one LINE + # Re-initialized on each call. + my $routput_token_list = []; # stack of output token indexes + my $routput_token_type = []; # token types + my $routput_block_type = []; # types of code block + my $routput_container_type = []; # paren types, such as if, elsif, .. + my $routput_type_sequence = []; # nesting sequential number + my $routput_indent_flag = []; # + + # TV3: SCALARS for quote variables. These are initialized with a + # subroutine call and continually updated as lines are processed. + my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, ); + + # TV4: SCALARS for multi-line identifiers and + # statements. These are initialized with a subroutine call + # and continually updated as lines are processed. + my ( $id_scan_state, $identifier, $want_paren, $indented_if_level ); + + # TV5: SCALARS for tracking indentation level. + # Initialized once and continually updated as lines are + # processed. + my ( + $nesting_token_string, $nesting_type_string, + $nesting_block_string, $nesting_block_flag, + $nesting_list_string, $nesting_list_flag, + $ci_string_in_tokenizer, $continuation_string_in_tokenizer, + $in_statement_continuation, $level_in_tokenizer, + $slevel_in_tokenizer, $rslevel_stack, + ); + + # TV6: SCALARS for remembering several previous + # tokens. Initialized once and continually updated as + # lines are processed. + my ( + $last_nonblank_container_type, $last_nonblank_type_sequence, + $last_last_nonblank_token, $last_last_nonblank_type, + $last_last_nonblank_block_type, $last_last_nonblank_container_type, + $last_last_nonblank_type_sequence, $last_nonblank_prototype, + ); + + # ---------------------------------------------------------------- + # beginning of tokenizer variable access and manipulation routines + # ---------------------------------------------------------------- + + sub initialize_tokenizer_state { + + # TV1: initialized on each call + # TV2: initialized on each call + # TV3: + $in_quote = 0; + $quote_type = 'Q'; + $quote_character = ""; + $quote_pos = 0; + $quote_depth = 0; + $quoted_string_1 = ""; + $quoted_string_2 = ""; + $allowed_quote_modifiers = ""; + + # TV4: + $id_scan_state = ''; + $identifier = ''; + $want_paren = ""; + $indented_if_level = 0; + + # TV5: + $nesting_token_string = ""; + $nesting_type_string = ""; + $nesting_block_string = '1'; # initially in a block + $nesting_block_flag = 1; + $nesting_list_string = '0'; # initially not in a list + $nesting_list_flag = 0; # initially not in a list + $ci_string_in_tokenizer = ""; + $continuation_string_in_tokenizer = "0"; + $in_statement_continuation = 0; + $level_in_tokenizer = 0; + $slevel_in_tokenizer = 0; + $rslevel_stack = []; + + # TV6: + $last_nonblank_container_type = ''; + $last_nonblank_type_sequence = ''; + $last_last_nonblank_token = ';'; + $last_last_nonblank_type = ';'; + $last_last_nonblank_block_type = ''; + $last_last_nonblank_container_type = ''; + $last_last_nonblank_type_sequence = ''; + $last_nonblank_prototype = ""; + return; + } + + sub save_tokenizer_state { + + my $rTV1 = [ + $block_type, $container_type, $expecting, + $i, $i_tok, $input_line, + $input_line_number, $last_nonblank_i, $max_token_index, + $next_tok, $next_type, $peeked_ahead, + $prototype, $rhere_target_list, $rtoken_map, + $rtoken_type, $rtokens, $tok, + $type, $type_sequence, $indent_flag, + ]; + + my $rTV2 = [ + $routput_token_list, $routput_token_type, + $routput_block_type, $routput_container_type, + $routput_type_sequence, $routput_indent_flag, + ]; + + my $rTV3 = [ + $in_quote, $quote_type, + $quote_character, $quote_pos, + $quote_depth, $quoted_string_1, + $quoted_string_2, $allowed_quote_modifiers, + ]; + + my $rTV4 = + [ $id_scan_state, $identifier, $want_paren, $indented_if_level ]; + + my $rTV5 = [ + $nesting_token_string, $nesting_type_string, + $nesting_block_string, $nesting_block_flag, + $nesting_list_string, $nesting_list_flag, + $ci_string_in_tokenizer, $continuation_string_in_tokenizer, + $in_statement_continuation, $level_in_tokenizer, + $slevel_in_tokenizer, $rslevel_stack, + ]; + + my $rTV6 = [ + $last_nonblank_container_type, + $last_nonblank_type_sequence, + $last_last_nonblank_token, + $last_last_nonblank_type, + $last_last_nonblank_block_type, + $last_last_nonblank_container_type, + $last_last_nonblank_type_sequence, + $last_nonblank_prototype, + ]; + return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ]; + } + + sub restore_tokenizer_state { + my ($rstate) = @_; + my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate}; + ( + $block_type, $container_type, $expecting, + $i, $i_tok, $input_line, + $input_line_number, $last_nonblank_i, $max_token_index, + $next_tok, $next_type, $peeked_ahead, + $prototype, $rhere_target_list, $rtoken_map, + $rtoken_type, $rtokens, $tok, + $type, $type_sequence, $indent_flag, + ) = @{$rTV1}; + + ( + $routput_token_list, $routput_token_type, + $routput_block_type, $routput_container_type, + $routput_type_sequence, $routput_type_sequence, + ) = @{$rTV2}; + + ( + $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, + ) = @{$rTV3}; + + ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) = + @{$rTV4}; + + ( + $nesting_token_string, $nesting_type_string, + $nesting_block_string, $nesting_block_flag, + $nesting_list_string, $nesting_list_flag, + $ci_string_in_tokenizer, $continuation_string_in_tokenizer, + $in_statement_continuation, $level_in_tokenizer, + $slevel_in_tokenizer, $rslevel_stack, + ) = @{$rTV5}; + + ( + $last_nonblank_container_type, + $last_nonblank_type_sequence, + $last_last_nonblank_token, + $last_last_nonblank_type, + $last_last_nonblank_block_type, + $last_last_nonblank_container_type, + $last_last_nonblank_type_sequence, + $last_nonblank_prototype, + ) = @{$rTV6}; + return; + } + + sub get_indentation_level { + + # patch to avoid reporting error if indented if is not terminated + if ($indented_if_level) { return $level_in_tokenizer - 1 } + return $level_in_tokenizer; + } + + sub reset_indentation_level { + $level_in_tokenizer = $slevel_in_tokenizer = shift; + push @{$rslevel_stack}, $slevel_in_tokenizer; + return; + } + + sub peeked_ahead { + my $flag = shift; + $peeked_ahead = defined($flag) ? $flag : $peeked_ahead; + return $peeked_ahead; + } + + # ------------------------------------------------------------ + # end of tokenizer variable access and manipulation routines + # ------------------------------------------------------------ + + # ------------------------------------------------------------ + # beginning of various scanner interface routines + # ------------------------------------------------------------ + sub scan_replacement_text { + + # check for here-docs in replacement text invoked by + # a substitution operator with executable modifier 'e'. + # + # given: + # $replacement_text + # return: + # $rht = reference to any here-doc targets + my ($replacement_text) = @_; + + # quick check + return unless ( $replacement_text =~ /<{_logger_object}; + + # localize all package variables + local ( + $tokenizer_self, $last_nonblank_token, + $last_nonblank_type, $last_nonblank_block_type, + $statement_type, $in_attribute_list, + $current_package, $context, + %is_constant, %is_user_function, + %user_function_prototype, %is_block_function, + %is_block_list_function, %saw_function_definition, + $brace_depth, $paren_depth, + $square_bracket_depth, @current_depth, + @total_depth, $total_depth, + @nesting_sequence_number, @current_sequence_number, + @paren_type, @paren_semicolon_count, + @paren_structural_type, @brace_type, + @brace_structural_type, @brace_context, + @brace_package, @square_bracket_type, + @square_bracket_structural_type, @depth_array, + @starting_line_of_current_depth, @nested_ternary_flag, + @nested_statement_type, + ); + + # save all lexical variables + my $rstate = save_tokenizer_state(); + _decrement_count(); # avoid error check for multiple tokenizers + + # make a new tokenizer + my $rOpts = {}; + my $rpending_logfile_message; + my $source_object = + Perl::Tidy::LineSource->new( \$replacement_text, $rOpts, + $rpending_logfile_message ); + my $tokenizer = Perl::Tidy::Tokenizer->new( + source_object => $source_object, + logger_object => $logger_object, + starting_line_number => $input_line_number, + ); + + # scan the replacement text + 1 while ( $tokenizer->get_line() ); + + # remove any here doc targets + my $rht = undef; + if ( $tokenizer_self->{_in_here_doc} ) { + $rht = []; + push @{$rht}, + [ + $tokenizer_self->{_here_doc_target}, + $tokenizer_self->{_here_quote_character} + ]; + if ( $tokenizer_self->{_rhere_target_list} ) { + push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} }; + $tokenizer_self->{_rhere_target_list} = undef; + } + $tokenizer_self->{_in_here_doc} = undef; + } + + # now its safe to report errors + my $severe_error = $tokenizer->report_tokenization_errors(); + + # TODO: Could propagate a severe error up + + # restore all tokenizer lexical variables + restore_tokenizer_state($rstate); + + # return the here doc targets + return $rht; + } + + sub scan_bare_identifier { + ( $i, $tok, $type, $prototype ) = + scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype, + $rtoken_map, $max_token_index ); + return; + } + + sub scan_identifier { + ( $i, $tok, $type, $id_scan_state, $identifier ) = + scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, + $max_token_index, $expecting, $paren_type[$paren_depth] ); + return; + } + + sub scan_id { + ( $i, $tok, $type, $id_scan_state ) = + scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map, + $id_scan_state, $max_token_index ); + return; + } + + sub scan_number { + my $number; + ( $i, $type, $number ) = + scan_number_do( $input_line, $i, $rtoken_map, $type, + $max_token_index ); + return $number; + } + + # a sub to warn if token found where term expected + sub error_if_expecting_TERM { + if ( $expecting == TERM ) { + if ( $really_want_term{$last_nonblank_type} ) { + report_unexpected( $tok, "term", $i_tok, $last_nonblank_i, + $rtoken_map, $rtoken_type, $input_line ); + return 1; + } + } + return; + } + + # a sub to warn if token found where operator expected + sub error_if_expecting_OPERATOR { + my $thing = shift; + if ( $expecting == OPERATOR ) { + if ( !defined($thing) ) { $thing = $tok } + report_unexpected( $thing, "operator", $i_tok, $last_nonblank_i, + $rtoken_map, $rtoken_type, $input_line ); + if ( $i_tok == 0 ) { + interrupt_logfile(); + warning("Missing ';' above?\n"); + resume_logfile(); + } + return 1; + } + return; + } + + # ------------------------------------------------------------ + # end scanner interfaces + # ------------------------------------------------------------ + + my %is_for_foreach; + @_ = qw(for foreach); + @is_for_foreach{@_} = (1) x scalar(@_); + + my %is_my_our; + @_ = qw(my our); + @is_my_our{@_} = (1) x scalar(@_); + + # These keywords may introduce blocks after parenthesized expressions, + # in the form: + # keyword ( .... ) { BLOCK } + # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when' + my %is_blocktype_with_paren; + @_ = + qw(if elsif unless while until for foreach switch case given when catch); + @is_blocktype_with_paren{@_} = (1) x scalar(@_); + + # ------------------------------------------------------------ + # begin hash of code for handling most token types + # ------------------------------------------------------------ + my $tokenization_code = { + + # no special code for these types yet, but syntax checks + # could be added + +## '!' => undef, +## '!=' => undef, +## '!~' => undef, +## '%=' => undef, +## '&&=' => undef, +## '&=' => undef, +## '+=' => undef, +## '-=' => undef, +## '..' => undef, +## '..' => undef, +## '...' => undef, +## '.=' => undef, +## '<<=' => undef, +## '<=' => undef, +## '<=>' => undef, +## '<>' => undef, +## '=' => undef, +## '==' => undef, +## '=~' => undef, +## '>=' => undef, +## '>>' => undef, +## '>>=' => undef, +## '\\' => undef, +## '^=' => undef, +## '|=' => undef, +## '||=' => undef, +## '//=' => undef, +## '~' => undef, +## '~~' => undef, +## '!~~' => undef, + + '>' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + '|' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + '$' => sub { + + # start looking for a scalar + error_if_expecting_OPERATOR("Scalar") + if ( $expecting == OPERATOR ); + scan_identifier(); + + if ( $identifier eq '$^W' ) { + $tokenizer_self->{_saw_perl_dash_w} = 1; + } + + # Check for identifier in indirect object slot + # (vorboard.pl, sort.t). Something like: + # /^(print|printf|sort|exec|system)$/ + if ( + $is_indirect_object_taker{$last_nonblank_token} + + || ( ( $last_nonblank_token eq '(' ) + && $is_indirect_object_taker{ $paren_type[$paren_depth] } ) + || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object + ) + { + $type = 'Z'; + } + }, + '(' => sub { + + ++$paren_depth; + $paren_semicolon_count[$paren_depth] = 0; + if ($want_paren) { + $container_type = $want_paren; + $want_paren = ""; + } + elsif ( $statement_type =~ /^sub\b/ ) { + $container_type = $statement_type; + } + else { + $container_type = $last_nonblank_token; + + # We can check for a syntax error here of unexpected '(', + # but this is going to get messy... + if ( + $expecting == OPERATOR + + # be sure this is not a method call of the form + # &method(...), $method->(..), &{method}(...), + # $ref[2](list) is ok & short for $ref[2]->(list) + # NOTE: at present, braces in something like &{ xxx } + # are not marked as a block, we might have a method call + && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ + + ) + { + + # ref: camel 3 p 703. + if ( $last_last_nonblank_token eq 'do' ) { + complain( +"do SUBROUTINE is deprecated; consider & or -> notation\n" + ); + } + else { + + # if this is an empty list, (), then it is not an + # error; for example, we might have a constant pi and + # invoke it with pi() or just pi; + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, + $max_token_index ); + if ( $next_nonblank_token ne ')' ) { + my $hint; + error_if_expecting_OPERATOR('('); + + if ( $last_nonblank_type eq 'C' ) { + $hint = + "$last_nonblank_token has a void prototype\n"; + } + elsif ( $last_nonblank_type eq 'i' ) { + if ( $i_tok > 0 + && $last_nonblank_token =~ /^\$/ ) + { + $hint = +"Do you mean '$last_nonblank_token->(' ?\n"; + } + } + if ($hint) { + interrupt_logfile(); + warning($hint); + resume_logfile(); + } + } ## end if ( $next_nonblank_token... + } ## end else [ if ( $last_last_nonblank_token... + } ## end if ( $expecting == OPERATOR... + } + $paren_type[$paren_depth] = $container_type; + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); + + # propagate types down through nested parens + # for example: the second paren in 'if ((' would be structural + # since the first is. + + if ( $last_nonblank_token eq '(' ) { + $type = $last_nonblank_type; + } + + # We exclude parens as structural after a ',' because it + # causes subtle problems with continuation indentation for + # something like this, where the first 'or' will not get + # indented. + # + # assert( + # __LINE__, + # ( not defined $check ) + # or ref $check + # or $check eq "new" + # or $check eq "old", + # ); + # + # Likewise, we exclude parens where a statement can start + # because of problems with continuation indentation, like + # these: + # + # ($firstline =~ /^#\!.*perl/) + # and (print $File::Find::name, "\n") + # and (return 1); + # + # (ref($usage_fref) =~ /CODE/) + # ? &$usage_fref + # : (&blast_usage, &blast_params, &blast_general_params); + + else { + $type = '{'; + } + + if ( $last_nonblank_type eq ')' ) { + warning( + "Syntax error? found token '$last_nonblank_type' then '('\n" + ); + } + $paren_structural_type[$paren_depth] = $type; + + }, + ')' => sub { + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] ); + + if ( $paren_structural_type[$paren_depth] eq '{' ) { + $type = '}'; + } + + $container_type = $paren_type[$paren_depth]; + + # restore statement type as 'sub' at closing paren of a signature + # so that a subsequent ':' is identified as an attribute + if ( $container_type =~ /^sub\b/ ) { + $statement_type = $container_type; + } + + # /^(for|foreach)$/ + if ( $is_for_foreach{ $paren_type[$paren_depth] } ) { + my $num_sc = $paren_semicolon_count[$paren_depth]; + if ( $num_sc > 0 && $num_sc != 2 ) { + warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n"); + } + } + + if ( $paren_depth > 0 ) { $paren_depth-- } + }, + ',' => sub { + if ( $last_nonblank_type eq ',' ) { + complain("Repeated ','s \n"); + } + + # patch for operator_expected: note if we are in the list (use.t) + if ( $statement_type eq 'use' ) { $statement_type = '_use' } +## FIXME: need to move this elsewhere, perhaps check after a '(' +## elsif ($last_nonblank_token eq '(') { +## warning("Leading ','s illegal in some versions of perl\n"); +## } + }, + ';' => sub { + $context = UNKNOWN_CONTEXT; + $statement_type = ''; + $want_paren = ""; + + # /^(for|foreach)$/ + if ( $is_for_foreach{ $paren_type[$paren_depth] } ) + { # mark ; in for loop + + # Be careful: we do not want a semicolon such as the + # following to be included: + # + # for (sort {strcoll($a,$b);} keys %investments) { + + if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth] + && $square_bracket_depth == + $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] ) + { + + $type = 'f'; + $paren_semicolon_count[$paren_depth]++; + } + } + + }, + '"' => sub { + error_if_expecting_OPERATOR("String") + if ( $expecting == OPERATOR ); + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = ""; + }, + "'" => sub { + error_if_expecting_OPERATOR("String") + if ( $expecting == OPERATOR ); + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = ""; + }, + '`' => sub { + error_if_expecting_OPERATOR("String") + if ( $expecting == OPERATOR ); + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = ""; + }, + '/' => sub { + my $is_pattern; + + if ( $expecting == UNKNOWN ) { # indeterminate, must guess.. + my $msg; + ( $is_pattern, $msg ) = + guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($msg) { + write_diagnostics("DIVIDE:$msg\n"); + write_logfile_entry($msg); + } + } + else { $is_pattern = ( $expecting == TERM ) } + + if ($is_pattern) { + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = '[msixpodualngc]'; + } + else { # not a pattern; check for a /= token + + if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= + $i++; + $tok = '/='; + $type = $tok; + } + + #DEBUG - collecting info on what tokens follow a divide + # for development of guessing algorithm + #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { + # #write_diagnostics( "DIVIDE? $input_line\n" ); + #} + } + }, + '{' => sub { + + # if we just saw a ')', we will label this block with + # its type. We need to do this to allow sub + # code_block_type to determine if this brace starts a + # code block or anonymous hash. (The type of a paren + # pair is the preceding token, such as 'if', 'else', + # etc). + $container_type = ""; + + # ATTRS: for a '{' following an attribute list, reset + # things to look like we just saw the sub name + if ( $statement_type =~ /^sub/ ) { + $last_nonblank_token = $statement_type; + $last_nonblank_type = 'i'; + $statement_type = ""; + } + + # patch for SWITCH/CASE: hide these keywords from an immediately + # following opening brace + elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) + && $statement_type eq $last_nonblank_token ) + { + $last_nonblank_token = ";"; + } + + elsif ( $last_nonblank_token eq ')' ) { + $last_nonblank_token = $paren_type[ $paren_depth + 1 ]; + + # defensive move in case of a nesting error (pbug.t) + # in which this ')' had no previous '(' + # this nesting error will have been caught + if ( !defined($last_nonblank_token) ) { + $last_nonblank_token = 'if'; + } + + # check for syntax error here; + unless ( $is_blocktype_with_paren{$last_nonblank_token} ) { + if ( $tokenizer_self->{'_extended_syntax'} ) { + + # we append a trailing () to mark this as an unknown + # block type. This allows perltidy to format some + # common extensions of perl syntax. + # This is used by sub code_block_type + $last_nonblank_token .= '()'; + } + else { + my $list = + join( ' ', sort keys %is_blocktype_with_paren ); + warning( +"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n" + ); + } + } + } + + # patch for paren-less for/foreach glitch, part 2. + # see note below under 'qw' + elsif ($last_nonblank_token eq 'qw' + && $is_for_foreach{$want_paren} ) + { + $last_nonblank_token = $want_paren; + if ( $last_last_nonblank_token eq $want_paren ) { + warning( +"syntax error at '$want_paren .. {' -- missing \$ loop variable\n" + ); + + } + $want_paren = ""; + } + + # now identify which of the three possible types of + # curly braces we have: hash index container, anonymous + # hash reference, or code block. + + # non-structural (hash index) curly brace pair + # get marked 'L' and 'R' + if ( is_non_structural_brace() ) { + $type = 'L'; + + # patch for SWITCH/CASE: + # allow paren-less identifier after 'when' + # if the brace is preceded by a space + if ( $statement_type eq 'when' + && $last_nonblank_type eq 'i' + && $last_last_nonblank_type eq 'k' + && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) ) + { + $type = '{'; + $block_type = $statement_type; + } + } + + # code and anonymous hash have the same type, '{', but are + # distinguished by 'block_type', + # which will be blank for an anonymous hash + else { + + $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type, + $max_token_index ); + + # patch to promote bareword type to function taking block + if ( $block_type + && $last_nonblank_type eq 'w' + && $last_nonblank_i >= 0 ) + { + if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) { + $routput_token_type->[$last_nonblank_i] = 'G'; + } + } + + # patch for SWITCH/CASE: if we find a stray opening block brace + # where we might accept a 'case' or 'when' block, then take it + if ( $statement_type eq 'case' + || $statement_type eq 'when' ) + { + if ( !$block_type || $block_type eq '}' ) { + $block_type = $statement_type; + } + } + } + + $brace_type[ ++$brace_depth ] = $block_type; + $brace_package[$brace_depth] = $current_package; + $brace_structural_type[$brace_depth] = $type; + $brace_context[$brace_depth] = $context; + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); + }, + '}' => sub { + $block_type = $brace_type[$brace_depth]; + if ($block_type) { $statement_type = '' } + if ( defined( $brace_package[$brace_depth] ) ) { + $current_package = $brace_package[$brace_depth]; + } + + # can happen on brace error (caught elsewhere) + else { + } + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] ); + + if ( $brace_structural_type[$brace_depth] eq 'L' ) { + $type = 'R'; + } + + # propagate type information for 'do' and 'eval' blocks, and also + # for smartmatch operator. This is necessary to enable us to know + # if an operator or term is expected next. + if ( $is_block_operator{$block_type} ) { + $tok = $block_type; + } + + $context = $brace_context[$brace_depth]; + if ( $brace_depth > 0 ) { $brace_depth--; } + }, + '&' => sub { # maybe sub call? start looking + + # We have to check for sub call unless we are sure we + # are expecting an operator. This example from s2p + # got mistaken as a q operator in an early version: + # print BODY &q(<<'EOT'); + if ( $expecting != OPERATOR ) { + + # But only look for a sub call if we are expecting a term or + # if there is no existing space after the &. + # For example we probably don't want & as sub call here: + # Fcntl::S_IRUSR & $mode; + if ( $expecting == TERM || $next_type ne 'b' ) { + scan_identifier(); + } + } + else { + } + }, + '<' => sub { # angle operator or less than? + + if ( $expecting != OPERATOR ) { + ( $i, $type ) = + find_angle_operator_termination( $input_line, $i, $rtoken_map, + $expecting, $max_token_index ); + + if ( $type eq '<' && $expecting == TERM ) { + error_if_expecting_TERM(); + interrupt_logfile(); + warning("Unterminated <> operator?\n"); + resume_logfile(); + } + } + else { + } + }, + '?' => sub { # ?: conditional or starting pattern? + + my $is_pattern; + + if ( $expecting == UNKNOWN ) { + + my $msg; + ( $is_pattern, $msg ) = + guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($msg) { write_logfile_entry($msg) } + } + else { $is_pattern = ( $expecting == TERM ) } + + if ($is_pattern) { + $in_quote = 1; + $type = 'Q'; + $allowed_quote_modifiers = '[msixpodualngc]'; + } + else { + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( QUESTION_COLON, + $rtoken_map->[$i_tok] ); + } + }, + '*' => sub { # typeglob, or multiply? + + if ( $expecting == TERM ) { + scan_identifier(); + } + else { + + if ( $rtokens->[ $i + 1 ] eq '=' ) { + $tok = '*='; + $type = $tok; + $i++; + } + elsif ( $rtokens->[ $i + 1 ] eq '*' ) { + $tok = '**'; + $type = $tok; + $i++; + if ( $rtokens->[ $i + 1 ] eq '=' ) { + $tok = '**='; + $type = $tok; + $i++; + } + } + } + }, + '.' => sub { # what kind of . ? + + if ( $expecting != OPERATOR ) { + scan_number(); + if ( $type eq '.' ) { + error_if_expecting_TERM() + if ( $expecting == TERM ); + } + } + else { + } + }, + ':' => sub { + + # if this is the first nonblank character, call it a label + # since perl seems to just swallow it + if ( $input_line_number == 1 && $last_nonblank_i == -1 ) { + $type = 'J'; + } + + # ATTRS: check for a ':' which introduces an attribute list + # (this might eventually get its own token type) + elsif ( $statement_type =~ /^sub\b/ ) { + $type = 'A'; + $in_attribute_list = 1; + } + + # check for scalar attribute, such as + # my $foo : shared = 1; + elsif ($is_my_our{$statement_type} + && $current_depth[QUESTION_COLON] == 0 ) + { + $type = 'A'; + $in_attribute_list = 1; + } + + # otherwise, it should be part of a ?/: operator + else { + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( QUESTION_COLON, + $rtoken_map->[$i_tok] ); + if ( $last_nonblank_token eq '?' ) { + warning("Syntax error near ? :\n"); + } + } + }, + '+' => sub { # what kind of plus? + + if ( $expecting == TERM ) { + my $number = scan_number(); + + # unary plus is safest assumption if not a number + if ( !defined($number) ) { $type = 'p'; } + } + elsif ( $expecting == OPERATOR ) { + } + else { + if ( $next_type eq 'w' ) { $type = 'p' } + } + }, + '@' => sub { + + error_if_expecting_OPERATOR("Array") + if ( $expecting == OPERATOR ); + scan_identifier(); + }, + '%' => sub { # hash or modulo? + + # first guess is hash if no following blank + if ( $expecting == UNKNOWN ) { + if ( $next_type ne 'b' ) { $expecting = TERM } + } + if ( $expecting == TERM ) { + scan_identifier(); + } + }, + '[' => sub { + $square_bracket_type[ ++$square_bracket_depth ] = + $last_nonblank_token; + ( $type_sequence, $indent_flag ) = + increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] ); + + # It may seem odd, but structural square brackets have + # type '{' and '}'. This simplifies the indentation logic. + if ( !is_non_structural_brace() ) { + $type = '{'; + } + $square_bracket_structural_type[$square_bracket_depth] = $type; + }, + ']' => sub { + ( $type_sequence, $indent_flag ) = + decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] ); + + if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) + { + $type = '}'; + } + + # propagate type information for smartmatch operator. This is + # necessary to enable us to know if an operator or term is expected + # next. + if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) { + $tok = $square_bracket_type[$square_bracket_depth]; + } + + if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; } + }, + '-' => sub { # what kind of minus? + + if ( ( $expecting != OPERATOR ) + && $is_file_test_operator{$next_tok} ) + { + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i + 1, $rtokens, + $max_token_index ); + + # check for a quoted word like "-w=>xx"; + # it is sufficient to just check for a following '=' + if ( $next_nonblank_token eq '=' ) { + $type = 'm'; + } + else { + $i++; + $tok .= $next_tok; + $type = 'F'; + } + } + elsif ( $expecting == TERM ) { + my $number = scan_number(); + + # maybe part of bareword token? unary is safest + if ( !defined($number) ) { $type = 'm'; } + + } + elsif ( $expecting == OPERATOR ) { + } + else { + + if ( $next_type eq 'w' ) { + $type = 'm'; + } + } + }, + + '^' => sub { + + # check for special variables like ${^WARNING_BITS} + if ( $expecting == TERM ) { + + # FIXME: this should work but will not catch errors + # because we also have to be sure that previous token is + # a type character ($,@,%). + if ( $last_nonblank_token eq '{' + && ( $next_tok =~ /^[A-Za-z_]/ ) ) + { + + if ( $next_tok eq 'W' ) { + $tokenizer_self->{_saw_perl_dash_w} = 1; + } + $tok = $tok . $next_tok; + $i = $i + 1; + $type = 'w'; + } + + else { + unless ( error_if_expecting_TERM() ) { + + # Something like this is valid but strange: + # undef ^I; + complain("The '^' seems unusual here\n"); + } + } + } + }, + + '::' => sub { # probably a sub call + scan_bare_identifier(); + }, + '<<' => sub { # maybe a here-doc? + return + unless ( $i < $max_token_index ) + ; # here-doc not possible if end of line + + if ( $expecting != OPERATOR ) { + my ( $found_target, $here_doc_target, $here_quote_character, + $saw_error ); + ( + $found_target, $here_doc_target, $here_quote_character, $i, + $saw_error + ) + = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($found_target) { + push @{$rhere_target_list}, + [ $here_doc_target, $here_quote_character ]; + $type = 'h'; + if ( length($here_doc_target) > 80 ) { + my $truncated = substr( $here_doc_target, 0, 80 ); + complain("Long here-target: '$truncated' ...\n"); + } + elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { + complain( + "Unconventional here-target: '$here_doc_target'\n"); + } + } + elsif ( $expecting == TERM ) { + unless ($saw_error) { + + # shouldn't happen.. + warning("Program bug; didn't find here doc target\n"); + report_definite_bug(); + } + } + } + else { + } + }, + '<<~' => sub { # a here-doc, new type added in v26 + return + unless ( $i < $max_token_index ) + ; # here-doc not possible if end of line + if ( $expecting != OPERATOR ) { + my ( $found_target, $here_doc_target, $here_quote_character, + $saw_error ); + ( + $found_target, $here_doc_target, $here_quote_character, $i, + $saw_error + ) + = find_here_doc( $expecting, $i, $rtokens, $rtoken_map, + $max_token_index ); + + if ($found_target) { + + if ( length($here_doc_target) > 80 ) { + my $truncated = substr( $here_doc_target, 0, 80 ); + complain("Long here-target: '$truncated' ...\n"); + } + elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) { + complain( + "Unconventional here-target: '$here_doc_target'\n"); + } + + # Note that we put a leading space on the here quote + # character indicate that it may be preceded by spaces + $here_quote_character = " " . $here_quote_character; + push @{$rhere_target_list}, + [ $here_doc_target, $here_quote_character ]; + $type = 'h'; + } + elsif ( $expecting == TERM ) { + unless ($saw_error) { + + # shouldn't happen.. + warning("Program bug; didn't find here doc target\n"); + report_definite_bug(); + } + } + } + else { + } + }, + '->' => sub { + + # if -> points to a bare word, we must scan for an identifier, + # otherwise something like ->y would look like the y operator + scan_identifier(); + }, + + # type = 'pp' for pre-increment, '++' for post-increment + '++' => sub { + if ( $expecting == TERM ) { $type = 'pp' } + elsif ( $expecting == UNKNOWN ) { + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + if ( $next_nonblank_token eq '$' ) { $type = 'pp' } + } + }, + + '=>' => sub { + if ( $last_nonblank_type eq $tok ) { + complain("Repeated '=>'s \n"); + } + + # patch for operator_expected: note if we are in the list (use.t) + # TODO: make version numbers a new token type + if ( $statement_type eq 'use' ) { $statement_type = '_use' } + }, + + # type = 'mm' for pre-decrement, '--' for post-decrement + '--' => sub { + + if ( $expecting == TERM ) { $type = 'mm' } + elsif ( $expecting == UNKNOWN ) { + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + if ( $next_nonblank_token eq '$' ) { $type = 'mm' } + } + }, + + '&&' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + + '||' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + + '//' => sub { + error_if_expecting_TERM() + if ( $expecting == TERM ); + }, + }; + + # ------------------------------------------------------------ + # end hash of code for handling individual token types + # ------------------------------------------------------------ + + my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' ); + + # These block types terminate statements and do not need a trailing + # semicolon + # patched for SWITCH/CASE/ + my %is_zero_continuation_block_type; + @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; + if elsif else unless while until for foreach switch case given when); + @is_zero_continuation_block_type{@_} = (1) x scalar(@_); + + my %is_not_zero_continuation_block_type; + @_ = qw(sort grep map do eval); + @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_); + + my %is_logical_container; + @_ = qw(if elsif unless while and or err not && ! || for foreach); + @is_logical_container{@_} = (1) x scalar(@_); + + my %is_binary_type; + @_ = qw(|| &&); + @is_binary_type{@_} = (1) x scalar(@_); + + my %is_binary_keyword; + @_ = qw(and or err eq ne cmp); + @is_binary_keyword{@_} = (1) x scalar(@_); + + # 'L' is token for opening { at hash key + my %is_opening_type; + @_ = qw< L { ( [ >; + @is_opening_type{@_} = (1) x scalar(@_); + + # 'R' is token for closing } at hash key + my %is_closing_type; + @_ = qw< R } ) ] >; + @is_closing_type{@_} = (1) x scalar(@_); + + my %is_redo_last_next_goto; + @_ = qw(redo last next goto); + @is_redo_last_next_goto{@_} = (1) x scalar(@_); + + my %is_use_require; + @_ = qw(use require); + @is_use_require{@_} = (1) x scalar(@_); + + my %is_sub_package; + @_ = qw(sub package); + @is_sub_package{@_} = (1) x scalar(@_); + + # This hash holds the hash key in $tokenizer_self for these keywords: + my %is_format_END_DATA = ( + 'format' => '_in_format', + '__END__' => '_in_end', + '__DATA__' => '_in_data', + ); + + # original ref: camel 3 p 147, + # but perl may accept undocumented flags + # perl 5.10 adds 'p' (preserve) + # Perl version 5.22 added 'n' + # From http://perldoc.perl.org/perlop.html we have + # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc + # s/PATTERN/REPLACEMENT/msixpodualngcer + # y/SEARCHLIST/REPLACEMENTLIST/cdsr + # tr/SEARCHLIST/REPLACEMENTLIST/cdsr + # qr/STRING/msixpodualn + my %quote_modifiers = ( + 's' => '[msixpodualngcer]', + 'y' => '[cdsr]', + 'tr' => '[cdsr]', + 'm' => '[msixpodualngc]', + 'qr' => '[msixpodualn]', + 'q' => "", + 'qq' => "", + 'qw' => "", + 'qx' => "", + ); + + # table showing how many quoted things to look for after quote operator.. + # s, y, tr have 2 (pattern and replacement) + # others have 1 (pattern only) + my %quote_items = ( + 's' => 2, + 'y' => 2, + 'tr' => 2, + 'm' => 1, + 'qr' => 1, + 'q' => 1, + 'qq' => 1, + 'qw' => 1, + 'qx' => 1, + ); + + sub tokenize_this_line { + + # This routine breaks a line of perl code into tokens which are of use in + # indentation and reformatting. One of my goals has been to define tokens + # such that a newline may be inserted between any pair of tokens without + # changing or invalidating the program. This version comes close to this, + # although there are necessarily a few exceptions which must be caught by + # the formatter. Many of these involve the treatment of bare words. + # + # The tokens and their types are returned in arrays. See previous + # routine for their names. + # + # See also the array "valid_token_types" in the BEGIN section for an + # up-to-date list. + # + # To simplify things, token types are either a single character, or they + # are identical to the tokens themselves. + # + # As a debugging aid, the -D flag creates a file containing a side-by-side + # comparison of the input string and its tokenization for each line of a file. + # This is an invaluable debugging aid. + # + # In addition to tokens, and some associated quantities, the tokenizer + # also returns flags indication any special line types. These include + # quotes, here_docs, formats. + # + # ----------------------------------------------------------------------- + # + # How to add NEW_TOKENS: + # + # New token types will undoubtedly be needed in the future both to keep up + # with changes in perl and to help adapt the tokenizer to other applications. + # + # Here are some notes on the minimal steps. I wrote these notes while + # adding the 'v' token type for v-strings, which are things like version + # numbers 5.6.0, and ip addresses, and will use that as an example. ( You + # can use your editor to search for the string "NEW_TOKENS" to find the + # appropriate sections to change): + # + # *. Try to talk somebody else into doing it! If not, .. + # + # *. Make a backup of your current version in case things don't work out! + # + # *. Think of a new, unused character for the token type, and add to + # the array @valid_token_types in the BEGIN section of this package. + # For example, I used 'v' for v-strings. + # + # *. Implement coding to recognize the $type of the token in this routine. + # This is the hardest part, and is best done by imitating or modifying + # some of the existing coding. For example, to recognize v-strings, I + # patched 'sub scan_bare_identifier' to recognize v-strings beginning with + # 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'. + # + # *. Update sub operator_expected. This update is critically important but + # the coding is trivial. Look at the comments in that routine for help. + # For v-strings, which should behave like numbers, I just added 'v' to the + # regex used to handle numbers and strings (types 'n' and 'Q'). + # + # *. Implement a 'bond strength' rule in sub set_bond_strengths in + # Perl::Tidy::Formatter for breaking lines around this token type. You can + # skip this step and take the default at first, then adjust later to get + # desired results. For adding type 'v', I looked at sub bond_strength and + # saw that number type 'n' was using default strengths, so I didn't do + # anything. I may tune it up someday if I don't like the way line + # breaks with v-strings look. + # + # *. Implement a 'whitespace' rule in sub set_whitespace_flags in + # Perl::Tidy::Formatter. For adding type 'v', I looked at this routine + # and saw that type 'n' used spaces on both sides, so I just added 'v' + # to the array @spaces_both_sides. + # + # *. Update HtmlWriter package so that users can colorize the token as + # desired. This is quite easy; see comments identified by 'NEW_TOKENS' in + # that package. For v-strings, I initially chose to use a default color + # equal to the default for numbers, but it might be nice to change that + # eventually. + # + # *. Update comments in Perl::Tidy::Tokenizer::dump_token_types. + # + # *. Run lots and lots of debug tests. Start with special files designed + # to test the new token type. Run with the -D flag to create a .DEBUG + # file which shows the tokenization. When these work ok, test as many old + # scripts as possible. Start with all of the '.t' files in the 'test' + # directory of the distribution file. Compare .tdy output with previous + # version and updated version to see the differences. Then include as + # many more files as possible. My own technique has been to collect a huge + # number of perl scripts (thousands!) into one directory and run perltidy + # *, then run diff between the output of the previous version and the + # current version. + # + # *. For another example, search for the smartmatch operator '~~' + # with your editor to see where updates were made for it. + # + # ----------------------------------------------------------------------- + + my $line_of_tokens = shift; + my ($untrimmed_input_line) = $line_of_tokens->{_line_text}; + + # patch while coding change is underway + # make callers private data to allow access + # $tokenizer_self = $caller_tokenizer_self; + + # extract line number for use in error messages + $input_line_number = $line_of_tokens->{_line_number}; + + # reinitialize for multi-line quote + $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q'; + + # check for pod documentation + if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) { + + # must not be in multi-line quote + # and must not be in an equation + if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) ) + { + $tokenizer_self->{_in_pod} = 1; + return; + } + } + + $input_line = $untrimmed_input_line; + + chomp $input_line; + + # trim start of this line unless we are continuing a quoted line + # do not trim end because we might end in a quote (test: deken4.pl) + # Perl::Tidy::Formatter will delete needless trailing blanks + unless ( $in_quote && ( $quote_type eq 'Q' ) ) { + $input_line =~ s/^\s*//; # trim left end + } + + # Set a flag to indicate if we might be at an __END__ or __DATA__ line + # This will be used below to avoid quoting a bare word followed by + # a fat comma. + my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/; + + # update the copy of the line for use in error messages + # This must be exactly what we give the pre_tokenizer + $tokenizer_self->{_line_text} = $input_line; + + # re-initialize for the main loop + $routput_token_list = []; # stack of output token indexes + $routput_token_type = []; # token types + $routput_block_type = []; # types of code block + $routput_container_type = []; # paren types, such as if, elsif, .. + $routput_type_sequence = []; # nesting sequential number + + $rhere_target_list = []; + + $tok = $last_nonblank_token; + $type = $last_nonblank_type; + $prototype = $last_nonblank_prototype; + $last_nonblank_i = -1; + $block_type = $last_nonblank_block_type; + $container_type = $last_nonblank_container_type; + $type_sequence = $last_nonblank_type_sequence; + $indent_flag = 0; + $peeked_ahead = 0; + + # tokenization is done in two stages.. + # stage 1 is a very simple pre-tokenization + my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens + + # a little optimization for a full-line comment + if ( !$in_quote && ( $input_line =~ /^#/ ) ) { + $max_tokens_wanted = 1 # no use tokenizing a comment + } + + # start by breaking the line into pre-tokens + ( $rtokens, $rtoken_map, $rtoken_type ) = + pre_tokenize( $input_line, $max_tokens_wanted ); + + $max_token_index = scalar( @{$rtokens} ) - 1; + push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic + push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced + push( @{$rtoken_type}, 'b', 'b', 'b' ); + + # initialize for main loop + foreach my $ii ( 0 .. $max_token_index + 3 ) { + $routput_token_type->[$ii] = ""; + $routput_block_type->[$ii] = ""; + $routput_container_type->[$ii] = ""; + $routput_type_sequence->[$ii] = ""; + $routput_indent_flag->[$ii] = 0; + } + $i = -1; + $i_tok = -1; + + # ------------------------------------------------------------ + # begin main tokenization loop + # ------------------------------------------------------------ + + # we are looking at each pre-token of one line and combining them + # into tokens + while ( ++$i <= $max_token_index ) { + + if ($in_quote) { # continue looking for end of a quote + $type = $quote_type; + + unless ( @{$routput_token_list} ) + { # initialize if continuation line + push( @{$routput_token_list}, $i ); + $routput_token_type->[$i] = $type; + + } + $tok = $quote_character unless ( $quote_character =~ /^\s*$/ ); + + # scan for the end of the quote or pattern + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2 + ) + = do_quote( + $i, $in_quote, $quote_character, + $quote_pos, $quote_depth, $quoted_string_1, + $quoted_string_2, $rtokens, $rtoken_map, + $max_token_index + ); + + # all done if we didn't find it + last if ($in_quote); + + # save pattern and replacement text for rescanning + my $qs1 = $quoted_string_1; + my $qs2 = $quoted_string_2; + + # re-initialize for next search + $quote_character = ''; + $quote_pos = 0; + $quote_type = 'Q'; + $quoted_string_1 = ""; + $quoted_string_2 = ""; + last if ( ++$i > $max_token_index ); + + # look for any modifiers + if ($allowed_quote_modifiers) { + + # check for exact quote modifiers + if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) { + my $str = $rtokens->[$i]; + my $saw_modifier_e; + while ( $str =~ /\G$allowed_quote_modifiers/gc ) { + my $pos = pos($str); + my $char = substr( $str, $pos - 1, 1 ); + $saw_modifier_e ||= ( $char eq 'e' ); + } + + # For an 'e' quote modifier we must scan the replacement + # text for here-doc targets. + if ($saw_modifier_e) { + + my $rht = scan_replacement_text($qs1); + + # Change type from 'Q' to 'h' for quotes with + # here-doc targets so that the formatter (see sub + # print_line_of_tokens) will not make any line + # breaks after this point. + if ($rht) { + push @{$rhere_target_list}, @{$rht}; + $type = 'h'; + if ( $i_tok < 0 ) { + my $ilast = $routput_token_list->[-1]; + $routput_token_type->[$ilast] = $type; + } + } + } + + if ( defined( pos($str) ) ) { + + # matched + if ( pos($str) == length($str) ) { + last if ( ++$i > $max_token_index ); + } + + # Looks like a joined quote modifier + # and keyword, maybe something like + # s/xxx/yyy/gefor @k=... + # Example is "galgen.pl". Would have to split + # the word and insert a new token in the + # pre-token list. This is so rare that I haven't + # done it. Will just issue a warning citation. + + # This error might also be triggered if my quote + # modifier characters are incomplete + else { + warning(<[$i]\n"; + # my $num = length($str) - pos($str); + # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num); + # print "continuing with new token $rtokens->[$i]\n"; + + # skipping past this token does least damage + last if ( ++$i > $max_token_index ); + } + } + else { + + # example file: rokicki4.pl + # This error might also be triggered if my quote + # modifier characters are incomplete + write_logfile_entry( +"Note: found word $str at quote modifier location\n" + ); + } + } + + # re-initialize + $allowed_quote_modifiers = ""; + } + } + + unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) { + + # try to catch some common errors + if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) { + + if ( $last_nonblank_token eq 'eq' ) { + complain("Should 'eq' be '==' here ?\n"); + } + elsif ( $last_nonblank_token eq 'ne' ) { + complain("Should 'ne' be '!=' here ?\n"); + } + } + + $last_last_nonblank_token = $last_nonblank_token; + $last_last_nonblank_type = $last_nonblank_type; + $last_last_nonblank_block_type = $last_nonblank_block_type; + $last_last_nonblank_container_type = + $last_nonblank_container_type; + $last_last_nonblank_type_sequence = + $last_nonblank_type_sequence; + $last_nonblank_token = $tok; + $last_nonblank_type = $type; + $last_nonblank_prototype = $prototype; + $last_nonblank_block_type = $block_type; + $last_nonblank_container_type = $container_type; + $last_nonblank_type_sequence = $type_sequence; + $last_nonblank_i = $i_tok; + } + + # store previous token type + if ( $i_tok >= 0 ) { + $routput_token_type->[$i_tok] = $type; + $routput_block_type->[$i_tok] = $block_type; + $routput_container_type->[$i_tok] = $container_type; + $routput_type_sequence->[$i_tok] = $type_sequence; + $routput_indent_flag->[$i_tok] = $indent_flag; + } + my $pre_tok = $rtokens->[$i]; # get the next pre-token + my $pre_type = $rtoken_type->[$i]; # and type + $tok = $pre_tok; + $type = $pre_type; # to be modified as necessary + $block_type = ""; # blank for all tokens except code block braces + $container_type = ""; # blank for all tokens except some parens + $type_sequence = ""; # blank for all tokens except ?/: + $indent_flag = 0; + $prototype = ""; # blank for all tokens except user defined subs + $i_tok = $i; + + # this pre-token will start an output token + push( @{$routput_token_list}, $i_tok ); + + # continue gathering identifier if necessary + # but do not start on blanks and comments + if ( $id_scan_state && $pre_type !~ /[b#]/ ) { + + if ( $id_scan_state =~ /^(sub|package)/ ) { + scan_id(); + } + else { + scan_identifier(); + } + + last if ($id_scan_state); + next if ( ( $i > 0 ) || $type ); + + # didn't find any token; start over + $type = $pre_type; + $tok = $pre_tok; + } + + # handle whitespace tokens.. + next if ( $type eq 'b' ); + my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : ' '; + my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b'; + + # Build larger tokens where possible, since we are not in a quote. + # + # First try to assemble digraphs. The following tokens are + # excluded and handled specially: + # '/=' is excluded because the / might start a pattern. + # 'x=' is excluded since it might be $x=, with $ on previous line + # '**' and *= might be typeglobs of punctuation variables + # I have allowed tokens starting with <, such as <=, + # because I don't think these could be valid angle operators. + # test file: storrs4.pl + my $test_tok = $tok . $rtokens->[ $i + 1 ]; + my $combine_ok = $is_digraph{$test_tok}; + + # check for special cases which cannot be combined + if ($combine_ok) { + + # '//' must be defined_or operator if an operator is expected. + # TODO: Code for other ambiguous digraphs (/=, x=, **, *=) + # could be migrated here for clarity + + # Patch for RT#102371, misparsing a // in the following snippet: + # state $b //= ccc(); + # The solution is to always accept the digraph (or trigraph) after + # token type 'Z' (possible file handle). The reason is that + # sub operator_expected gives TERM expected here, which is + # wrong in this case. + if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) { + my $next_type = $rtokens->[ $i + 1 ]; + my $expecting = + operator_expected( $prev_type, $tok, $next_type ); + + # Patched for RT#101547, was 'unless ($expecting==OPERATOR)' + $combine_ok = 0 if ( $expecting == TERM ); + } + + # Patch for RT #114359: Missparsing of "print $x ** 0.5; + # Accept the digraphs '**' only after type 'Z' + # Otherwise postpone the decision. + if ( $test_tok eq '**' ) { + if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 } + } + } + + if ( + $combine_ok + + && ( $test_tok ne '/=' ) # might be pattern + && ( $test_tok ne 'x=' ) # might be $x + && ( $test_tok ne '*=' ) # typeglob? + + # Moved above as part of fix for + # RT #114359: Missparsing of "print $x ** 0.5; + # && ( $test_tok ne '**' ) # typeglob? + ) + { + $tok = $test_tok; + $i++; + + # Now try to assemble trigraphs. Note that all possible + # perl trigraphs can be constructed by appending a character + # to a digraph. + $test_tok = $tok . $rtokens->[ $i + 1 ]; + + if ( $is_trigraph{$test_tok} ) { + $tok = $test_tok; + $i++; + } + + # The only current tetragraph is the double diamond operator + # and its first three characters are not a trigraph, so + # we do can do a special test for it + elsif ( $test_tok eq '<<>' ) { + $test_tok .= $rtokens->[ $i + 2 ]; + if ( $is_tetragraph{$test_tok} ) { + $tok = $test_tok; + $i += 2; + } + } + } + + $type = $tok; + $next_tok = $rtokens->[ $i + 1 ]; + $next_type = $rtoken_type->[ $i + 1 ]; + + TOKENIZER_DEBUG_FLAG_TOKENIZE && do { + local $" = ')('; + my @debug_list = ( + $last_nonblank_token, $tok, + $next_tok, $brace_depth, + $brace_type[$brace_depth], $paren_depth, + $paren_type[$paren_depth] + ); + print STDOUT "TOKENIZE:(@debug_list)\n"; + }; + + # turn off attribute list on first non-blank, non-bareword + if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } + + ############################################################### + # We have the next token, $tok. + # Now we have to examine this token and decide what it is + # and define its $type + # + # section 1: bare words + ############################################################### + + if ( $pre_type eq 'w' ) { + $expecting = operator_expected( $prev_type, $tok, $next_type ); + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # ATTRS: handle sub and variable attributes + if ($in_attribute_list) { + + # treat bare word followed by open paren like qw( + if ( $next_nonblank_token eq '(' ) { + $in_quote = $quote_items{'q'}; + $allowed_quote_modifiers = $quote_modifiers{'q'}; + $type = 'q'; + $quote_type = 'q'; + next; + } + + # handle bareword not followed by open paren + else { + $type = 'w'; + next; + } + } + + # quote a word followed by => operator + # unless the word __END__ or __DATA__ and the only word on + # the line. + if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) { + + if ( $rtokens->[ $i_next + 1 ] eq '>' ) { + if ( $is_constant{$current_package}{$tok} ) { + $type = 'C'; + } + elsif ( $is_user_function{$current_package}{$tok} ) { + $type = 'U'; + $prototype = + $user_function_prototype{$current_package}{$tok}; + } + elsif ( $tok =~ /^v\d+$/ ) { + $type = 'v'; + report_v_string($tok); + } + else { $type = 'w' } + + next; + } + } + + # quote a bare word within braces..like xxx->{s}; note that we + # must be sure this is not a structural brace, to avoid + # mistaking {s} in the following for a quoted bare word: + # for(@[){s}bla}BLA} + # Also treat q in something like var{-q} as a bare word, not qoute operator + if ( + $next_nonblank_token eq '}' + && ( + $last_nonblank_type eq 'L' + || ( $last_nonblank_type eq 'm' + && $last_last_nonblank_type eq 'L' ) + ) + ) + { + $type = 'w'; + next; + } + + # a bare word immediately followed by :: is not a keyword; + # use $tok_kw when testing for keywords to avoid a mistake + my $tok_kw = $tok; + if ( $rtokens->[ $i + 1 ] eq ':' + && $rtokens->[ $i + 2 ] eq ':' ) + { + $tok_kw .= '::'; + } + + # handle operator x (now we know it isn't $x=) + if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) { + if ( $tok eq 'x' ) { + + if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= + $tok = 'x='; + $type = $tok; + $i++; + } + else { + $type = 'x'; + } + } + + # FIXME: Patch: mark something like x4 as an integer for now + # It gets fixed downstream. This is easier than + # splitting the pretoken. + else { + $type = 'n'; + } + } + elsif ( $tok_kw eq 'CORE::' ) { + $type = $tok = $tok_kw; + $i += 2; + } + elsif ( ( $tok eq 'strict' ) + and ( $last_nonblank_token eq 'use' ) ) + { + $tokenizer_self->{_saw_use_strict} = 1; + scan_bare_identifier(); + } + + elsif ( ( $tok eq 'warnings' ) + and ( $last_nonblank_token eq 'use' ) ) + { + $tokenizer_self->{_saw_perl_dash_w} = 1; + + # scan as identifier, so that we pick up something like: + # use warnings::register + scan_bare_identifier(); + } + + elsif ( + $tok eq 'AutoLoader' + && $tokenizer_self->{_look_for_autoloader} + && ( + $last_nonblank_token eq 'use' + + # these regexes are from AutoSplit.pm, which we want + # to mimic + || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/ + || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/ + ) + ) + { + write_logfile_entry("AutoLoader seen, -nlal deactivates\n"); + $tokenizer_self->{_saw_autoloader} = 1; + $tokenizer_self->{_look_for_autoloader} = 0; + scan_bare_identifier(); + } + + elsif ( + $tok eq 'SelfLoader' + && $tokenizer_self->{_look_for_selfloader} + && ( $last_nonblank_token eq 'use' + || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/ + || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ ) + ) + { + write_logfile_entry("SelfLoader seen, -nlsl deactivates\n"); + $tokenizer_self->{_saw_selfloader} = 1; + $tokenizer_self->{_look_for_selfloader} = 0; + scan_bare_identifier(); + } + + elsif ( ( $tok eq 'constant' ) + and ( $last_nonblank_token eq 'use' ) ) + { + scan_bare_identifier(); + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, + $max_token_index ); + + if ($next_nonblank_token) { + + if ( $is_keyword{$next_nonblank_token} ) { + + # Assume qw is used as a quote and okay, as in: + # use constant qw{ DEBUG 0 }; + # Not worth trying to parse for just a warning + + # NOTE: This warning is deactivated because recent + # versions of perl do not complain here, but + # the coding is retained for reference. + if ( 0 && $next_nonblank_token ne 'qw' ) { + warning( +"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" + ); + } + } + + # FIXME: could check for error in which next token is + # not a word (number, punctuation, ..) + else { + $is_constant{$current_package}{$next_nonblank_token} + = 1; + } + } + } + + # various quote operators + elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { +##NICOL PATCH + if ( $expecting == OPERATOR ) { + + # Be careful not to call an error for a qw quote + # where a parenthesized list is allowed. For example, + # it could also be a for/foreach construct such as + # + # foreach my $key qw\Uno Due Tres Quadro\ { + # print "Set $key\n"; + # } + # + + # Or it could be a function call. + # NOTE: Braces in something like &{ xxx } are not + # marked as a block, we might have a method call. + # &method(...), $method->(..), &{method}(...), + # $ref[2](list) is ok & short for $ref[2]->(list) + # + # See notes in 'sub code_block_type' and + # 'sub is_non_structural_brace' + + unless ( + $tok eq 'qw' + && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/ + || $is_for_foreach{$want_paren} ) + ) + { + error_if_expecting_OPERATOR(); + } + } + $in_quote = $quote_items{$tok}; + $allowed_quote_modifiers = $quote_modifiers{$tok}; + + # All quote types are 'Q' except possibly qw quotes. + # qw quotes are special in that they may generally be trimmed + # of leading and trailing whitespace. So they are given a + # separate type, 'q', unless requested otherwise. + $type = + ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} ) + ? 'q' + : 'Q'; + $quote_type = $type; + } + + # check for a statement label + elsif ( + ( $next_nonblank_token eq ':' ) + && ( $rtokens->[ $i_next + 1 ] ne ':' ) + && ( $i_next <= $max_token_index ) # colon on same line + && label_ok() + ) + { + if ( $tok !~ /[A-Z]/ ) { + push @{ $tokenizer_self->{_rlower_case_labels_at} }, + $input_line_number; + } + $type = 'J'; + $tok .= ':'; + $i = $i_next; + next; + } + + # 'sub' || 'package' + elsif ( $is_sub_package{$tok_kw} ) { + error_if_expecting_OPERATOR() + if ( $expecting == OPERATOR ); + scan_id(); + } + + # Note on token types for format, __DATA__, __END__: + # It simplifies things to give these type ';', so that when we + # start rescanning we will be expecting a token of type TERM. + # We will switch to type 'k' before outputting the tokens. + elsif ( $is_format_END_DATA{$tok_kw} ) { + $type = ';'; # make tokenizer look for TERM next + $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1; + last; + } + + elsif ( $is_keyword{$tok_kw} ) { + $type = 'k'; + + # Since for and foreach may not be followed immediately + # by an opening paren, we have to remember which keyword + # is associated with the next '(' + if ( $is_for_foreach{$tok} ) { + if ( new_statement_ok() ) { + $want_paren = $tok; + } + } + + # recognize 'use' statements, which are special + elsif ( $is_use_require{$tok} ) { + $statement_type = $tok; + error_if_expecting_OPERATOR() + if ( $expecting == OPERATOR ); + } + + # remember my and our to check for trailing ": shared" + elsif ( $is_my_our{$tok} ) { + $statement_type = $tok; + } + + # Check for misplaced 'elsif' and 'else', but allow isolated + # else or elsif blocks to be formatted. This is indicated + # by a last noblank token of ';' + elsif ( $tok eq 'elsif' ) { + if ( $last_nonblank_token ne ';' + && $last_nonblank_block_type !~ + /^(if|elsif|unless)$/ ) + { + warning( +"expecting '$tok' to follow one of 'if|elsif|unless'\n" + ); + } + } + elsif ( $tok eq 'else' ) { + + # patched for SWITCH/CASE + if ( + $last_nonblank_token ne ';' + && $last_nonblank_block_type !~ + /^(if|elsif|unless|case|when)$/ + + # patch to avoid an unwanted error message for + # the case of a parenless 'case' (RT 105484): + # switch ( 1 ) { case x { 2 } else { } } + && $statement_type !~ + /^(if|elsif|unless|case|when)$/ + ) + { + warning( +"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n" + ); + } + } + elsif ( $tok eq 'continue' ) { + if ( $last_nonblank_token ne ';' + && $last_nonblank_block_type !~ + /(^(\{|\}|;|while|until|for|foreach)|:$)/ ) + { + + # note: ';' '{' and '}' in list above + # because continues can follow bare blocks; + # ':' is labeled block + # + ############################################ + # NOTE: This check has been deactivated because + # continue has an alternative usage for given/when + # blocks in perl 5.10 + ## warning("'$tok' should follow a block\n"); + ############################################ + } + } + + # patch for SWITCH/CASE if 'case' and 'when are + # treated as keywords. + elsif ( $tok eq 'when' || $tok eq 'case' ) { + $statement_type = $tok; # next '{' is block + } + + # + # indent trailing if/unless/while/until + # outdenting will be handled by later indentation loop +## DEACTIVATED: unfortunately this can cause some unwanted indentation like: +##$opt_o = 1 +## if !( +## $opt_b +## || $opt_c +## || $opt_d +## || $opt_f +## || $opt_i +## || $opt_l +## || $opt_o +## || $opt_x +## ); +## if ( $tok =~ /^(if|unless|while|until)$/ +## && $next_nonblank_token ne '(' ) +## { +## $indent_flag = 1; +## } + } + + # check for inline label following + # /^(redo|last|next|goto)$/ + elsif (( $last_nonblank_type eq 'k' ) + && ( $is_redo_last_next_goto{$last_nonblank_token} ) ) + { + $type = 'j'; + next; + } + + # something else -- + else { + + scan_bare_identifier(); + if ( $type eq 'w' ) { + + if ( $expecting == OPERATOR ) { + + # don't complain about possible indirect object + # notation. + # For example: + # package main; + # sub new($) { ... } + # $b = new A::; # calls A::new + # $c = new A; # same thing but suspicious + # This will call A::new but we have a 'new' in + # main:: which looks like a constant. + # + if ( $last_nonblank_type eq 'C' ) { + if ( $tok !~ /::$/ ) { + complain(<[ $i + 1 ]; + if ( $next_tok eq '(' ) { + $type = 'U'; + } + + # underscore after file test operator is file handle + if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { + $type = 'Z'; + } + + # patch for SWITCH/CASE if 'case' and 'when are + # not treated as keywords: + if ( + ( + $tok eq 'case' + && $brace_type[$brace_depth] eq 'switch' + ) + || ( $tok eq 'when' + && $brace_type[$brace_depth] eq 'given' ) + ) + { + $statement_type = $tok; # next '{' is block + $type = 'k'; # for keyword syntax coloring + } + + # patch for SWITCH/CASE if switch and given not keywords + # Switch is not a perl 5 keyword, but we will gamble + # and mark switch followed by paren as a keyword. This + # is only necessary to get html syntax coloring nice, + # and does not commit this as being a switch/case. + if ( $next_nonblank_token eq '(' + && ( $tok eq 'switch' || $tok eq 'given' ) ) + { + $type = 'k'; # for keyword syntax coloring + } + } + } + } + + ############################################################### + # section 2: strings of digits + ############################################################### + elsif ( $pre_type eq 'd' ) { + $expecting = operator_expected( $prev_type, $tok, $next_type ); + error_if_expecting_OPERATOR("Number") + if ( $expecting == OPERATOR ); + my $number = scan_number(); + if ( !defined($number) ) { + + # shouldn't happen - we should always get a number + warning("non-number beginning with digit--program bug\n"); + report_definite_bug(); + } + } + + ############################################################### + # section 3: all other tokens + ############################################################### + + else { + last if ( $tok eq '#' ); + my $code = $tokenization_code->{$tok}; + if ($code) { + $expecting = + operator_expected( $prev_type, $tok, $next_type ); + $code->(); + redo if $in_quote; + } + } + } + + # ----------------------------- + # end of main tokenization loop + # ----------------------------- + + if ( $i_tok >= 0 ) { + $routput_token_type->[$i_tok] = $type; + $routput_block_type->[$i_tok] = $block_type; + $routput_container_type->[$i_tok] = $container_type; + $routput_type_sequence->[$i_tok] = $type_sequence; + $routput_indent_flag->[$i_tok] = $indent_flag; + } + + unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) { + $last_last_nonblank_token = $last_nonblank_token; + $last_last_nonblank_type = $last_nonblank_type; + $last_last_nonblank_block_type = $last_nonblank_block_type; + $last_last_nonblank_container_type = $last_nonblank_container_type; + $last_last_nonblank_type_sequence = $last_nonblank_type_sequence; + $last_nonblank_token = $tok; + $last_nonblank_type = $type; + $last_nonblank_block_type = $block_type; + $last_nonblank_container_type = $container_type; + $last_nonblank_type_sequence = $type_sequence; + $last_nonblank_prototype = $prototype; + } + + # reset indentation level if necessary at a sub or package + # in an attempt to recover from a nesting error + if ( $level_in_tokenizer < 0 ) { + if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) { + reset_indentation_level(0); + brace_warning("resetting level to 0 at $1 $2\n"); + } + } + + # all done tokenizing this line ... + # now prepare the final list of tokens and types + + my @token_type = (); # stack of output token types + my @block_type = (); # stack of output code block types + my @container_type = (); # stack of output code container types + my @type_sequence = (); # stack of output type sequence numbers + my @tokens = (); # output tokens + my @levels = (); # structural brace levels of output tokens + my @slevels = (); # secondary nesting levels of output tokens + my @nesting_tokens = (); # string of tokens leading to this depth + my @nesting_types = (); # string of token types leading to this depth + my @nesting_blocks = (); # string of block types leading to this depth + my @nesting_lists = (); # string of list types leading to this depth + my @ci_string = (); # string needed to compute continuation indentation + my @container_environment = (); # BLOCK or LIST + my $container_environment = ''; + my $im = -1; # previous $i value + my $num; + my $ci_string_sum = ones_count($ci_string_in_tokenizer); + +# Computing Token Indentation +# +# The final section of the tokenizer forms tokens and also computes +# parameters needed to find indentation. It is much easier to do it +# in the tokenizer than elsewhere. Here is a brief description of how +# indentation is computed. Perl::Tidy computes indentation as the sum +# of 2 terms: +# +# (1) structural indentation, such as if/else/elsif blocks +# (2) continuation indentation, such as long parameter call lists. +# +# These are occasionally called primary and secondary indentation. +# +# Structural indentation is introduced by tokens of type '{', although +# the actual tokens might be '{', '(', or '['. Structural indentation +# is of two types: BLOCK and non-BLOCK. Default structural indentation +# is 4 characters if the standard indentation scheme is used. +# +# Continuation indentation is introduced whenever a line at BLOCK level +# is broken before its termination. Default continuation indentation +# is 2 characters in the standard indentation scheme. +# +# Both types of indentation may be nested arbitrarily deep and +# interlaced. The distinction between the two is somewhat arbitrary. +# +# For each token, we will define two variables which would apply if +# the current statement were broken just before that token, so that +# that token started a new line: +# +# $level = the structural indentation level, +# $ci_level = the continuation indentation level +# +# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces), +# assuming defaults. However, in some special cases it is customary +# to modify $ci_level from this strict value. +# +# The total structural indentation is easy to compute by adding and +# subtracting 1 from a saved value as types '{' and '}' are seen. The +# running value of this variable is $level_in_tokenizer. +# +# The total continuation is much more difficult to compute, and requires +# several variables. These variables are: +# +# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for +# each indentation level, if there are intervening open secondary +# structures just prior to that level. +# $continuation_string_in_tokenizer = a string of 1's and 0's indicating +# if the last token at that level is "continued", meaning that it +# is not the first token of an expression. +# $nesting_block_string = a string of 1's and 0's indicating, for each +# indentation level, if the level is of type BLOCK or not. +# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string +# $nesting_list_string = a string of 1's and 0's indicating, for each +# indentation level, if it is appropriate for list formatting. +# If so, continuation indentation is used to indent long list items. +# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string +# @{$rslevel_stack} = a stack of total nesting depths at each +# structural indentation level, where "total nesting depth" means +# the nesting depth that would occur if every nesting token -- '{', '[', +# and '(' -- , regardless of context, is used to compute a nesting +# depth. + + #my $nesting_block_flag = ($nesting_block_string =~ /1$/); + #my $nesting_list_flag = ($nesting_list_string =~ /1$/); + + my ( $ci_string_i, $level_i, $nesting_block_string_i, + $nesting_list_string_i, $nesting_token_string_i, + $nesting_type_string_i, ); + + foreach my $i ( @{$routput_token_list} ) + { # scan the list of pre-tokens indexes + + # self-checking for valid token types + my $type = $routput_token_type->[$i]; + my $forced_indentation_flag = $routput_indent_flag->[$i]; + + # See if we should undo the $forced_indentation_flag. + # Forced indentation after 'if', 'unless', 'while' and 'until' + # expressions without trailing parens is optional and doesn't + # always look good. It is usually okay for a trailing logical + # expression, but if the expression is a function call, code block, + # or some kind of list it puts in an unwanted extra indentation + # level which is hard to remove. + # + # Example where extra indentation looks ok: + # return 1 + # if $det_a < 0 and $det_b > 0 + # or $det_a > 0 and $det_b < 0; + # + # Example where extra indentation is not needed because + # the eval brace also provides indentation: + # print "not " if defined eval { + # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4; + # }; + # + # The following rule works fairly well: + # Undo the flag if the end of this line, or start of the next + # line, is an opening container token or a comma. + # This almost always works, but if not after another pass it will + # be stable. + if ( $forced_indentation_flag && $type eq 'k' ) { + my $ixlast = -1; + my $ilast = $routput_token_list->[$ixlast]; + my $toklast = $routput_token_type->[$ilast]; + if ( $toklast eq '#' ) { + $ixlast--; + $ilast = $routput_token_list->[$ixlast]; + $toklast = $routput_token_type->[$ilast]; + } + if ( $toklast eq 'b' ) { + $ixlast--; + $ilast = $routput_token_list->[$ixlast]; + $toklast = $routput_token_type->[$ilast]; + } + if ( $toklast =~ /^[\{,]$/ ) { + $forced_indentation_flag = 0; + } + else { + ( $toklast, my $i_next ) = + find_next_nonblank_token( $max_token_index, $rtokens, + $max_token_index ); + if ( $toklast =~ /^[\{,]$/ ) { + $forced_indentation_flag = 0; + } + } + } + + # if we are already in an indented if, see if we should outdent + if ($indented_if_level) { + + # don't try to nest trailing if's - shouldn't happen + if ( $type eq 'k' ) { + $forced_indentation_flag = 0; + } + + # check for the normal case - outdenting at next ';' + elsif ( $type eq ';' ) { + if ( $level_in_tokenizer == $indented_if_level ) { + $forced_indentation_flag = -1; + $indented_if_level = 0; + } + } + + # handle case of missing semicolon + elsif ( $type eq '}' ) { + if ( $level_in_tokenizer == $indented_if_level ) { + $indented_if_level = 0; + + # TBD: This could be a subroutine call + $level_in_tokenizer--; + if ( @{$rslevel_stack} > 1 ) { + pop( @{$rslevel_stack} ); + } + if ( length($nesting_block_string) > 1 ) + { # true for valid script + chop $nesting_block_string; + chop $nesting_list_string; + } + + } + } + } + + my $tok = $rtokens->[$i]; # the token, but ONLY if same as pretoken + $level_i = $level_in_tokenizer; + + # This can happen by running perltidy on non-scripts + # although it could also be bug introduced by programming change. + # Perl silently accepts a 032 (^Z) and takes it as the end + if ( !$is_valid_token_type{$type} ) { + my $val = ord($type); + warning( + "unexpected character decimal $val ($type) in script\n"); + $tokenizer_self->{_in_error} = 1; + } + + # ---------------------------------------------------------------- + # TOKEN TYPE PATCHES + # output __END__, __DATA__, and format as type 'k' instead of ';' + # to make html colors correct, etc. + my $fix_type = $type; + if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' } + + # output anonymous 'sub' as keyword + if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' } + + # ----------------------------------------------------------------- + + $nesting_token_string_i = $nesting_token_string; + $nesting_type_string_i = $nesting_type_string; + $nesting_block_string_i = $nesting_block_string; + $nesting_list_string_i = $nesting_list_string; + + # set primary indentation levels based on structural braces + # Note: these are set so that the leading braces have a HIGHER + # level than their CONTENTS, which is convenient for indentation + # Also, define continuation indentation for each token. + if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 ) + { + + # use environment before updating + $container_environment = + $nesting_block_flag ? 'BLOCK' + : $nesting_list_flag ? 'LIST' + : ""; + + # if the difference between total nesting levels is not 1, + # there are intervening non-structural nesting types between + # this '{' and the previous unclosed '{' + my $intervening_secondary_structure = 0; + if ( @{$rslevel_stack} ) { + $intervening_secondary_structure = + $slevel_in_tokenizer - $rslevel_stack->[-1]; + } + + # Continuation Indentation + # + # Having tried setting continuation indentation both in the formatter and + # in the tokenizer, I can say that setting it in the tokenizer is much, + # much easier. The formatter already has too much to do, and can't + # make decisions on line breaks without knowing what 'ci' will be at + # arbitrary locations. + # + # But a problem with setting the continuation indentation (ci) here + # in the tokenizer is that we do not know where line breaks will actually + # be. As a result, we don't know if we should propagate continuation + # indentation to higher levels of structure. + # + # For nesting of only structural indentation, we never need to do this. + # For example, in a long if statement, like this + # + # if ( !$output_block_type[$i] + # && ($in_statement_continuation) ) + # { <--outdented + # do_something(); + # } + # + # the second line has ci but we do normally give the lines within the BLOCK + # any ci. This would be true if we had blocks nested arbitrarily deeply. + # + # But consider something like this, where we have created a break after + # an opening paren on line 1, and the paren is not (currently) a + # structural indentation token: + # + # my $file = $menubar->Menubutton( + # qw/-text File -underline 0 -menuitems/ => [ + # [ + # Cascade => '~View', + # -menuitems => [ + # ... + # + # The second line has ci, so it would seem reasonable to propagate it + # down, giving the third line 1 ci + 1 indentation. This suggests the + # following rule, which is currently used to propagating ci down: if there + # are any non-structural opening parens (or brackets, or braces), before + # an opening structural brace, then ci is propagated down, and otherwise + # not. The variable $intervening_secondary_structure contains this + # information for the current token, and the string + # "$ci_string_in_tokenizer" is a stack of previous values of this + # variable. + + # save the current states + push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer ); + $level_in_tokenizer++; + + if ($forced_indentation_flag) { + + # break BEFORE '?' when there is forced indentation + if ( $type eq '?' ) { $level_i = $level_in_tokenizer; } + if ( $type eq 'k' ) { + $indented_if_level = $level_in_tokenizer; + } + + # do not change container environment here if we are not + # at a real list. Adding this check prevents "blinkers" + # often near 'unless" clauses, such as in the following + # code: +## next +## unless -e ( +## $archive = +## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" ) +## ); + + $nesting_block_string .= "$nesting_block_flag"; + } + else { + + if ( $routput_block_type->[$i] ) { + $nesting_block_flag = 1; + $nesting_block_string .= '1'; + } + else { + $nesting_block_flag = 0; + $nesting_block_string .= '0'; + } + } + + # we will use continuation indentation within containers + # which are not blocks and not logical expressions + my $bit = 0; + if ( !$routput_block_type->[$i] ) { + + # propagate flag down at nested open parens + if ( $routput_container_type->[$i] eq '(' ) { + $bit = 1 if $nesting_list_flag; + } + + # use list continuation if not a logical grouping + # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/ + else { + $bit = 1 + unless + $is_logical_container{ $routput_container_type->[$i] + }; + } + } + $nesting_list_string .= $bit; + $nesting_list_flag = $bit; + + $ci_string_in_tokenizer .= + ( $intervening_secondary_structure != 0 ) ? '1' : '0'; + $ci_string_sum = ones_count($ci_string_in_tokenizer); + $continuation_string_in_tokenizer .= + ( $in_statement_continuation > 0 ) ? '1' : '0'; + + # Sometimes we want to give an opening brace continuation indentation, + # and sometimes not. For code blocks, we don't do it, so that the leading + # '{' gets outdented, like this: + # + # if ( !$output_block_type[$i] + # && ($in_statement_continuation) ) + # { <--outdented + # + # For other types, we will give them continuation indentation. For example, + # here is how a list looks with the opening paren indented: + # + # @LoL = + # ( [ "fred", "barney" ], [ "george", "jane", "elroy" ], + # [ "homer", "marge", "bart" ], ); + # + # This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4) + + my $total_ci = $ci_string_sum; + if ( + !$routput_block_type->[$i] # patch: skip for BLOCK + && ($in_statement_continuation) + && !( $forced_indentation_flag && $type eq ':' ) + ) + { + $total_ci += $in_statement_continuation + unless ( $ci_string_in_tokenizer =~ /1$/ ); + } + + $ci_string_i = $total_ci; + $in_statement_continuation = 0; + } + + elsif ($type eq '}' + || $type eq 'R' + || $forced_indentation_flag < 0 ) + { + + # only a nesting error in the script would prevent popping here + if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); } + + $level_i = --$level_in_tokenizer; + + # restore previous level values + if ( length($nesting_block_string) > 1 ) + { # true for valid script + chop $nesting_block_string; + $nesting_block_flag = ( $nesting_block_string =~ /1$/ ); + chop $nesting_list_string; + $nesting_list_flag = ( $nesting_list_string =~ /1$/ ); + + chop $ci_string_in_tokenizer; + $ci_string_sum = ones_count($ci_string_in_tokenizer); + + $in_statement_continuation = + chop $continuation_string_in_tokenizer; + + # zero continuation flag at terminal BLOCK '}' which + # ends a statement. + if ( $routput_block_type->[$i] ) { + + # ...These include non-anonymous subs + # note: could be sub ::abc { or sub 'abc + if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) { + + # note: older versions of perl require the /gc modifier + # here or else the \G does not work. + if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc ) + { + $in_statement_continuation = 0; + } + } + +# ...and include all block types except user subs with +# block prototypes and these: (sort|grep|map|do|eval) +# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/ + elsif ( + $is_zero_continuation_block_type{ + $routput_block_type->[$i] + } ) + { + $in_statement_continuation = 0; + } + + # ..but these are not terminal types: + # /^(sort|grep|map|do|eval)$/ ) + elsif ( + $is_not_zero_continuation_block_type{ + $routput_block_type->[$i] + } ) + { + } + + # ..and a block introduced by a label + # /^\w+\s*:$/gc ) { + elsif ( $routput_block_type->[$i] =~ /:$/ ) { + $in_statement_continuation = 0; + } + + # user function with block prototype + else { + $in_statement_continuation = 0; + } + } + + # If we are in a list, then + # we must set continuation indentation at the closing + # paren of something like this (paren after $check): + # assert( + # __LINE__, + # ( not defined $check ) + # or ref $check + # or $check eq "new" + # or $check eq "old", + # ); + elsif ( $tok eq ')' ) { + $in_statement_continuation = 1 + if $routput_container_type->[$i] =~ /^[;,\{\}]$/; + } + + elsif ( $tok eq ';' ) { $in_statement_continuation = 0 } + } + + # use environment after updating + $container_environment = + $nesting_block_flag ? 'BLOCK' + : $nesting_list_flag ? 'LIST' + : ""; + $ci_string_i = $ci_string_sum + $in_statement_continuation; + $nesting_block_string_i = $nesting_block_string; + $nesting_list_string_i = $nesting_list_string; + } + + # not a structural indentation type.. + else { + + $container_environment = + $nesting_block_flag ? 'BLOCK' + : $nesting_list_flag ? 'LIST' + : ""; + + # zero the continuation indentation at certain tokens so + # that they will be at the same level as its container. For + # commas, this simplifies the -lp indentation logic, which + # counts commas. For ?: it makes them stand out. + if ($nesting_list_flag) { + if ( $type =~ /^[,\?\:]$/ ) { + $in_statement_continuation = 0; + } + } + + # be sure binary operators get continuation indentation + if ( + $container_environment + && ( $type eq 'k' && $is_binary_keyword{$tok} + || $is_binary_type{$type} ) + ) + { + $in_statement_continuation = 1; + } + + # continuation indentation is sum of any open ci from previous + # levels plus the current level + $ci_string_i = $ci_string_sum + $in_statement_continuation; + + # update continuation flag ... + # if this isn't a blank or comment.. + if ( $type ne 'b' && $type ne '#' ) { + + # and we are in a BLOCK + if ($nesting_block_flag) { + + # the next token after a ';' and label starts a new stmt + if ( $type eq ';' || $type eq 'J' ) { + $in_statement_continuation = 0; + } + + # otherwise, we are continuing the current statement + else { + $in_statement_continuation = 1; + } + } + + # if we are not in a BLOCK.. + else { + + # do not use continuation indentation if not list + # environment (could be within if/elsif clause) + if ( !$nesting_list_flag ) { + $in_statement_continuation = 0; + } + + # otherwise, the token after a ',' starts a new term + + # Patch FOR RT#99961; no continuation after a ';' + # This is needed because perltidy currently marks + # a block preceded by a type character like % or @ + # as a non block, to simplify formatting. But these + # are actually blocks and can have semicolons. + # See code_block_type() and is_non_structural_brace(). + elsif ( $type eq ',' || $type eq ';' ) { + $in_statement_continuation = 0; + } + + # otherwise, we are continuing the current term + else { + $in_statement_continuation = 1; + } + } + } + } + + if ( $level_in_tokenizer < 0 ) { + unless ( $tokenizer_self->{_saw_negative_indentation} ) { + $tokenizer_self->{_saw_negative_indentation} = 1; + warning("Starting negative indentation\n"); + } + } + + # set secondary nesting levels based on all containment token types + # Note: these are set so that the nesting depth is the depth + # of the PREVIOUS TOKEN, which is convenient for setting + # the strength of token bonds + my $slevel_i = $slevel_in_tokenizer; + + # /^[L\{\(\[]$/ + if ( $is_opening_type{$type} ) { + $slevel_in_tokenizer++; + $nesting_token_string .= $tok; + $nesting_type_string .= $type; + } + + # /^[R\}\)\]]$/ + elsif ( $is_closing_type{$type} ) { + $slevel_in_tokenizer--; + my $char = chop $nesting_token_string; + + if ( $char ne $matching_start_token{$tok} ) { + $nesting_token_string .= $char . $tok; + $nesting_type_string .= $type; + } + else { + chop $nesting_type_string; + } + } + + push( @block_type, $routput_block_type->[$i] ); + push( @ci_string, $ci_string_i ); + push( @container_environment, $container_environment ); + push( @container_type, $routput_container_type->[$i] ); + push( @levels, $level_i ); + push( @nesting_tokens, $nesting_token_string_i ); + push( @nesting_types, $nesting_type_string_i ); + push( @slevels, $slevel_i ); + push( @token_type, $fix_type ); + push( @type_sequence, $routput_type_sequence->[$i] ); + push( @nesting_blocks, $nesting_block_string ); + push( @nesting_lists, $nesting_list_string ); + + # now form the previous token + if ( $im >= 0 ) { + $num = + $rtoken_map->[$i] - $rtoken_map->[$im]; # how many characters + + if ( $num > 0 ) { + push( @tokens, + substr( $input_line, $rtoken_map->[$im], $num ) ); + } + } + $im = $i; + } + + $num = length($input_line) - $rtoken_map->[$im]; # make the last token + if ( $num > 0 ) { + push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) ); + } + + $tokenizer_self->{_in_attribute_list} = $in_attribute_list; + $tokenizer_self->{_in_quote} = $in_quote; + $tokenizer_self->{_quote_target} = + $in_quote ? matching_end_token($quote_character) : ""; + $tokenizer_self->{_rhere_target_list} = $rhere_target_list; + + $line_of_tokens->{_rtoken_type} = \@token_type; + $line_of_tokens->{_rtokens} = \@tokens; + $line_of_tokens->{_rblock_type} = \@block_type; + $line_of_tokens->{_rcontainer_type} = \@container_type; + $line_of_tokens->{_rcontainer_environment} = \@container_environment; + $line_of_tokens->{_rtype_sequence} = \@type_sequence; + $line_of_tokens->{_rlevels} = \@levels; + $line_of_tokens->{_rslevels} = \@slevels; + $line_of_tokens->{_rnesting_tokens} = \@nesting_tokens; + $line_of_tokens->{_rci_levels} = \@ci_string; + $line_of_tokens->{_rnesting_blocks} = \@nesting_blocks; + + return; + } +} # end tokenize_this_line + +#########i############################################################# +# Tokenizer routines which assist in identifying token types +####################################################################### + +sub operator_expected { + + # Many perl symbols have two or more meanings. For example, '<<' + # can be a shift operator or a here-doc operator. The + # interpretation of these symbols depends on the current state of + # the tokenizer, which may either be expecting a term or an + # operator. For this example, a << would be a shift if an operator + # is expected, and a here-doc if a term is expected. This routine + # is called to make this decision for any current token. It returns + # one of three possible values: + # + # OPERATOR - operator expected (or at least, not a term) + # UNKNOWN - can't tell + # TERM - a term is expected (or at least, not an operator) + # + # The decision is based on what has been seen so far. This + # information is stored in the "$last_nonblank_type" and + # "$last_nonblank_token" variables. For example, if the + # $last_nonblank_type is '=~', then we are expecting a TERM, whereas + # if $last_nonblank_type is 'n' (numeric), we are expecting an + # OPERATOR. + # + # If a UNKNOWN is returned, the calling routine must guess. A major + # goal of this tokenizer is to minimize the possibility of returning + # UNKNOWN, because a wrong guess can spoil the formatting of a + # script. + # + # adding NEW_TOKENS: it is critically important that this routine be + # updated to allow it to determine if an operator or term is to be + # expected after the new token. Doing this simply involves adding + # the new token character to one of the regexes in this routine or + # to one of the hash lists + # that it uses, which are initialized in the BEGIN section. + # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, + # $statement_type + + my ( $prev_type, $tok, $next_type ) = @_; + + my $op_expected = UNKNOWN; + +##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n"; + +# Note: function prototype is available for token type 'U' for future +# program development. It contains the leading and trailing parens, +# and no blanks. It might be used to eliminate token type 'C', for +# example (prototype = '()'). Thus: +# if ($last_nonblank_type eq 'U') { +# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n"; +# } + + # A possible filehandle (or object) requires some care... + if ( $last_nonblank_type eq 'Z' ) { + + # angle.t + if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) { + $op_expected = UNKNOWN; + } + + # For possible file handle like "$a", Perl uses weird parsing rules. + # For example: + # print $a/2,"/hi"; - division + # print $a / 2,"/hi"; - division + # print $a/ 2,"/hi"; - division + # print $a /2,"/hi"; - pattern (and error)! + elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) { + $op_expected = TERM; + } + + # Note when an operation is being done where a + # filehandle might be expected, since a change in whitespace + # could change the interpretation of the statement. + else { + if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) { + complain("operator in print statement not recommended\n"); + $op_expected = OPERATOR; + } + } + } + + # Check for smartmatch operator before preceding brace or square bracket. + # For example, at the ? after the ] in the following expressions we are + # expecting an operator: + # + # qr/3/ ~~ ['1234'] ? 1 : 0; + # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; + elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) { + $op_expected = OPERATOR; + } + + # handle something after 'do' and 'eval' + elsif ( $is_block_operator{$last_nonblank_token} ) { + + # something like $a = eval "expression"; + # ^ + if ( $last_nonblank_type eq 'k' ) { + $op_expected = TERM; # expression or list mode following keyword + } + + # something like $a = do { BLOCK } / 2; + # or this ? after a smartmatch anonynmous hash or array reference: + # qr/3/ ~~ ['1234'] ? 1 : 0; + # ^ + else { + $op_expected = OPERATOR; # block mode following } + } + } + + # handle bare word.. + elsif ( $last_nonblank_type eq 'w' ) { + + # unfortunately, we can't tell what type of token to expect next + # after most bare words + $op_expected = UNKNOWN; + } + + # operator, but not term possible after these types + # Note: moved ')' from type to token because parens in list context + # get marked as '{' '}' now. This is a minor glitch in the following: + # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : (); + # + elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ ) + || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) ) + { + $op_expected = OPERATOR; + + # in a 'use' statement, numbers and v-strings are not true + # numbers, so to avoid incorrect error messages, we will + # mark them as unknown for now (use.t) + # TODO: it would be much nicer to create a new token V for VERSION + # number in a use statement. Then this could be a check on type V + # and related patches which change $statement_type for '=>' + # and ',' could be removed. Further, it would clean things up to + # scan the 'use' statement with a separate subroutine. + if ( ( $statement_type eq 'use' ) + && ( $last_nonblank_type =~ /^[nv]$/ ) ) + { + $op_expected = UNKNOWN; + } + + # expecting VERSION or {} after package NAMESPACE + elsif ($statement_type =~ /^package\b/ + && $last_nonblank_token =~ /^package\b/ ) + { + $op_expected = TERM; + } + } + + # no operator after many keywords, such as "die", "warn", etc + elsif ( $expecting_term_token{$last_nonblank_token} ) { + + # patch for dor.t (defined or). + # perl functions which may be unary operators + # TODO: This list is incomplete, and these should be put + # into a hash. + if ( $tok eq '/' + && $next_type eq '/' + && $last_nonblank_type eq 'k' + && $last_nonblank_token =~ /^eof|undef|shift|pop$/ ) + { + $op_expected = OPERATOR; + } + else { + $op_expected = TERM; + } + } + + # no operator after things like + - ** (i.e., other operators) + elsif ( $expecting_term_types{$last_nonblank_type} ) { + $op_expected = TERM; + } + + # a few operators, like "time", have an empty prototype () and so + # take no parameters but produce a value to operate on + elsif ( $expecting_operator_token{$last_nonblank_token} ) { + $op_expected = OPERATOR; + } + + # post-increment and decrement produce values to be operated on + elsif ( $expecting_operator_types{$last_nonblank_type} ) { + $op_expected = OPERATOR; + } + + # no value to operate on after sub block + elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; } + + # a right brace here indicates the end of a simple block. + # all non-structural right braces have type 'R' + # all braces associated with block operator keywords have been given those + # keywords as "last_nonblank_token" and caught above. + # (This statement is order dependent, and must come after checking + # $last_nonblank_token). + elsif ( $last_nonblank_type eq '}' ) { + + # patch for dor.t (defined or). + if ( $tok eq '/' + && $next_type eq '/' + && $last_nonblank_token eq ']' ) + { + $op_expected = OPERATOR; + } + + # Patch for RT #116344: misparse a ternary operator after an anonymous + # hash, like this: + # return ref {} ? 1 : 0; + # The right brace should really be marked type 'R' in this case, and + # it is safest to return an UNKNOWN here. Expecting a TERM will + # cause the '?' to always be interpreted as a pattern delimiter + # rather than introducing a ternary operator. + elsif ( $tok eq '?' ) { + $op_expected = UNKNOWN; + } + else { + $op_expected = TERM; + } + } + + # something else..what did I forget? + else { + + # collecting diagnostics on unknown operator types..see what was missed + $op_expected = UNKNOWN; + write_diagnostics( +"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n" + ); + } + + TOKENIZER_DEBUG_FLAG_EXPECT && do { + print STDOUT +"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n"; + }; + return $op_expected; +} + +sub new_statement_ok { + + # return true if the current token can start a new statement + # USES GLOBAL VARIABLES: $last_nonblank_type + + return label_ok() # a label would be ok here + + || $last_nonblank_type eq 'J'; # or we follow a label + +} + +sub label_ok { + + # Decide if a bare word followed by a colon here is a label + # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, + # $brace_depth, @brace_type + + # if it follows an opening or closing code block curly brace.. + if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' ) + && $last_nonblank_type eq $last_nonblank_token ) + { + + # it is a label if and only if the curly encloses a code block + return $brace_type[$brace_depth]; + } + + # otherwise, it is a label if and only if it follows a ';' (real or fake) + # or another label + else { + return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' ); + } +} + +sub code_block_type { + + # Decide if this is a block of code, and its type. + # Must be called only when $type = $token = '{' + # The problem is to distinguish between the start of a block of code + # and the start of an anonymous hash reference + # Returns "" if not code block, otherwise returns 'last_nonblank_token' + # to indicate the type of code block. (For example, 'last_nonblank_token' + # might be 'if' for an if block, 'else' for an else block, etc). + # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type, + # $last_nonblank_block_type, $brace_depth, @brace_type + + # handle case of multiple '{'s + +# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n"; + + my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; + if ( $last_nonblank_token eq '{' + && $last_nonblank_type eq $last_nonblank_token ) + { + + # opening brace where a statement may appear is probably + # a code block but might be and anonymous hash reference + if ( $brace_type[$brace_depth] ) { + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # cannot start a code block within an anonymous hash + else { + return ""; + } + } + + elsif ( $last_nonblank_token eq ';' ) { + + # an opening brace where a statement may appear is probably + # a code block but might be and anonymous hash reference + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # handle case of '}{' + elsif ($last_nonblank_token eq '}' + && $last_nonblank_type eq $last_nonblank_token ) + { + + # a } { situation ... + # could be hash reference after code block..(blktype1.t) + if ($last_nonblank_block_type) { + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # must be a block if it follows a closing hash reference + else { + return $last_nonblank_token; + } + } + + ################################################################ + # NOTE: braces after type characters start code blocks, but for + # simplicity these are not identified as such. See also + # sub is_non_structural_brace. + ################################################################ + +## elsif ( $last_nonblank_type eq 't' ) { +## return $last_nonblank_token; +## } + + # brace after label: + elsif ( $last_nonblank_type eq 'J' ) { + return $last_nonblank_token; + } + +# otherwise, look at previous token. This must be a code block if +# it follows any of these: +# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/ + elsif ( $is_code_block_token{$last_nonblank_token} ) { + + # Bug Patch: Note that the opening brace after the 'if' in the following + # snippet is an anonymous hash ref and not a code block! + # print 'hi' if { x => 1, }->{x}; + # We can identify this situation because the last nonblank type + # will be a keyword (instead of a closing peren) + if ( $last_nonblank_token =~ /^(if|unless)$/ + && $last_nonblank_type eq 'k' ) + { + return ""; + } + else { + return $last_nonblank_token; + } + } + + # or a sub or package BLOCK + elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' ) + && $last_nonblank_token =~ /^(sub|package)\b/ ) + { + return $last_nonblank_token; + } + + elsif ( $statement_type =~ /^(sub|package)\b/ ) { + return $statement_type; + } + + # user-defined subs with block parameters (like grep/map/eval) + elsif ( $last_nonblank_type eq 'G' ) { + return $last_nonblank_token; + } + + # check bareword + elsif ( $last_nonblank_type eq 'w' ) { + return decide_if_code_block( $i, $rtokens, $rtoken_type, + $max_token_index ); + } + + # Patch for bug # RT #94338 reported by Daniel Trizen + # for-loop in a parenthesized block-map triggering an error message: + # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) ); + # Check for a code block within a parenthesized function call + elsif ( $last_nonblank_token eq '(' ) { + my $paren_type = $paren_type[$paren_depth]; + if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) { + + # We will mark this as a code block but use type 't' instead + # of the name of the contining function. This will allow for + # correct parsing but will usually produce better formatting. + # Braces with block type 't' are not broken open automatically + # in the formatter as are other code block types, and this usually + # works best. + return 't'; # (Not $paren_type) + } + else { + return ""; + } + } + + # handle unknown syntax ') {' + # we previously appended a '()' to mark this case + elsif ( $last_nonblank_token =~ /\(\)$/ ) { + return $last_nonblank_token; + } + + # anything else must be anonymous hash reference + else { + return ""; + } +} + +sub decide_if_code_block { + + # USES GLOBAL VARIABLES: $last_nonblank_token + my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_; + + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # we are at a '{' where a statement may appear. + # We must decide if this brace starts an anonymous hash or a code + # block. + # return "" if anonymous hash, and $last_nonblank_token otherwise + + # initialize to be code BLOCK + my $code_block_type = $last_nonblank_token; + + # Check for the common case of an empty anonymous hash reference: + # Maybe something like sub { { } } + if ( $next_nonblank_token eq '}' ) { + $code_block_type = ""; + } + + else { + + # To guess if this '{' is an anonymous hash reference, look ahead + # and test as follows: + # + # it is a hash reference if next come: + # - a string or digit followed by a comma or => + # - bareword followed by => + # otherwise it is a code block + # + # Examples of anonymous hash ref: + # {'aa',}; + # {1,2} + # + # Examples of code blocks: + # {1; print "hello\n", 1;} + # {$a,1}; + + # We are only going to look ahead one more (nonblank/comment) line. + # Strange formatting could cause a bad guess, but that's unlikely. + my @pre_types; + my @pre_tokens; + + # Ignore the rest of this line if it is a side comment + if ( $next_nonblank_token ne '#' ) { + @pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ]; + @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ]; + } + my ( $rpre_tokens, $rpre_types ) = + peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but + # generous, and prevents + # wasting lots of + # time in mangled files + if ( defined($rpre_types) && @{$rpre_types} ) { + push @pre_types, @{$rpre_types}; + push @pre_tokens, @{$rpre_tokens}; + } + + # put a sentinel token to simplify stopping the search + push @pre_types, '}'; + push @pre_types, '}'; + + my $jbeg = 0; + $jbeg = 1 if $pre_types[0] eq 'b'; + + # first look for one of these + # - bareword + # - bareword with leading - + # - digit + # - quoted string + my $j = $jbeg; + if ( $pre_types[$j] =~ /^[\'\"]/ ) { + + # find the closing quote; don't worry about escapes + my $quote_mark = $pre_types[$j]; + foreach my $k ( $j + 1 .. $#pre_types - 1 ) { + if ( $pre_types[$k] eq $quote_mark ) { + $j = $k + 1; + my $next = $pre_types[$j]; + last; + } + } + } + elsif ( $pre_types[$j] eq 'd' ) { + $j++; + } + elsif ( $pre_types[$j] eq 'w' ) { + $j++; + } + elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) { + $j++; + } + if ( $j > $jbeg ) { + + $j++ if $pre_types[$j] eq 'b'; + + # Patched for RT #95708 + if ( + + # it is a comma which is not a pattern delimeter except for qw + ( + $pre_types[$j] eq ',' + && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/ + ) + + # or a => + || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) + ) + { + $code_block_type = ""; + } + } + } + + return $code_block_type; +} + +sub report_unexpected { + + # report unexpected token type and show where it is + # USES GLOBAL VARIABLES: $tokenizer_self + my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, + $rpretoken_type, $input_line ) + = @_; + + if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) { + my $msg = "found $found where $expecting expected"; + my $pos = $rpretoken_map->[$i_tok]; + interrupt_logfile(); + my $input_line_number = $tokenizer_self->{_last_line_number}; + my ( $offset, $numbered_line, $underline ) = + make_numbered_line( $input_line_number, $input_line, $pos ); + $underline = write_on_underline( $underline, $pos - $offset, '^' ); + + my $trailer = ""; + if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) { + my $pos_prev = $rpretoken_map->[$last_nonblank_i]; + my $num; + if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) { + $num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev; + } + else { + $num = $pos - $pos_prev; + } + if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; } + + $underline = + write_on_underline( $underline, $pos_prev - $offset, '-' x $num ); + $trailer = " (previous token underlined)"; + } + warning( $numbered_line . "\n" ); + warning( $underline . "\n" ); + warning( $msg . $trailer . "\n" ); + resume_logfile(); + } + return; +} + +sub is_non_structural_brace { + + # Decide if a brace or bracket is structural or non-structural + # by looking at the previous token and type + # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token + + # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting. + # Tentatively deactivated because it caused the wrong operator expectation + # for this code: + # $user = @vars[1] / 100; + # Must update sub operator_expected before re-implementing. + # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) { + # return 0; + # } + + ################################################################ + # NOTE: braces after type characters start code blocks, but for + # simplicity these are not identified as such. See also + # sub code_block_type + ################################################################ + + ##if ($last_nonblank_type eq 't') {return 0} + + # otherwise, it is non-structural if it is decorated + # by type information. + # For example, the '{' here is non-structural: ${xxx} + return ( + $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/ + + # or if we follow a hash or array closing curly brace or bracket + # For example, the second '{' in this is non-structural: $a{'x'}{'y'} + # because the first '}' would have been given type 'R' + || $last_nonblank_type =~ /^([R\]])$/ + ); +} + +#########i############################################################# +# Tokenizer routines for tracking container nesting depths +####################################################################### + +# The following routines keep track of nesting depths of the nesting +# types, ( [ { and ?. This is necessary for determining the indentation +# level, and also for debugging programs. Not only do they keep track of +# nesting depths of the individual brace types, but they check that each +# of the other brace types is balanced within matching pairs. For +# example, if the program sees this sequence: +# +# { ( ( ) } +# +# then it can determine that there is an extra left paren somewhere +# between the { and the }. And so on with every other possible +# combination of outer and inner brace types. For another +# example: +# +# ( [ ..... ] ] ) +# +# which has an extra ] within the parens. +# +# The brace types have indexes 0 .. 3 which are indexes into +# the matrices. +# +# The pair ? : are treated as just another nesting type, with ? acting +# as the opening brace and : acting as the closing brace. +# +# The matrix +# +# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b]; +# +# saves the nesting depth of brace type $b (where $b is either of the other +# nesting types) when brace type $a enters a new depth. When this depth +# decreases, a check is made that the current depth of brace types $b is +# unchanged, or otherwise there must have been an error. This can +# be very useful for localizing errors, particularly when perl runs to +# the end of a large file (such as this one) and announces that there +# is a problem somewhere. +# +# A numerical sequence number is maintained for every nesting type, +# so that each matching pair can be uniquely identified in a simple +# way. + +sub increase_nesting_depth { + my ( $aa, $pos ) = @_; + + # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, + # @current_sequence_number, @depth_array, @starting_line_of_current_depth, + # $statement_type + $current_depth[$aa]++; + $total_depth++; + $total_depth[$aa][ $current_depth[$aa] ] = $total_depth; + my $input_line_number = $tokenizer_self->{_last_line_number}; + my $input_line = $tokenizer_self->{_line_text}; + + # Sequence numbers increment by number of items. This keeps + # a unique set of numbers but still allows the relative location + # of any type to be determined. + $nesting_sequence_number[$aa] += scalar(@closing_brace_names); + my $seqno = $nesting_sequence_number[$aa]; + $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno; + + $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] = + [ $input_line_number, $input_line, $pos ]; + + for my $bb ( 0 .. $#closing_brace_names ) { + next if ( $bb == $aa ); + $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb]; + } + + # set a flag for indenting a nested ternary statement + my $indent = 0; + if ( $aa == QUESTION_COLON ) { + $nested_ternary_flag[ $current_depth[$aa] ] = 0; + if ( $current_depth[$aa] > 1 ) { + if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) { + my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ]; + if ( $pdepth == $total_depth - 1 ) { + $indent = 1; + $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1; + } + } + } + } + $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type; + $statement_type = ""; + return ( $seqno, $indent ); +} + +sub decrease_nesting_depth { + + my ( $aa, $pos ) = @_; + + # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth, + # @current_sequence_number, @depth_array, @starting_line_of_current_depth + # $statement_type + my $seqno = 0; + my $input_line_number = $tokenizer_self->{_last_line_number}; + my $input_line = $tokenizer_self->{_line_text}; + + my $outdent = 0; + $total_depth--; + if ( $current_depth[$aa] > 0 ) { + + # set a flag for un-indenting after seeing a nested ternary statement + $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ]; + if ( $aa == QUESTION_COLON ) { + $outdent = $nested_ternary_flag[ $current_depth[$aa] ]; + } + $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ]; + + # check that any brace types $bb contained within are balanced + for my $bb ( 0 .. $#closing_brace_names ) { + next if ( $bb == $aa ); + + unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] == + $current_depth[$bb] ) + { + my $diff = + $current_depth[$bb] - + $depth_array[$aa][$bb][ $current_depth[$aa] ]; + + # don't whine too many times + my $saw_brace_error = get_saw_brace_error(); + if ( + $saw_brace_error <= MAX_NAG_MESSAGES + + # if too many closing types have occurred, we probably + # already caught this error + && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) + ) + { + interrupt_logfile(); + my $rsl = + $starting_line_of_current_depth[$aa] + [ $current_depth[$aa] ]; + my $sl = $rsl->[0]; + my $rel = [ $input_line_number, $input_line, $pos ]; + my $el = $rel->[0]; + my ($ess); + + if ( $diff == 1 || $diff == -1 ) { + $ess = ''; + } + else { + $ess = 's'; + } + my $bname = + ( $diff > 0 ) + ? $opening_brace_names[$bb] + : $closing_brace_names[$bb]; + write_error_indicator_pair( @{$rsl}, '^' ); + my $msg = <<"EOM"; +Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el +EOM + + if ( $diff > 0 ) { + my $rml = + $starting_line_of_current_depth[$bb] + [ $current_depth[$bb] ]; + my $ml = $rml->[0]; + $msg .= +" The most recent un-matched $bname is on line $ml\n"; + write_error_indicator_pair( @{$rml}, '^' ); + } + write_error_indicator_pair( @{$rel}, '^' ); + warning($msg); + resume_logfile(); + } + increment_brace_error(); + } + } + $current_depth[$aa]--; + } + else { + + my $saw_brace_error = get_saw_brace_error(); + if ( $saw_brace_error <= MAX_NAG_MESSAGES ) { + my $msg = <<"EOM"; +There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number +EOM + indicate_error( $msg, $input_line_number, $input_line, $pos, '^' ); + } + increment_brace_error(); + } + return ( $seqno, $outdent ); +} + +sub check_final_nesting_depths { + + # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth + + for my $aa ( 0 .. $#closing_brace_names ) { + + if ( $current_depth[$aa] ) { + my $rsl = + $starting_line_of_current_depth[$aa][ $current_depth[$aa] ]; + my $sl = $rsl->[0]; + my $msg = <<"EOM"; +Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa] +The most recent un-matched $opening_brace_names[$aa] is on line $sl +EOM + indicate_error( $msg, @{$rsl}, '^' ); + increment_brace_error(); + } + } + return; +} + +#########i############################################################# +# Tokenizer routines for looking ahead in input stream +####################################################################### + +sub peek_ahead_for_n_nonblank_pre_tokens { + + # returns next n pretokens if they exist + # returns undef's if hits eof without seeing any pretokens + # USES GLOBAL VARIABLES: $tokenizer_self + my $max_pretokens = shift; + my $line; + my $i = 0; + my ( $rpre_tokens, $rmap, $rpre_types ); + + while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) + { + $line =~ s/^\s*//; # trim leading blanks + next if ( length($line) <= 0 ); # skip blank + next if ( $line =~ /^#/ ); # skip comment + ( $rpre_tokens, $rmap, $rpre_types ) = + pre_tokenize( $line, $max_pretokens ); + last; + } + return ( $rpre_tokens, $rpre_types ); +} + +# look ahead for next non-blank, non-comment line of code +sub peek_ahead_for_nonblank_token { + + # USES GLOBAL VARIABLES: $tokenizer_self + my ( $rtokens, $max_token_index ) = @_; + my $line; + my $i = 0; + + while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) ) + { + $line =~ s/^\s*//; # trim leading blanks + next if ( length($line) <= 0 ); # skip blank + next if ( $line =~ /^#/ ); # skip comment + my ( $rtok, $rmap, $rtype ) = + pre_tokenize( $line, 2 ); # only need 2 pre-tokens + my $j = $max_token_index + 1; + + foreach my $tok ( @{$rtok} ) { + last if ( $tok =~ "\n" ); + $rtokens->[ ++$j ] = $tok; + } + last; + } + return $rtokens; +} + +#########i############################################################# +# Tokenizer guessing routines for ambiguous situations +####################################################################### + +sub guess_if_pattern_or_conditional { + + # this routine is called when we have encountered a ? following an + # unknown bareword, and we must decide if it starts a pattern or not + # input parameters: + # $i - token index of the ? starting possible pattern + # output parameters: + # $is_pattern = 0 if probably not pattern, =1 if probably a pattern + # msg = a warning or diagnostic message + # USES GLOBAL VARIABLES: $last_nonblank_token + + # FIXME: this needs to be rewritten + + my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; + my $is_pattern = 0; + my $msg = "guessing that ? after $last_nonblank_token starts a "; + + if ( $i >= $max_token_index ) { + $msg .= "conditional (no end to pattern found on the line)\n"; + } + else { + my $ibeg = $i; + $i = $ibeg + 1; + my $next_token = $rtokens->[$i]; # first token after ? + + # look for a possible ending ? on this line.. + my $in_quote = 1; + my $quote_depth = 0; + my $quote_character = ''; + my $quote_pos = 0; + my $quoted_string; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + + if ($in_quote) { + + # we didn't find an ending ? on this line, + # so we bias towards conditional + $is_pattern = 0; + $msg .= "conditional (no ending ? on this line)\n"; + + # we found an ending ?, so we bias towards a pattern + } + else { + + # Watch out for an ending ? in quotes, like this + # my $case_flag = File::Spec->case_tolerant ? '(?i)' : ''; + my $s_quote = 0; + my $d_quote = 0; + my $colons = 0; + foreach my $ii ( $ibeg + 1 .. $i - 1 ) { + my $tok = $rtokens->[$ii]; + if ( $tok eq ":" ) { $colons++ } + if ( $tok eq "'" ) { $s_quote++ } + if ( $tok eq '"' ) { $d_quote++ } + } + if ( $s_quote % 2 || $d_quote % 2 || $colons ) { + $is_pattern = 0; + $msg .= "found ending ? but unbalanced quote chars\n"; + } + elsif ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { + $is_pattern = 1; + $msg .= "pattern (found ending ? and pattern expected)\n"; + } + else { + $msg .= "pattern (uncertain, but found ending ?)\n"; + } + } + } + return ( $is_pattern, $msg ); +} + +sub guess_if_pattern_or_division { + + # this routine is called when we have encountered a / following an + # unknown bareword, and we must decide if it starts a pattern or is a + # division + # input parameters: + # $i - token index of the / starting possible pattern + # output parameters: + # $is_pattern = 0 if probably division, =1 if probably a pattern + # msg = a warning or diagnostic message + # USES GLOBAL VARIABLES: $last_nonblank_token + my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; + my $is_pattern = 0; + my $msg = "guessing that / after $last_nonblank_token starts a "; + + if ( $i >= $max_token_index ) { + $msg .= "division (no end to pattern found on the line)\n"; + } + else { + my $ibeg = $i; + my $divide_expected = + numerator_expected( $i, $rtokens, $max_token_index ); + $i = $ibeg + 1; + my $next_token = $rtokens->[$i]; # first token after slash + + # look for a possible ending / on this line.. + my $in_quote = 1; + my $quote_depth = 0; + my $quote_character = ''; + my $quote_pos = 0; + my $quoted_string; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + + if ($in_quote) { + + # we didn't find an ending / on this line, + # so we bias towards division + if ( $divide_expected >= 0 ) { + $is_pattern = 0; + $msg .= "division (no ending / on this line)\n"; + } + else { + $msg = "multi-line pattern (division not possible)\n"; + $is_pattern = 1; + } + + } + + # we found an ending /, so we bias towards a pattern + else { + + if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) { + + if ( $divide_expected >= 0 ) { + + if ( $i - $ibeg > 60 ) { + $msg .= "division (matching / too distant)\n"; + $is_pattern = 0; + } + else { + $msg .= "pattern (but division possible too)\n"; + $is_pattern = 1; + } + } + else { + $is_pattern = 1; + $msg .= "pattern (division not possible)\n"; + } + } + else { + + if ( $divide_expected >= 0 ) { + $is_pattern = 0; + $msg .= "division (pattern not possible)\n"; + } + else { + $is_pattern = 1; + $msg .= + "pattern (uncertain, but division would not work here)\n"; + } + } + } + } + return ( $is_pattern, $msg ); +} + +# try to resolve here-doc vs. shift by looking ahead for +# non-code or the end token (currently only looks for end token) +# returns 1 if it is probably a here doc, 0 if not +sub guess_if_here_doc { + + # This is how many lines we will search for a target as part of the + # guessing strategy. It is a constant because there is probably + # little reason to change it. + # USES GLOBAL VARIABLES: $tokenizer_self, $current_package + # %is_constant, + my $HERE_DOC_WINDOW = 40; + + my $next_token = shift; + my $here_doc_expected = 0; + my $line; + my $k = 0; + my $msg = "checking <<"; + + while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) ) + { + chomp $line; + + if ( $line =~ /^$next_token$/ ) { + $msg .= " -- found target $next_token ahead $k lines\n"; + $here_doc_expected = 1; # got it + last; + } + last if ( $k >= $HERE_DOC_WINDOW ); + } + + unless ($here_doc_expected) { + + if ( !defined($line) ) { + $here_doc_expected = -1; # hit eof without seeing target + $msg .= " -- must be shift; target $next_token not in file\n"; + + } + else { # still unsure..taking a wild guess + + if ( !$is_constant{$current_package}{$next_token} ) { + $here_doc_expected = 1; + $msg .= + " -- guessing it's a here-doc ($next_token not a constant)\n"; + } + else { + $msg .= + " -- guessing it's a shift ($next_token is a constant)\n"; + } + } + } + write_logfile_entry($msg); + return $here_doc_expected; +} + +#########i############################################################# +# Tokenizer Routines for scanning identifiers and related items +####################################################################### + +sub scan_bare_identifier_do { + + # this routine is called to scan a token starting with an alphanumeric + # variable or package separator, :: or '. + # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, + # $last_nonblank_type,@paren_type, $paren_depth + + my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map, + $max_token_index ) + = @_; + my $i_begin = $i; + my $package = undef; + + my $i_beg = $i; + + # we have to back up one pretoken at a :: since each : is one pretoken + if ( $tok eq '::' ) { $i_beg-- } + if ( $tok eq '->' ) { $i_beg-- } + my $pos_beg = $rtoken_map->[$i_beg]; + pos($input_line) = $pos_beg; + + # Examples: + # A::B::C + # A:: + # ::A + # A'B + if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) { + + my $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $tok = substr( $input_line, $pos_beg, $numc ); + + # type 'w' includes anything without leading type info + # ($,%,@,*) including something like abc::def::ghi + $type = 'w'; + + my $sub_name = ""; + if ( defined($2) ) { $sub_name = $2; } + if ( defined($1) ) { + $package = $1; + + # patch: don't allow isolated package name which just ends + # in the old style package separator (single quote). Example: + # use CGI':all'; + if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) { + $pos--; + } + + $package =~ s/\'/::/g; + if ( $package =~ /^\:/ ) { $package = 'main' . $package } + $package =~ s/::$//; + } + else { + $package = $current_package; + + if ( $is_keyword{$tok} ) { + $type = 'k'; + } + } + + # if it is a bareword.. + if ( $type eq 'w' ) { + + # check for v-string with leading 'v' type character + # (This seems to have precedence over filehandle, type 'Y') + if ( $tok =~ /^v\d[_\d]*$/ ) { + + # we only have the first part - something like 'v101' - + # look for more + if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { + $pos = pos($input_line); + $numc = $pos - $pos_beg; + $tok = substr( $input_line, $pos_beg, $numc ); + } + $type = 'v'; + + # warn if this version can't handle v-strings + report_v_string($tok); + } + + elsif ( $is_constant{$package}{$sub_name} ) { + $type = 'C'; + } + + # bareword after sort has implied empty prototype; for example: + # @sorted = sort numerically ( 53, 29, 11, 32, 7 ); + # This has priority over whatever the user has specified. + elsif ($last_nonblank_token eq 'sort' + && $last_nonblank_type eq 'k' ) + { + $type = 'Z'; + } + + # Note: strangely, perl does not seem to really let you create + # functions which act like eval and do, in the sense that eval + # and do may have operators following the final }, but any operators + # that you create with prototype (&) apparently do not allow + # trailing operators, only terms. This seems strange. + # If this ever changes, here is the update + # to make perltidy behave accordingly: + + # elsif ( $is_block_function{$package}{$tok} ) { + # $tok='eval'; # patch to do braces like eval - doesn't work + # $type = 'k'; + #} + # FIXME: This could become a separate type to allow for different + # future behavior: + elsif ( $is_block_function{$package}{$sub_name} ) { + $type = 'G'; + } + + elsif ( $is_block_list_function{$package}{$sub_name} ) { + $type = 'G'; + } + elsif ( $is_user_function{$package}{$sub_name} ) { + $type = 'U'; + $prototype = $user_function_prototype{$package}{$sub_name}; + } + + # check for indirect object + elsif ( + + # added 2001-03-27: must not be followed immediately by '(' + # see fhandle.t + ( $input_line !~ m/\G\(/gc ) + + # and + && ( + + # preceded by keyword like 'print', 'printf' and friends + $is_indirect_object_taker{$last_nonblank_token} + + # or preceded by something like 'print(' or 'printf(' + || ( + ( $last_nonblank_token eq '(' ) + && $is_indirect_object_taker{ $paren_type[$paren_depth] + } + + ) + ) + ) + { + + # may not be indirect object unless followed by a space + if ( $input_line =~ m/\G\s+/gc ) { + $type = 'Y'; + + # Abandon Hope ... + # Perl's indirect object notation is a very bad + # thing and can cause subtle bugs, especially for + # beginning programmers. And I haven't even been + # able to figure out a sane warning scheme which + # doesn't get in the way of good scripts. + + # Complain if a filehandle has any lower case + # letters. This is suggested good practice. + # Use 'sub_name' because something like + # main::MYHANDLE is ok for filehandle + if ( $sub_name =~ /[a-z]/ ) { + + # could be bug caused by older perltidy if + # followed by '(' + if ( $input_line =~ m/\G\s*\(/gc ) { + complain( +"Caution: unknown word '$tok' in indirect object slot\n" + ); + } + } + } + + # bareword not followed by a space -- may not be filehandle + # (may be function call defined in a 'use' statement) + else { + $type = 'Z'; + } + } + } + + # Now we must convert back from character position + # to pre_token index. + # I don't think an error flag can occur here ..but who knows + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + if ($error) { + warning("scan_bare_identifier: Possibly invalid tokenization\n"); + } + } + + # no match but line not blank - could be syntax error + # perl will take '::' alone without complaint + else { + $type = 'w'; + + # change this warning to log message if it becomes annoying + warning("didn't find identifier after leading ::\n"); + } + return ( $i, $tok, $type, $prototype ); +} + +sub scan_id_do { + +# This is the new scanner and will eventually replace scan_identifier. +# Only type 'sub' and 'package' are implemented. +# Token types $ * % @ & -> are not yet implemented. +# +# Scan identifier following a type token. +# The type of call depends on $id_scan_state: $id_scan_state = '' +# for starting call, in which case $tok must be the token defining +# the type. +# +# If the type token is the last nonblank token on the line, a value +# of $id_scan_state = $tok is returned, indicating that further +# calls must be made to get the identifier. If the type token is +# not the last nonblank token on the line, the identifier is +# scanned and handled and a value of '' is returned. +# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list, +# $statement_type, $tokenizer_self + + my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state, + $max_token_index ) + = @_; + my $type = ''; + my ( $i_beg, $pos_beg ); + + #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; + #my ($a,$b,$c) = caller; + #print "NSCAN: scan_id called with tok=$tok $a $b $c\n"; + + # on re-entry, start scanning at first token on the line + if ($id_scan_state) { + $i_beg = $i; + $type = ''; + } + + # on initial entry, start scanning just after type token + else { + $i_beg = $i + 1; + $id_scan_state = $tok; + $type = 't'; + } + + # find $i_beg = index of next nonblank token, + # and handle empty lines + my $blank_line = 0; + my $next_nonblank_token = $rtokens->[$i_beg]; + if ( $i_beg > $max_token_index ) { + $blank_line = 1; + } + else { + + # only a '#' immediately after a '$' is not a comment + if ( $next_nonblank_token eq '#' ) { + unless ( $tok eq '$' ) { + $blank_line = 1; + } + } + + if ( $next_nonblank_token =~ /^\s/ ) { + ( $next_nonblank_token, $i_beg ) = + find_next_nonblank_token_on_this_line( $i_beg, $rtokens, + $max_token_index ); + if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) { + $blank_line = 1; + } + } + } + + # handle non-blank line; identifier, if any, must follow + unless ($blank_line) { + + if ( $id_scan_state eq 'sub' ) { + ( $i, $tok, $type, $id_scan_state ) = do_scan_sub( + $input_line, $i, $i_beg, + $tok, $type, $rtokens, + $rtoken_map, $id_scan_state, $max_token_index + ); + } + + elsif ( $id_scan_state eq 'package' ) { + ( $i, $tok, $type ) = + do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens, + $rtoken_map, $max_token_index ); + $id_scan_state = ''; + } + + else { + warning("invalid token in scan_id: $tok\n"); + $id_scan_state = ''; + } + } + + if ( $id_scan_state && ( !defined($type) || !$type ) ) { + + # shouldn't happen: + warning( +"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n" + ); + report_definite_bug(); + } + + TOKENIZER_DEBUG_FLAG_NSCAN && do { + print STDOUT + "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n"; + }; + return ( $i, $tok, $type, $id_scan_state ); +} + +sub check_prototype { + my ( $proto, $package, $subname ) = @_; + return unless ( defined($package) && defined($subname) ); + if ( defined($proto) ) { + $proto =~ s/^\s*\(\s*//; + $proto =~ s/\s*\)$//; + if ($proto) { + $is_user_function{$package}{$subname} = 1; + $user_function_prototype{$package}{$subname} = "($proto)"; + + # prototypes containing '&' must be treated specially.. + if ( $proto =~ /\&/ ) { + + # right curly braces of prototypes ending in + # '&' may be followed by an operator + if ( $proto =~ /\&$/ ) { + $is_block_function{$package}{$subname} = 1; + } + + # right curly braces of prototypes NOT ending in + # '&' may NOT be followed by an operator + elsif ( $proto !~ /\&$/ ) { + $is_block_list_function{$package}{$subname} = 1; + } + } + } + else { + $is_constant{$package}{$subname} = 1; + } + } + else { + $is_user_function{$package}{$subname} = 1; + } + return; +} + +sub do_scan_package { + + # do_scan_package parses a package name + # it is called with $i_beg equal to the index of the first nonblank + # token following a 'package' token. + # USES GLOBAL VARIABLES: $current_package, + + # package NAMESPACE + # package NAMESPACE VERSION + # package NAMESPACE BLOCK + # package NAMESPACE VERSION BLOCK + # + # If VERSION is provided, package sets the $VERSION variable in the given + # namespace to a version object with the VERSION provided. VERSION must be + # a "strict" style version number as defined by the version module: a + # positive decimal number (integer or decimal-fraction) without + # exponentiation or else a dotted-decimal v-string with a leading 'v' + # character and at least three components. + # reference http://perldoc.perl.org/functions/package.html + + my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map, + $max_token_index ) + = @_; + my $package = undef; + my $pos_beg = $rtoken_map->[$i_beg]; + pos($input_line) = $pos_beg; + + # handle non-blank line; package name, if any, must follow + if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) { + $package = $1; + $package = ( defined($1) && $1 ) ? $1 : 'main'; + $package =~ s/\'/::/g; + if ( $package =~ /^\:/ ) { $package = 'main' . $package } + $package =~ s/::$//; + my $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $tok = 'package ' . substr( $input_line, $pos_beg, $numc ); + $type = 'i'; + + # Now we must convert back from character position + # to pre_token index. + # I don't think an error flag can occur here ..but ? + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + if ($error) { warning("Possibly invalid package\n") } + $current_package = $package; + + # we should now have package NAMESPACE + # now expecting VERSION, BLOCK, or ; to follow ... + # package NAMESPACE VERSION + # package NAMESPACE BLOCK + # package NAMESPACE VERSION BLOCK + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # check that something recognizable follows, but do not parse. + # A VERSION number will be parsed later as a number or v-string in the + # normal way. What is important is to set the statement type if + # everything looks okay so that the operator_expected() routine + # knows that the number is in a package statement. + # Examples of valid primitive tokens that might follow are: + # 1235 . ; { } v3 v + if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) { + $statement_type = $tok; + } + else { + warning( + "Unexpected '$next_nonblank_token' after package name '$tok'\n" + ); + } + } + + # no match but line not blank -- + # could be a label with name package, like package: , for example. + else { + $type = 'k'; + } + + return ( $i, $tok, $type ); +} + +sub scan_identifier_do { + + # This routine assembles tokens into identifiers. It maintains a + # scan state, id_scan_state. It updates id_scan_state based upon + # current id_scan_state and token, and returns an updated + # id_scan_state and the next index after the identifier. + # USES GLOBAL VARIABLES: $context, $last_nonblank_token, + # $last_nonblank_type + + my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, + $expecting, $container_type ) + = @_; + my $i_begin = $i; + my $type = ''; + my $tok_begin = $rtokens->[$i_begin]; + if ( $tok_begin eq ':' ) { $tok_begin = '::' } + my $id_scan_state_begin = $id_scan_state; + my $identifier_begin = $identifier; + my $tok = $tok_begin; + my $message = ""; + + my $in_prototype_or_signature = $container_type =~ /^sub/; + + # these flags will be used to help figure out the type: + my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ ); + my $saw_type; + + # allow old package separator (') except in 'use' statement + my $allow_tick = ( $last_nonblank_token ne 'use' ); + + # get started by defining a type and a state if necessary + unless ($id_scan_state) { + $context = UNKNOWN_CONTEXT; + + # fixup for digraph + if ( $tok eq '>' ) { + $tok = '->'; + $tok_begin = $tok; + } + $identifier = $tok; + + if ( $tok eq '$' || $tok eq '*' ) { + $id_scan_state = '$'; + $context = SCALAR_CONTEXT; + } + elsif ( $tok eq '%' || $tok eq '@' ) { + $id_scan_state = '$'; + $context = LIST_CONTEXT; + } + elsif ( $tok eq '&' ) { + $id_scan_state = '&'; + } + elsif ( $tok eq 'sub' or $tok eq 'package' ) { + $saw_alpha = 0; # 'sub' is considered type info here + $id_scan_state = '$'; + $identifier .= ' '; # need a space to separate sub from sub name + } + elsif ( $tok eq '::' ) { + $id_scan_state = 'A'; + } + elsif ( $tok =~ /^[A-Za-z_]/ ) { + $id_scan_state = ':'; + } + elsif ( $tok eq '->' ) { + $id_scan_state = '$'; + } + else { + + # shouldn't happen + my ( $a, $b, $c ) = caller; + warning("Program Bug: scan_identifier given bad token = $tok \n"); + warning(" called from sub $a line: $c\n"); + report_definite_bug(); + } + $saw_type = !$saw_alpha; + } + else { + $i--; + $saw_type = ( $tok =~ /([\$\%\@\*\&])/ ); + } + + # now loop to gather the identifier + my $i_save = $i; + + while ( $i < $max_token_index ) { + $i_save = $i unless ( $tok =~ /^\s*$/ ); + $tok = $rtokens->[ ++$i ]; + + if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) { + $tok = '::'; + $i++; + } + + if ( $id_scan_state eq '$' ) { # starting variable name + + if ( $tok eq '$' ) { + + $identifier .= $tok; + + # we've got a punctuation variable if end of line (punct.t) + if ( $i == $max_token_index ) { + $type = 'i'; + $id_scan_state = ''; + last; + } + } + + # POSTDEFREF ->@ ->% ->& ->* + elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) { + $identifier .= $tok; + } + elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric .. + $saw_alpha = 1; + $id_scan_state = ':'; # now need :: + $identifier .= $tok; + } + elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. + $saw_alpha = 1; + $id_scan_state = ':'; # now need :: + $identifier .= $tok; + + # Perl will accept leading digits in identifiers, + # although they may not always produce useful results. + # Something like $main::0 is ok. But this also works: + # + # sub howdy::123::bubba{ print "bubba $54321!\n" } + # howdy::123::bubba(); + # + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric + $saw_alpha = 1; + $id_scan_state = ':'; # now need :: + $identifier .= $tok; + } + elsif ( $tok eq '::' ) { + $id_scan_state = 'A'; + $identifier .= $tok; + } + + # $# and POSTDEFREF ->$# + elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array + $identifier .= $tok; # keep same state, a $ could follow + } + elsif ( $tok eq '{' ) { + + # check for something like ${#} or ${©} + if ( + ( + $identifier eq '$' + || $identifier eq '@' + || $identifier eq '$#' + ) + && $i + 2 <= $max_token_index + && $rtokens->[ $i + 2 ] eq '}' + && $rtokens->[ $i + 1 ] !~ /[\s\w]/ + ) + { + my $next2 = $rtokens->[ $i + 2 ]; + my $next1 = $rtokens->[ $i + 1 ]; + $identifier .= $tok . $next1 . $next2; + $i += 2; + $id_scan_state = ''; + last; + } + + # skip something like ${xxx} or ->{ + $id_scan_state = ''; + + # if this is the first token of a line, any tokens for this + # identifier have already been accumulated + if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; } + $i = $i_save; + last; + } + + # space ok after leading $ % * & @ + elsif ( $tok =~ /^\s*$/ ) { + + if ( $identifier =~ /^[\$\%\*\&\@]/ ) { + + if ( length($identifier) > 1 ) { + $id_scan_state = ''; + $i = $i_save; + $type = 'i'; # probably punctuation variable + last; + } + else { + + # spaces after $'s are common, and space after @ + # is harmless, so only complain about space + # after other type characters. Space after $ and + # @ will be removed in formatting. Report space + # after % and * because they might indicate a + # parsing error. In other words '% ' might be a + # modulo operator. Delete this warning if it + # gets annoying. + if ( $identifier !~ /^[\@\$]$/ ) { + $message = + "Space in identifier, following $identifier\n"; + } + } + } + + # else: + # space after '->' is ok + } + elsif ( $tok eq '^' ) { + + # check for some special variables like $^W + if ( $identifier =~ /^[\$\*\@\%]$/ ) { + $identifier .= $tok; + $id_scan_state = 'A'; + + # Perl accepts '$^]' or '@^]', but + # there must not be a space before the ']'. + my $next1 = $rtokens->[ $i + 1 ]; + if ( $next1 eq ']' ) { + $i++; + $identifier .= $next1; + $id_scan_state = ""; + last; + } + } + else { + $id_scan_state = ''; + } + } + else { # something else + + if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) { + $id_scan_state = ''; + $i = $i_save; + $type = 'i'; # probably punctuation variable + last; + } + + # check for various punctuation variables + if ( $identifier =~ /^[\$\*\@\%]$/ ) { + $identifier .= $tok; + } + + # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#* + elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) { + $identifier .= $tok; + } + + elsif ( $identifier eq '$#' ) { + + if ( $tok eq '{' ) { $type = 'i'; $i = $i_save } + + # perl seems to allow just these: $#: $#- $#+ + elsif ( $tok =~ /^[\:\-\+]$/ ) { + $type = 'i'; + $identifier .= $tok; + } + else { + $i = $i_save; + write_logfile_entry( 'Use of $# is deprecated' . "\n" ); + } + } + elsif ( $identifier eq '$$' ) { + + # perl does not allow references to punctuation + # variables without braces. For example, this + # won't work: + # $:=\4; + # $a = $$:; + # You would have to use + # $a = ${$:}; + + $i = $i_save; + if ( $tok eq '{' ) { $type = 't' } + else { $type = 'i' } + } + elsif ( $identifier eq '->' ) { + $i = $i_save; + } + else { + $i = $i_save; + if ( length($identifier) == 1 ) { $identifier = ''; } + } + $id_scan_state = ''; + last; + } + } + elsif ( $id_scan_state eq '&' ) { # starting sub call? + + if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric .. + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + $identifier .= $tok; + } + elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric .. + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + $identifier .= $tok; + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + $identifier .= $tok; + } + elsif ( $tok =~ /^\s*$/ ) { # allow space + } + elsif ( $tok eq '::' ) { # leading :: + $id_scan_state = 'A'; # accept alpha next + $identifier .= $tok; + } + elsif ( $tok eq '{' ) { + if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; } + $i = $i_save; + $id_scan_state = ''; + last; + } + else { + + # punctuation variable? + # testfile: cunningham4.pl + # + # We have to be careful here. If we are in an unknown state, + # we will reject the punctuation variable. In the following + # example the '&' is a binary operator but we are in an unknown + # state because there is no sigil on 'Prima', so we don't + # know what it is. But it is a bad guess that + # '&~' is a function variable. + # $self->{text}->{colorMap}->[ + # Prima::PodView::COLOR_CODE_FOREGROUND + # & ~tb::COLOR_INDEX ] = + # $sec->{ColorCode} + if ( $identifier eq '&' && $expecting ) { + $identifier .= $tok; + } + else { + $identifier = ''; + $i = $i_save; + $type = '&'; + } + $id_scan_state = ''; + last; + } + } + elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::) + + if ( $tok =~ /^[A-Za-z_]/ ) { # found it + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok eq "'" && $allow_tick ) { + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { + $id_scan_state = '('; + $identifier .= $tok; + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { + $id_scan_state = ')'; + $identifier .= $tok; + } + else { + $id_scan_state = ''; + $i = $i_save; + last; + } + } + elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha + + if ( $tok eq '::' ) { # got it + $identifier .= $tok; + $id_scan_state = 'A'; # now require alpha + } + elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above + $identifier .= $tok; + $id_scan_state = ':'; # now need :: + $saw_alpha = 1; + } + elsif ( $tok eq "'" && $allow_tick ) { # tick + + if ( $is_keyword{$identifier} ) { + $id_scan_state = ''; # that's all + $i = $i_save; + } + else { + $identifier .= $tok; + } + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { + $id_scan_state = '('; + $identifier .= $tok; + } + elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { + $id_scan_state = ')'; + $identifier .= $tok; + } + else { + $id_scan_state = ''; # that's all + $i = $i_save; + last; + } + } + elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype + + if ( $tok eq '(' ) { # got it + $identifier .= $tok; + $id_scan_state = ')'; # now find the end of it + } + elsif ( $tok =~ /^\s*$/ ) { # blank - keep going + $identifier .= $tok; + } + else { + $id_scan_state = ''; # that's all - no prototype + $i = $i_save; + last; + } + } + elsif ( $id_scan_state eq ')' ) { # looking for ) to end + + if ( $tok eq ')' ) { # got it + $identifier .= $tok; + $id_scan_state = ''; # all done + last; + } + elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) { + $identifier .= $tok; + } + else { # probable error in script, but keep going + warning("Unexpected '$tok' while seeking end of prototype\n"); + $identifier .= $tok; + } + } + else { # can get here due to error in initialization + $id_scan_state = ''; + $i = $i_save; + last; + } + } + + if ( $id_scan_state eq ')' ) { + warning("Hit end of line while seeking ) to end prototype\n"); + } + + # once we enter the actual identifier, it may not extend beyond + # the end of the current line + if ( $id_scan_state =~ /^[A\:\(\)]/ ) { + $id_scan_state = ''; + } + if ( $i < 0 ) { $i = 0 } + + unless ($type) { + + if ($saw_type) { + + if ($saw_alpha) { + if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) { + $type = 'w'; + } + else { $type = 'i' } + } + elsif ( $identifier eq '->' ) { + $type = '->'; + } + elsif ( + ( length($identifier) > 1 ) + + # In something like '@$=' we have an identifier '@$' + # In something like '$${' we have type '$$' (and only + # part of an identifier) + && !( $identifier =~ /\$$/ && $tok eq '{' ) + && ( $identifier !~ /^(sub |package )$/ ) + ) + { + $type = 'i'; + } + else { $type = 't' } + } + elsif ($saw_alpha) { + + # type 'w' includes anything without leading type info + # ($,%,@,*) including something like abc::def::ghi + $type = 'w'; + } + else { + $type = ''; + } # this can happen on a restart + } + + if ($identifier) { + $tok = $identifier; + if ($message) { write_logfile_entry($message) } + } + else { + $tok = $tok_begin; + $i = $i_begin; + } + + TOKENIZER_DEBUG_FLAG_SCAN_ID && do { + my ( $a, $b, $c ) = caller; + print STDOUT +"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n"; + print STDOUT +"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n"; + }; + return ( $i, $tok, $type, $id_scan_state, $identifier ); +} + +{ + + # saved package and subnames in case prototype is on separate line + my ( $package_saved, $subname_saved ); + + sub do_scan_sub { + + # do_scan_sub parses a sub name and prototype + # it is called with $i_beg equal to the index of the first nonblank + # token following a 'sub' token. + + # TODO: add future error checks to be sure we have a valid + # sub name. For example, 'sub &doit' is wrong. Also, be sure + # a name is given if and only if a non-anonymous sub is + # appropriate. + # USES GLOBAL VARS: $current_package, $last_nonblank_token, + # $in_attribute_list, %saw_function_definition, + # $statement_type + + my ( + $input_line, $i, $i_beg, + $tok, $type, $rtokens, + $rtoken_map, $id_scan_state, $max_token_index + ) = @_; + $id_scan_state = ""; # normally we get everything in one call + my $subname = undef; + my $package = undef; + my $proto = undef; + my $attrs = undef; + my $match; + + my $pos_beg = $rtoken_map->[$i_beg]; + pos($input_line) = $pos_beg; + + # Look for the sub NAME + if ( + $input_line =~ m/\G\s* + ((?:\w*(?:'|::))*) # package - something that ends in :: or ' + (\w+) # NAME - required + /gcx + ) + { + $match = 1; + $subname = $2; + + $package = ( defined($1) && $1 ) ? $1 : $current_package; + $package =~ s/\'/::/g; + if ( $package =~ /^\:/ ) { $package = 'main' . $package } + $package =~ s/::$//; + my $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); + $type = 'i'; + } + + # Now look for PROTO ATTRS + # Look for prototype/attributes which are usually on the same + # line as the sub name but which might be on a separate line. + # For example, we might have an anonymous sub with attributes, + # or a prototype on a separate line from its sub name + + # NOTE: We only want to parse PROTOTYPES here. If we see anything that + # does not look like a prototype, we assume it is a SIGNATURE and we + # will stop and let the the standard tokenizer handle it. In + # particular, we stop if we see any nested parens, braces, or commas. + my $saw_opening_paren = $input_line =~ /\G\s*\(/; + if ( + $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO + (\s*:)? # ATTRS leading ':' + /gcx + && ( $1 || $2 ) + ) + { + $proto = $1; + $attrs = $2; + + # If we also found the sub name on this call then append PROTO. + # This is not necessary but for compatability with previous + # versions when the -csc flag is used: + if ( $match && $proto ) { + $tok .= $proto; + } + $match ||= 1; + + # Handle prototype on separate line from subname + if ($subname_saved) { + $package = $package_saved; + $subname = $subname_saved; + $tok = $last_nonblank_token; + } + $type = 'i'; + } + + if ($match) { + + # ATTRS: if there are attributes, back up and let the ':' be + # found later by the scanner. + my $pos = pos($input_line); + if ($attrs) { + $pos -= length($attrs); + } + + my $next_nonblank_token = $tok; + + # catch case of line with leading ATTR ':' after anonymous sub + if ( $pos == $pos_beg && $tok eq ':' ) { + $type = 'A'; + $in_attribute_list = 1; + } + + # Otherwise, if we found a match we must convert back from + # string position to the pre_token index for continued parsing. + else { + + # I don't think an error flag can occur here ..but ? + my $error; + ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, + $max_token_index ); + if ($error) { warning("Possibly invalid sub\n") } + + # check for multiple definitions of a sub + ( $next_nonblank_token, my $i_next ) = + find_next_nonblank_token_on_this_line( $i, $rtokens, + $max_token_index ); + } + + if ( $next_nonblank_token =~ /^(\s*|#)$/ ) + { # skip blank or side comment + my ( $rpre_tokens, $rpre_types ) = + peek_ahead_for_n_nonblank_pre_tokens(1); + if ( defined($rpre_tokens) && @{$rpre_tokens} ) { + $next_nonblank_token = $rpre_tokens->[0]; + } + else { + $next_nonblank_token = '}'; + } + } + $package_saved = ""; + $subname_saved = ""; + + # See what's next... + if ( $next_nonblank_token eq '{' ) { + if ($subname) { + + # Check for multiple definitions of a sub, but + # it is ok to have multiple sub BEGIN, etc, + # so we do not complain if name is all caps + if ( $saw_function_definition{$package}{$subname} + && $subname !~ /^[A-Z]+$/ ) + { + my $lno = $saw_function_definition{$package}{$subname}; + warning( +"already saw definition of 'sub $subname' in package '$package' at line $lno\n" + ); + } + $saw_function_definition{$package}{$subname} = + $tokenizer_self->{_last_line_number}; + } + } + elsif ( $next_nonblank_token eq ';' ) { + } + elsif ( $next_nonblank_token eq '}' ) { + } + + # ATTRS - if an attribute list follows, remember the name + # of the sub so the next opening brace can be labeled. + # Setting 'statement_type' causes any ':'s to introduce + # attributes. + elsif ( $next_nonblank_token eq ':' ) { + $statement_type = $tok; + } + + # if we stopped before an open paren ... + elsif ( $next_nonblank_token eq '(' ) { + + # If we DID NOT see this paren above then it must be on the + # next line so we will set a flag to come back here and see if + # it is a PROTOTYPE + + # Otherwise, we assume it is a SIGNATURE rather than a + # PROTOTYPE and let the normal tokenizer handle it as a list + if ( !$saw_opening_paren ) { + $id_scan_state = 'sub'; # we must come back to get proto + $package_saved = $package; + $subname_saved = $subname; + } + $statement_type = $tok; + } + elsif ($next_nonblank_token) { # EOF technically ok + warning( +"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n" + ); + } + check_prototype( $proto, $package, $subname ); + } + + # no match but line not blank + else { + } + return ( $i, $tok, $type, $id_scan_state ); + } +} + +#########i############################################################### +# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS +######################################################################### + +sub find_next_nonblank_token { + my ( $i, $rtokens, $max_token_index ) = @_; + + if ( $i >= $max_token_index ) { + if ( !peeked_ahead() ) { + peeked_ahead(1); + $rtokens = + peek_ahead_for_nonblank_token( $rtokens, $max_token_index ); + } + } + my $next_nonblank_token = $rtokens->[ ++$i ]; + + if ( $next_nonblank_token =~ /^\s*$/ ) { + $next_nonblank_token = $rtokens->[ ++$i ]; + } + return ( $next_nonblank_token, $i ); +} + +sub numerator_expected { + + # this is a filter for a possible numerator, in support of guessing + # for the / pattern delimiter token. + # returns - + # 1 - yes + # 0 - can't tell + # -1 - no + # Note: I am using the convention that variables ending in + # _expected have these 3 possible values. + my ( $i, $rtokens, $max_token_index ) = @_; + my $numerator_expected = 0; + + my $next_token = $rtokens->[ $i + 1 ]; + if ( $next_token eq '=' ) { $i++; } # handle /= + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { + $numerator_expected = 1; + } + else { + + if ( $next_nonblank_token =~ /^\s*$/ ) { + $numerator_expected = 0; + } + else { + $numerator_expected = -1; + } + } + return $numerator_expected; +} + +sub pattern_expected { + + # This is the start of a filter for a possible pattern. + # It looks at the token after a possible pattern and tries to + # determine if that token could end a pattern. + # returns - + # 1 - yes + # 0 - can't tell + # -1 - no + my ( $i, $rtokens, $max_token_index ) = @_; + my $is_pattern = 0; + + my $next_token = $rtokens->[ $i + 1 ]; + if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier + my ( $next_nonblank_token, $i_next ) = + find_next_nonblank_token( $i, $rtokens, $max_token_index ); + + # list of tokens which may follow a pattern + # (can probably be expanded) + if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ ) + { + $is_pattern = 1; + } + else { + + if ( $next_nonblank_token =~ /^\s*$/ ) { + $is_pattern = 0; + } + else { + $is_pattern = -1; + } + } + return $is_pattern; +} + +sub find_next_nonblank_token_on_this_line { + my ( $i, $rtokens, $max_token_index ) = @_; + my $next_nonblank_token; + + if ( $i < $max_token_index ) { + $next_nonblank_token = $rtokens->[ ++$i ]; + + if ( $next_nonblank_token =~ /^\s*$/ ) { + + if ( $i < $max_token_index ) { + $next_nonblank_token = $rtokens->[ ++$i ]; + } + } + } + else { + $next_nonblank_token = ""; + } + return ( $next_nonblank_token, $i ); +} + +sub find_angle_operator_termination { + + # We are looking at a '<' and want to know if it is an angle operator. + # We are to return: + # $i = pretoken index of ending '>' if found, current $i otherwise + # $type = 'Q' if found, '>' otherwise + my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_; + my $i = $i_beg; + my $type = '<'; + pos($input_line) = 1 + $rtoken_map->[$i]; + + my $filter; + + # we just have to find the next '>' if a term is expected + if ( $expecting == TERM ) { $filter = '[\>]' } + + # we have to guess if we don't know what is expected + elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' } + + # shouldn't happen - we shouldn't be here if operator is expected + else { warning("Program Bug in find_angle_operator_termination\n") } + + # To illustrate what we might be looking at, in case we are + # guessing, here are some examples of valid angle operators + # (or file globs): + # + # + # <$fh> + # <*.c *.h> + # <_> + # ( glob.t) + # <${PREFIX}*img*.$IMAGE_TYPE> + # + # + # <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl> + # + # Here are some examples of lines which do not have angle operators: + # return undef unless $self->[2]++ < $#{$self->[1]}; + # < 2 || @$t > + # + # the following line from dlister.pl caused trouble: + # print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n"; + # + # If the '<' starts an angle operator, it must end on this line and + # it must not have certain characters like ';' and '=' in it. I use + # this to limit the testing. This filter should be improved if + # possible. + + if ( $input_line =~ /($filter)/g ) { + + if ( $1 eq '>' ) { + + # We MAY have found an angle operator termination if we get + # here, but we need to do more to be sure we haven't been + # fooled. + my $pos = pos($input_line); + + my $pos_beg = $rtoken_map->[$i]; + my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) ); + + # Reject if the closing '>' follows a '-' as in: + # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { } + if ( $expecting eq UNKNOWN ) { + my $check = substr( $input_line, $pos - 2, 1 ); + if ( $check eq '-' ) { + return ( $i, $type ); + } + } + + ######################################debug##### + #write_diagnostics( "ANGLE? :$str\n"); + #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n"; + ######################################debug##### + $type = 'Q'; + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + + # It may be possible that a quote ends midway in a pretoken. + # If this happens, it may be necessary to split the pretoken. + if ($error) { + warning( + "Possible tokinization error..please check this line\n"); + report_possible_bug(); + } + + # Now let's see where we stand.... + # OK if math op not possible + if ( $expecting == TERM ) { + } + + # OK if there are no more than 2 pre-tokens inside + # (not possible to write 2 token math between < and >) + # This catches most common cases + elsif ( $i <= $i_beg + 3 ) { + write_diagnostics("ANGLE(1 or 2 tokens): $str\n"); + } + + # Not sure.. + else { + + # Let's try a Brace Test: any braces inside must balance + my $br = 0; + while ( $str =~ /\{/g ) { $br++ } + while ( $str =~ /\}/g ) { $br-- } + my $sb = 0; + while ( $str =~ /\[/g ) { $sb++ } + while ( $str =~ /\]/g ) { $sb-- } + my $pr = 0; + while ( $str =~ /\(/g ) { $pr++ } + while ( $str =~ /\)/g ) { $pr-- } + + # if braces do not balance - not angle operator + if ( $br || $sb || $pr ) { + $i = $i_beg; + $type = '<'; + write_diagnostics( + "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n"); + } + + # we should keep doing more checks here...to be continued + # Tentatively accepting this as a valid angle operator. + # There are lots more things that can be checked. + else { + write_diagnostics( + "ANGLE-Guessing yes: $str expecting=$expecting\n"); + write_logfile_entry("Guessing angle operator here: $str\n"); + } + } + } + + # didn't find ending > + else { + if ( $expecting == TERM ) { + warning("No ending > for angle operator\n"); + } + } + } + return ( $i, $type ); +} + +sub scan_number_do { + + # scan a number in any of the formats that Perl accepts + # Underbars (_) are allowed in decimal numbers. + # input parameters - + # $input_line - the string to scan + # $i - pre_token index to start scanning + # $rtoken_map - reference to the pre_token map giving starting + # character position in $input_line of token $i + # output parameters - + # $i - last pre_token index of the number just scanned + # number - the number (characters); or undef if not a number + + my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_; + my $pos_beg = $rtoken_map->[$i]; + my $pos; + my $i_begin = $i; + my $number = undef; + my $type = $input_type; + + my $first_char = substr( $input_line, $pos_beg, 1 ); + + # Look for bad starting characters; Shouldn't happen.. + if ( $first_char !~ /[\d\.\+\-Ee]/ ) { + warning("Program bug - scan_number given character $first_char\n"); + report_definite_bug(); + return ( $i, $type, $number ); + } + + # handle v-string without leading 'v' character ('Two Dot' rule) + # (vstring.t) + # TODO: v-strings may contain underscores + pos($input_line) = $pos_beg; + if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { + $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $number = substr( $input_line, $pos_beg, $numc ); + $type = 'v'; + report_v_string($number); + } + + # handle octal, hex, binary + if ( !defined($number) ) { + pos($input_line) = $pos_beg; + if ( $input_line =~ + /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g ) + { + $pos = pos($input_line); + my $numc = $pos - $pos_beg; + $number = substr( $input_line, $pos_beg, $numc ); + $type = 'n'; + } + } + + # handle decimal + if ( !defined($number) ) { + pos($input_line) = $pos_beg; + + if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) { + $pos = pos($input_line); + + # watch out for things like 0..40 which would give 0. by this; + if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' ) + && ( substr( $input_line, $pos, 1 ) eq '.' ) ) + { + $pos--; + } + my $numc = $pos - $pos_beg; + $number = substr( $input_line, $pos_beg, $numc ); + $type = 'n'; + } + } + + # filter out non-numbers like e + - . e2 .e3 +e6 + # the rule: at least one digit, and any 'e' must be preceded by a digit + if ( + $number !~ /\d/ # no digits + || ( $number =~ /^(.*)[eE]/ + && $1 !~ /\d/ ) # or no digits before the 'e' + ) + { + $number = undef; + $type = $input_type; + return ( $i, $type, $number ); + } + + # Found a number; now we must convert back from character position + # to pre_token index. An error here implies user syntax error. + # An example would be an invalid octal number like '009'. + my $error; + ( $i, $error ) = + inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index ); + if ($error) { warning("Possibly invalid number\n") } + + return ( $i, $type, $number ); +} + +sub inverse_pretoken_map { + + # Starting with the current pre_token index $i, scan forward until + # finding the index of the next pre_token whose position is $pos. + my ( $i, $pos, $rtoken_map, $max_token_index ) = @_; + my $error = 0; + + while ( ++$i <= $max_token_index ) { + + if ( $pos <= $rtoken_map->[$i] ) { + + # Let the calling routine handle errors in which we do not + # land on a pre-token boundary. It can happen by running + # perltidy on some non-perl scripts, for example. + if ( $pos < $rtoken_map->[$i] ) { $error = 1 } + $i--; + last; + } + } + return ( $i, $error ); +} + +sub find_here_doc { + + # find the target of a here document, if any + # input parameters: + # $i - token index of the second < of << + # ($i must be less than the last token index if this is called) + # output parameters: + # $found_target = 0 didn't find target; =1 found target + # HERE_TARGET - the target string (may be empty string) + # $i - unchanged if not here doc, + # or index of the last token of the here target + # $saw_error - flag noting unbalanced quote on here target + my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_; + my $ibeg = $i; + my $found_target = 0; + my $here_doc_target = ''; + my $here_quote_character = ''; + my $saw_error = 0; + my ( $next_nonblank_token, $i_next_nonblank, $next_token ); + $next_token = $rtokens->[ $i + 1 ]; + + # perl allows a backslash before the target string (heredoc.t) + my $backslash = 0; + if ( $next_token eq '\\' ) { + $backslash = 1; + $next_token = $rtokens->[ $i + 2 ]; + } + + ( $next_nonblank_token, $i_next_nonblank ) = + find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index ); + + if ( $next_nonblank_token =~ /[\'\"\`]/ ) { + + my $in_quote = 1; + my $quote_depth = 0; + my $quote_pos = 0; + my $quoted_string; + + ( + $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens, + $here_quote_character, $quote_pos, $quote_depth, $max_token_index ); + + if ($in_quote) { # didn't find end of quote, so no target found + $i = $ibeg; + if ( $expecting == TERM ) { + warning( +"Did not find here-doc string terminator ($here_quote_character) before end of line \n" + ); + $saw_error = 1; + } + } + else { # found ending quote + ##my $j; + $found_target = 1; + + my $tokj; + foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) { + $tokj = $rtokens->[$j]; + + # we have to remove any backslash before the quote character + # so that the here-doc-target exactly matches this string + next + if ( $tokj eq "\\" + && $j < $i - 1 + && $rtokens->[ $j + 1 ] eq $here_quote_character ); + $here_doc_target .= $tokj; + } + } + } + + elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) { + $found_target = 1; + write_logfile_entry( + "found blank here-target after <<; suggest using \"\"\n"); + $i = $ibeg; + } + elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after << + + my $here_doc_expected; + if ( $expecting == UNKNOWN ) { + $here_doc_expected = guess_if_here_doc($next_token); + } + else { + $here_doc_expected = 1; + } + + if ($here_doc_expected) { + $found_target = 1; + $here_doc_target = $next_token; + $i = $ibeg + 1; + } + + } + else { + + if ( $expecting == TERM ) { + $found_target = 1; + write_logfile_entry("Note: bare here-doc operator <<\n"); + } + else { + $i = $ibeg; + } + } + + # patch to neglect any prepended backslash + if ( $found_target && $backslash ) { $i++ } + + return ( $found_target, $here_doc_target, $here_quote_character, $i, + $saw_error ); +} + +sub do_quote { + + # follow (or continue following) quoted string(s) + # $in_quote return code: + # 0 - ok, found end + # 1 - still must find end of quote whose target is $quote_character + # 2 - still looking for end of first of two quotes + # + # Returns updated strings: + # $quoted_string_1 = quoted string seen while in_quote=1 + # $quoted_string_2 = quoted string seen while in_quote=2 + my ( + $i, $in_quote, $quote_character, + $quote_pos, $quote_depth, $quoted_string_1, + $quoted_string_2, $rtokens, $rtoken_map, + $max_token_index + ) = @_; + + my $in_quote_starting = $in_quote; + + my $quoted_string; + if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow + my $ibeg = $i; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + $quoted_string_2 .= $quoted_string; + if ( $in_quote == 1 ) { + if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; } + $quote_character = ''; + } + else { + $quoted_string_2 .= "\n"; + } + } + + if ( $in_quote == 1 ) { # one (more) quote to follow + my $ibeg = $i; + ( + $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string + ) + = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, + $quote_pos, $quote_depth, $max_token_index ); + $quoted_string_1 .= $quoted_string; + if ( $in_quote == 1 ) { + $quoted_string_1 .= "\n"; + } + } + return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, + $quoted_string_1, $quoted_string_2 ); +} + +sub follow_quoted_string { + + # scan for a specific token, skipping escaped characters + # if the quote character is blank, use the first non-blank character + # input parameters: + # $rtokens = reference to the array of tokens + # $i = the token index of the first character to search + # $in_quote = number of quoted strings being followed + # $beginning_tok = the starting quote character + # $quote_pos = index to check next for alphanumeric delimiter + # output parameters: + # $i = the token index of the ending quote character + # $in_quote = decremented if found end, unchanged if not + # $beginning_tok = the starting quote character + # $quote_pos = index to check next for alphanumeric delimiter + # $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested. + # $quoted_string = the text of the quote (without quotation tokens) + my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth, + $max_token_index ) + = @_; + my ( $tok, $end_tok ); + my $i = $i_beg - 1; + my $quoted_string = ""; + + TOKENIZER_DEBUG_FLAG_QUOTE && do { + print STDOUT +"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n"; + }; + + # get the corresponding end token + if ( $beginning_tok !~ /^\s*$/ ) { + $end_tok = matching_end_token($beginning_tok); + } + + # a blank token means we must find and use the first non-blank one + else { + my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a + + while ( $i < $max_token_index ) { + $tok = $rtokens->[ ++$i ]; + + if ( $tok !~ /^\s*$/ ) { + + if ( ( $tok eq '#' ) && ($allow_quote_comments) ) { + $i = $max_token_index; + } + else { + + if ( length($tok) > 1 ) { + if ( $quote_pos <= 0 ) { $quote_pos = 1 } + $beginning_tok = substr( $tok, $quote_pos - 1, 1 ); + } + else { + $beginning_tok = $tok; + $quote_pos = 0; + } + $end_tok = matching_end_token($beginning_tok); + $quote_depth = 1; + last; + } + } + else { + $allow_quote_comments = 1; + } + } + } + + # There are two different loops which search for the ending quote + # character. In the rare case of an alphanumeric quote delimiter, we + # have to look through alphanumeric tokens character-by-character, since + # the pre-tokenization process combines multiple alphanumeric + # characters, whereas for a non-alphanumeric delimiter, only tokens of + # length 1 can match. + + ################################################################### + # Case 1 (rare): loop for case of alphanumeric quote delimiter.. + # "quote_pos" is the position the current word to begin searching + ################################################################### + if ( $beginning_tok =~ /\w/ ) { + + # Note this because it is not recommended practice except + # for obfuscated perl contests + if ( $in_quote == 1 ) { + write_logfile_entry( + "Note: alphanumeric quote delimiter ($beginning_tok) \n"); + } + + while ( $i < $max_token_index ) { + + if ( $quote_pos == 0 || ( $i < 0 ) ) { + $tok = $rtokens->[ ++$i ]; + + if ( $tok eq '\\' ) { + + # retain backslash unless it hides the end token + $quoted_string .= $tok + unless $rtokens->[ $i + 1 ] eq $end_tok; + $quote_pos++; + last if ( $i >= $max_token_index ); + $tok = $rtokens->[ ++$i ]; + } + } + my $old_pos = $quote_pos; + + unless ( defined($tok) && defined($end_tok) && defined($quote_pos) ) + { + + } + $quote_pos = 1 + index( $tok, $end_tok, $quote_pos ); + + if ( $quote_pos > 0 ) { + + $quoted_string .= + substr( $tok, $old_pos, $quote_pos - $old_pos - 1 ); + + $quote_depth--; + + if ( $quote_depth == 0 ) { + $in_quote--; + last; + } + } + else { + $quoted_string .= substr( $tok, $old_pos ); + } + } + } + + ######################################################################## + # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter.. + ######################################################################## + else { + + while ( $i < $max_token_index ) { + $tok = $rtokens->[ ++$i ]; + + if ( $tok eq $end_tok ) { + $quote_depth--; + + if ( $quote_depth == 0 ) { + $in_quote--; + last; + } + } + elsif ( $tok eq $beginning_tok ) { + $quote_depth++; + } + elsif ( $tok eq '\\' ) { + + # retain backslash unless it hides the beginning or end token + $tok = $rtokens->[ ++$i ]; + $quoted_string .= '\\' + unless ( $tok eq $end_tok || $tok eq $beginning_tok ); + } + $quoted_string .= $tok; + } + } + if ( $i > $max_token_index ) { $i = $max_token_index } + return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth, + $quoted_string ); +} + +sub indicate_error { + my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_; + interrupt_logfile(); + warning($msg); + write_error_indicator_pair( $line_number, $input_line, $pos, $carrat ); + resume_logfile(); + return; +} + +sub write_error_indicator_pair { + my ( $line_number, $input_line, $pos, $carrat ) = @_; + my ( $offset, $numbered_line, $underline ) = + make_numbered_line( $line_number, $input_line, $pos ); + $underline = write_on_underline( $underline, $pos - $offset, $carrat ); + warning( $numbered_line . "\n" ); + $underline =~ s/\s*$//; + warning( $underline . "\n" ); + return; +} + +sub make_numbered_line { + + # Given an input line, its line number, and a character position of + # interest, create a string not longer than 80 characters of the form + # $lineno: sub_string + # such that the sub_string of $str contains the position of interest + # + # Here is an example of what we want, in this case we add trailing + # '...' because the line is long. + # + # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... + # + # Here is another example, this time in which we used leading '...' + # because of excessive length: + # + # 2: ... er of the World Wide Web Consortium's + # + # input parameters are: + # $lineno = line number + # $str = the text of the line + # $pos = position of interest (the error) : 0 = first character + # + # We return : + # - $offset = an offset which corrects the position in case we only + # display part of a line, such that $pos-$offset is the effective + # position from the start of the displayed line. + # - $numbered_line = the numbered line as above, + # - $underline = a blank 'underline' which is all spaces with the same + # number of characters as the numbered line. + + my ( $lineno, $str, $pos ) = @_; + my $offset = ( $pos < 60 ) ? 0 : $pos - 40; + my $excess = length($str) - $offset - 68; + my $numc = ( $excess > 0 ) ? 68 : undef; + + if ( defined($numc) ) { + if ( $offset == 0 ) { + $str = substr( $str, $offset, $numc - 4 ) . " ..."; + } + else { + $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ..."; + } + } + else { + + if ( $offset == 0 ) { + } + else { + $str = "... " . substr( $str, $offset + 4 ); + } + } + + my $numbered_line = sprintf( "%d: ", $lineno ); + $offset -= length($numbered_line); + $numbered_line .= $str; + my $underline = " " x length($numbered_line); + return ( $offset, $numbered_line, $underline ); +} + +sub write_on_underline { + + # The "underline" is a string that shows where an error is; it starts + # out as a string of blanks with the same length as the numbered line of + # code above it, and we have to add marking to show where an error is. + # In the example below, we want to write the string '--^' just below + # the line of bad code: + # + # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ... + # ---^ + # We are given the current underline string, plus a position and a + # string to write on it. + # + # In the above example, there will be 2 calls to do this: + # First call: $pos=19, pos_chr=^ + # Second call: $pos=16, pos_chr=--- + # + # This is a trivial thing to do with substr, but there is some + # checking to do. + + my ( $underline, $pos, $pos_chr ) = @_; + + # check for error..shouldn't happen + unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) { + return $underline; + } + my $excess = length($pos_chr) + $pos - length($underline); + if ( $excess > 0 ) { + $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess ); + } + substr( $underline, $pos, length($pos_chr) ) = $pos_chr; + return ($underline); +} + +sub pre_tokenize { + + # Break a string, $str, into a sequence of preliminary tokens. We + # are interested in these types of tokens: + # words (type='w'), example: 'max_tokens_wanted' + # digits (type = 'd'), example: '0755' + # whitespace (type = 'b'), example: ' ' + # any other single character (i.e. punct; type = the character itself). + # We cannot do better than this yet because we might be in a quoted + # string or pattern. Caller sets $max_tokens_wanted to 0 to get all + # tokens. + my ( $str, $max_tokens_wanted ) = @_; + + # we return references to these 3 arrays: + my @tokens = (); # array of the tokens themselves + my @token_map = (0); # string position of start of each token + my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct + + do { + + # whitespace + if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; } + + # numbers + # note that this must come before words! + elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; } + + # words + elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; } + + # single-character punctuation + elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; } + + # that's all.. + else { + return ( \@tokens, \@token_map, \@type ); + } + + push @tokens, $1; + push @token_map, pos($str); + + } while ( --$max_tokens_wanted != 0 ); + + return ( \@tokens, \@token_map, \@type ); +} + +sub show_tokens { + + # this is an old debug routine + # not called, but saved for reference + my ( $rtokens, $rtoken_map ) = @_; + my $num = scalar( @{$rtokens} ); + + foreach my $i ( 0 .. $num - 1 ) { + my $len = length( $rtokens->[$i] ); + print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n"; + } + return; +} + +{ + my %matching_end_token; + + BEGIN { + %matching_end_token = ( + '{' => '}', + '(' => ')', + '[' => ']', + '<' => '>', + ); + } + + sub matching_end_token { + + # return closing character for a pattern + my $beginning_token = shift; + if ( $matching_end_token{$beginning_token} ) { + return $matching_end_token{$beginning_token}; + } + return ($beginning_token); + } +} + +sub dump_token_types { + my ( $class, $fh ) = @_; + + # This should be the latest list of token types in use + # adding NEW_TOKENS: add a comment here + print $fh <<'END_OF_LIST'; + +Here is a list of the token types currently used for lines of type 'CODE'. +For the following tokens, the "type" of a token is just the token itself. + +.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> +( ) <= >= == =~ !~ != ++ -- /= x= +... **= <<= >>= &&= ||= //= <=> +, + - / * | % ! x ~ = \ ? : . < > ^ & + +The following additional token types are defined: + + type meaning + b blank (white space) + { indent: opening structural curly brace or square bracket or paren + (code block, anonymous hash reference, or anonymous array reference) + } outdent: right structural curly brace or square bracket or paren + [ left non-structural square bracket (enclosing an array index) + ] right non-structural square bracket + ( left non-structural paren (all but a list right of an =) + ) right non-structural paren + L left non-structural curly brace (enclosing a key) + R right non-structural curly brace + ; terminal semicolon + f indicates a semicolon in a "for" statement + h here_doc operator << + # a comment + Q indicates a quote or pattern + q indicates a qw quote block + k a perl keyword + C user-defined constant or constant function (with void prototype = ()) + U user-defined function taking parameters + G user-defined function taking block parameter (like grep/map/eval) + M (unused, but reserved for subroutine definition name) + P (unused, but -html uses it to label pod text) + t type indicater such as %,$,@,*,&,sub + w bare word (perhaps a subroutine call) + i identifier of some type (with leading %, $, @, *, &, sub, -> ) + n a number + v a v-string + F a file test operator (like -e) + Y File handle + Z identifier in indirect object slot: may be file handle, object + J LABEL: code block label + j LABEL after next, last, redo, goto + p unary + + m unary - + pp pre-increment operator ++ + mm pre-decrement operator -- + A : used as attribute separator + + Here are the '_line_type' codes used internally: + SYSTEM - system-specific code before hash-bang line + CODE - line of perl code (including comments) + POD_START - line starting pod, such as '=head' + POD - pod documentation text + POD_END - last line of pod section, '=cut' + HERE - text of here-document + HERE_END - last line of here-doc (target word) + FORMAT - format section + FORMAT_END - last line of format section, '.' + DATA_START - __DATA__ line + DATA - unidentified text following __DATA__ + END_START - __END__ line + END - unidentified text following __END__ + ERROR - we are in big trouble, probably not a perl script +END_OF_LIST + + return; +} + +BEGIN { + + # These names are used in error messages + @opening_brace_names = qw# '{' '[' '(' '?' #; + @closing_brace_names = qw# '}' ']' ')' ':' #; + + my @q; + + my @digraphs = qw( + .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> + <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. + ); + @is_digraph{@digraphs} = (1) x scalar(@digraphs); + + my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~); + @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs); + + my @tetragraphs = qw( <<>> ); + @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs); + + # make a hash of all valid token types for self-checking the tokenizer + # (adding NEW_TOKENS : select a new character and add to this list) + my @valid_token_types = qw# + A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v + { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ & + #; + push( @valid_token_types, @digraphs ); + push( @valid_token_types, @trigraphs ); + push( @valid_token_types, @tetragraphs ); + push( @valid_token_types, ( '#', ',', 'CORE::' ) ); + @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types); + + # a list of file test letters, as in -e (Table 3-4 of 'camel 3') + my @file_test_operators = + qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z); + @is_file_test_operator{@file_test_operators} = + (1) x scalar(@file_test_operators); + + # these functions have prototypes of the form (&), so when they are + # followed by a block, that block MAY BE followed by an operator. + # Smartmatch operator ~~ may be followed by anonymous hash or array ref + @q = qw( do eval ); + @is_block_operator{@q} = (1) x scalar(@q); + + # these functions allow an identifier in the indirect object slot + @q = qw( print printf sort exec system say); + @is_indirect_object_taker{@q} = (1) x scalar(@q); + + # These tokens may precede a code block + # patched for SWITCH/CASE/CATCH. Actually these could be removed + # now and we could let the extended-syntax coding handle them + @q = + qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else + unless do while until eval for foreach map grep sort + switch case given when catch try finally); + @is_code_block_token{@q} = (1) x scalar(@q); + + # I'll build the list of keywords incrementally + my @Keywords = (); + + # keywords and tokens after which a value or pattern is expected, + # but not an operator. In other words, these should consume terms + # to their right, or at least they are not expected to be followed + # immediately by operators. + my @value_requestor = qw( + AUTOLOAD + BEGIN + CHECK + DESTROY + END + EQ + GE + GT + INIT + LE + LT + NE + UNITCHECK + abs + accept + alarm + and + atan2 + bind + binmode + bless + break + caller + chdir + chmod + chomp + chop + chown + chr + chroot + close + closedir + cmp + connect + continue + cos + crypt + dbmclose + dbmopen + defined + delete + die + dump + each + else + elsif + eof + eq + exec + exists + exit + exp + fcntl + fileno + flock + for + foreach + formline + ge + getc + getgrgid + getgrnam + gethostbyaddr + gethostbyname + getnetbyaddr + getnetbyname + getpeername + getpgrp + getpriority + getprotobyname + getprotobynumber + getpwnam + getpwuid + getservbyname + getservbyport + getsockname + getsockopt + glob + gmtime + goto + grep + gt + hex + if + index + int + ioctl + join + keys + kill + last + lc + lcfirst + le + length + link + listen + local + localtime + lock + log + lstat + lt + map + mkdir + msgctl + msgget + msgrcv + msgsnd + my + ne + next + no + not + oct + open + opendir + or + ord + our + pack + pipe + pop + pos + print + printf + prototype + push + quotemeta + rand + read + readdir + readlink + readline + readpipe + recv + redo + ref + rename + require + reset + return + reverse + rewinddir + rindex + rmdir + scalar + seek + seekdir + select + semctl + semget + semop + send + sethostent + setnetent + setpgrp + setpriority + setprotoent + setservent + setsockopt + shift + shmctl + shmget + shmread + shmwrite + shutdown + sin + sleep + socket + socketpair + sort + splice + split + sprintf + sqrt + srand + stat + study + substr + symlink + syscall + sysopen + sysread + sysseek + system + syswrite + tell + telldir + tie + tied + truncate + uc + ucfirst + umask + undef + unless + unlink + unpack + unshift + untie + until + use + utime + values + vec + waitpid + warn + while + write + xor + + switch + case + given + when + err + say + + catch + ); + + # patched above for SWITCH/CASE given/when err say + # 'err' is a fairly safe addition. + # TODO: 'default' still needed if appropriate + # 'use feature' seen, but perltidy works ok without it. + # Concerned that 'default' could break code. + push( @Keywords, @value_requestor ); + + # These are treated the same but are not keywords: + my @extra_vr = qw( + constant + vars + ); + push( @value_requestor, @extra_vr ); + + @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor); + + # this list contains keywords which do not look for arguments, + # so that they might be followed by an operator, or at least + # not a term. + my @operator_requestor = qw( + endgrent + endhostent + endnetent + endprotoent + endpwent + endservent + fork + getgrent + gethostent + getlogin + getnetent + getppid + getprotoent + getpwent + getservent + setgrent + setpwent + time + times + wait + wantarray + ); + + push( @Keywords, @operator_requestor ); + + # These are treated the same but are not considered keywords: + my @extra_or = qw( + STDERR + STDIN + STDOUT + ); + + push( @operator_requestor, @extra_or ); + + @expecting_operator_token{@operator_requestor} = + (1) x scalar(@operator_requestor); + + # these token TYPES expect trailing operator but not a term + # note: ++ and -- are post-increment and decrement, 'C' = constant + my @operator_requestor_types = qw( ++ -- C <> q ); + @expecting_operator_types{@operator_requestor_types} = + (1) x scalar(@operator_requestor_types); + + # these token TYPES consume values (terms) + # note: pp and mm are pre-increment and decrement + # f=semicolon in for, F=file test operator + my @value_requestor_type = qw# + L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x + **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //= + <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ + f F pp mm Y p m U J G j >> << ^ t + ~. ^. |. &. ^.= |.= &.= + #; + push( @value_requestor_type, ',' ) + ; # (perl doesn't like a ',' in a qw block) + @expecting_term_types{@value_requestor_type} = + (1) x scalar(@value_requestor_type); + + # Note: the following valid token types are not assigned here to + # hashes requesting to be followed by values or terms, but are + # instead currently hard-coded into sub operator_expected: + # ) -> :: Q R Z ] b h i k n v w } # + + # For simple syntax checking, it is nice to have a list of operators which + # will really be unhappy if not followed by a term. This includes most + # of the above... + %really_want_term = %expecting_term_types; + + # with these exceptions... + delete $really_want_term{'U'}; # user sub, depends on prototype + delete $really_want_term{'F'}; # file test works on $_ if no following term + delete $really_want_term{'Y'}; # indirect object, too risky to check syntax; + # let perl do it + + @q = qw(q qq qw qx qr s y tr m); + @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q); + + # These keywords are handled specially in the tokenizer code: + my @special_keywords = qw( + do + eval + format + m + package + q + qq + qr + qw + qx + s + sub + tr + y + ); + push( @Keywords, @special_keywords ); + + # Keywords after which list formatting may be used + # WARNING: do not include |map|grep|eval or perl may die on + # syntax errors (map1.t). + my @keyword_taking_list = qw( + and + chmod + chomp + chop + chown + dbmopen + die + elsif + exec + fcntl + for + foreach + formline + getsockopt + if + index + ioctl + join + kill + local + msgctl + msgrcv + msgsnd + my + open + or + our + pack + print + printf + push + read + readpipe + recv + return + reverse + rindex + seek + select + semctl + semget + send + setpriority + setsockopt + shmctl + shmget + shmread + shmwrite + socket + socketpair + sort + splice + split + sprintf + substr + syscall + sysopen + sysread + sysseek + system + syswrite + tie + unless + unlink + unpack + unshift + until + vec + warn + while + given + when + ); + @is_keyword_taking_list{@keyword_taking_list} = + (1) x scalar(@keyword_taking_list); + + # These are not used in any way yet + # my @unused_keywords = qw( + # __FILE__ + # __LINE__ + # __PACKAGE__ + # ); + + # The list of keywords was originally extracted from function 'keyword' in + # perl file toke.c version 5.005.03, using this utility, plus a + # little editing: (file getkwd.pl): + # while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } } + # Add 'get' prefix where necessary, then split into the above lists. + # This list should be updated as necessary. + # The list should not contain these special variables: + # ARGV DATA ENV SIG STDERR STDIN STDOUT + # __DATA__ __END__ + + @is_keyword{@Keywords} = (1) x scalar(@Keywords); +} +1; + diff --git a/lib/Perl/Tidy/VerticalAligner.pm b/lib/Perl/Tidy/VerticalAligner.pm new file mode 100644 index 00000000..a6fc4d0a --- /dev/null +++ b/lib/Perl/Tidy/VerticalAligner.pm @@ -0,0 +1,2752 @@ +package Perl::Tidy::VerticalAligner; +use strict; +use warnings; + +use Perl::Tidy::VerticalAligner::Alignment; +use Perl::Tidy::VerticalAligner::Line; + +# The Perl::Tidy::VerticalAligner package collects output lines and +# attempts to line up certain common tokens, such as => and #, which are +# identified by the calling routine. +# +# There are two main routines: valign_input and flush. Append acts as a +# storage buffer, collecting lines into a group which can be vertically +# aligned. When alignment is no longer possible or desirable, it dumps +# the group to flush. +# +# valign_input -----> flush +# +# collects writes +# vertical one +# groups group + +BEGIN { + + # Caution: these debug flags produce a lot of output + # They should all be 0 except when debugging small scripts + + use constant VALIGN_DEBUG_FLAG_APPEND => 0; + use constant VALIGN_DEBUG_FLAG_APPEND0 => 0; + use constant VALIGN_DEBUG_FLAG_TERNARY => 0; + use constant VALIGN_DEBUG_FLAG_TABS => 0; + + my $debug_warning = sub { + print STDOUT "VALIGN_DEBUGGING with key $_[0]\n"; + return; + }; + + VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND'); + VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0'); + VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY'); + VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS'); + +} + +use vars qw( + $vertical_aligner_self + $current_line + $maximum_alignment_index + $ralignment_list + $maximum_jmax_seen + $minimum_jmax_seen + $previous_minimum_jmax_seen + $previous_maximum_jmax_seen + $maximum_line_index + $group_level + $group_type + $group_maximum_gap + $marginal_match + $last_level_written + $last_leading_space_count + $extra_indent_ok + $zero_count + @group_lines + $last_comment_column + $last_side_comment_line_number + $last_side_comment_length + $last_side_comment_level + $outdented_line_count + $first_outdented_line_at + $last_outdented_line_at + $diagnostics_object + $logger_object + $file_writer_object + @side_comment_history + $comment_leading_space_count + $is_matching_terminal_line + $consecutive_block_comments + + $cached_line_text + $cached_line_type + $cached_line_flag + $cached_seqno + $cached_line_valid + $cached_line_leading_space_count + $cached_seqno_string + + $valign_buffer_filling + @valign_buffer + + $seqno_string + $last_nonblank_seqno_string + + $rOpts + + $rOpts_maximum_line_length + $rOpts_variable_maximum_line_length + $rOpts_continuation_indentation + $rOpts_indent_columns + $rOpts_tabs + $rOpts_entab_leading_whitespace + $rOpts_valign + + $rOpts_fixed_position_side_comment + $rOpts_minimum_space_to_comment + +); + +sub initialize { + + ( + my $class, $rOpts, $file_writer_object, $logger_object, + $diagnostics_object + ) = @_; + + # variables describing the entire space group: + $ralignment_list = []; + $group_level = 0; + $last_level_written = -1; + $extra_indent_ok = 0; # can we move all lines to the right? + $last_side_comment_length = 0; + $maximum_jmax_seen = 0; + $minimum_jmax_seen = 0; + $previous_minimum_jmax_seen = 0; + $previous_maximum_jmax_seen = 0; + + # variables describing each line of the group + @group_lines = (); # list of all lines in group + + $outdented_line_count = 0; + $first_outdented_line_at = 0; + $last_outdented_line_at = 0; + $last_side_comment_line_number = 0; + $last_side_comment_level = -1; + $is_matching_terminal_line = 0; + + # most recent 3 side comments; [ line number, column ] + $side_comment_history[0] = [ -300, 0 ]; + $side_comment_history[1] = [ -200, 0 ]; + $side_comment_history[2] = [ -100, 0 ]; + + # valign_output_step_B cache: + $cached_line_text = ""; + $cached_line_type = 0; + $cached_line_flag = 0; + $cached_seqno = 0; + $cached_line_valid = 0; + $cached_line_leading_space_count = 0; + $cached_seqno_string = ""; + + # string of sequence numbers joined together + $seqno_string = ""; + $last_nonblank_seqno_string = ""; + + # frequently used parameters + $rOpts_indent_columns = $rOpts->{'indent-columns'}; + $rOpts_tabs = $rOpts->{'tabs'}; + $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'}; + $rOpts_fixed_position_side_comment = + $rOpts->{'fixed-position-side-comment'}; + $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'}; + $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; + $rOpts_variable_maximum_line_length = + $rOpts->{'variable-maximum-line-length'}; + $rOpts_valign = $rOpts->{'valign'}; + + $consecutive_block_comments = 0; + forget_side_comment(); + + initialize_for_new_group(); + + $vertical_aligner_self = {}; + bless $vertical_aligner_self, $class; + return $vertical_aligner_self; +} + +sub initialize_for_new_group { + $maximum_line_index = -1; # lines in the current group + $maximum_alignment_index = -1; # alignments in current group + $zero_count = 0; # count consecutive lines without tokens + $current_line = undef; # line being matched for alignment + $group_maximum_gap = 0; # largest gap introduced + $group_type = ""; + $marginal_match = 0; + $comment_leading_space_count = 0; + $last_leading_space_count = 0; + return; +} + +# interface to Perl::Tidy::Diagnostics routines +sub write_diagnostics { + my $msg = shift; + if ($diagnostics_object) { + $diagnostics_object->write_diagnostics($msg); + } + return; +} + +# interface to Perl::Tidy::Logger routines +sub warning { + my ($msg) = @_; + if ($logger_object) { + $logger_object->warning($msg); + } + return; +} + +sub write_logfile_entry { + my ($msg) = @_; + if ($logger_object) { + $logger_object->write_logfile_entry($msg); + } + return; +} + +sub report_definite_bug { + if ($logger_object) { + $logger_object->report_definite_bug(); + } + return; +} + +sub get_spaces { + + # return the number of leading spaces associated with an indentation + # variable $indentation is either a constant number of spaces or an + # object with a get_spaces method. + my $indentation = shift; + return ref($indentation) ? $indentation->get_spaces() : $indentation; +} + +sub get_recoverable_spaces { + + # return the number of spaces (+ means shift right, - means shift left) + # that we would like to shift a group of lines with the same indentation + # to get them to line up with their opening parens + my $indentation = shift; + return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; +} + +sub get_stack_depth { + + my $indentation = shift; + return ref($indentation) ? $indentation->get_stack_depth() : 0; +} + +sub make_alignment { + my ( $col, $token ) = @_; + + # make one new alignment at column $col which aligns token $token + ++$maximum_alignment_index; + + #my $alignment = new Perl::Tidy::VerticalAligner::Alignment( + my $alignment = Perl::Tidy::VerticalAligner::Alignment->new( + column => $col, + starting_column => $col, + matching_token => $token, + starting_line => $maximum_line_index, + ending_line => $maximum_line_index, + serial_number => $maximum_alignment_index, + ); + $ralignment_list->[$maximum_alignment_index] = $alignment; + return $alignment; +} + +sub dump_alignments { + print STDOUT +"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n"; + for my $i ( 0 .. $maximum_alignment_index ) { + my $column = $ralignment_list->[$i]->get_column(); + my $starting_column = $ralignment_list->[$i]->get_starting_column(); + my $matching_token = $ralignment_list->[$i]->get_matching_token(); + my $starting_line = $ralignment_list->[$i]->get_starting_line(); + my $ending_line = $ralignment_list->[$i]->get_ending_line(); + print STDOUT +"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n"; + } + return; +} + +sub save_alignment_columns { + for my $i ( 0 .. $maximum_alignment_index ) { + $ralignment_list->[$i]->save_column(); + } + return; +} + +sub restore_alignment_columns { + for my $i ( 0 .. $maximum_alignment_index ) { + $ralignment_list->[$i]->restore_column(); + } + return; +} + +sub forget_side_comment { + $last_comment_column = 0; + return; +} + +sub maximum_line_length_for_level { + + # return maximum line length for line starting with a given level + my $maximum_line_length = $rOpts_maximum_line_length; + if ($rOpts_variable_maximum_line_length) { + my $level = shift; + if ( $level < 0 ) { $level = 0 } + $maximum_line_length += $level * $rOpts_indent_columns; + } + return $maximum_line_length; +} + +sub valign_input { + + # Place one line in the current vertical group. + # + # The input parameters are: + # $level = indentation level of this line + # $rfields = reference to array of fields + # $rpatterns = reference to array of patterns, one per field + # $rtokens = reference to array of tokens starting fields 1,2,.. + # + # Here is an example of what this package does. In this example, + # we are trying to line up both the '=>' and the '#'. + # + # '18' => 'grave', # \` + # '19' => 'acute', # `' + # '20' => 'caron', # \v + # <-tabs-><--field 2 ---><-f3-> + # | | | | + # | | | | + # col1 col2 col3 col4 + # + # The calling routine has already broken the entire line into 3 fields as + # indicated. (So the work of identifying promising common tokens has + # already been done). + # + # In this example, there will be 2 tokens being matched: '=>' and '#'. + # They are the leading parts of fields 2 and 3, but we do need to know + # what they are so that we can dump a group of lines when these tokens + # change. + # + # The fields contain the actual characters of each field. The patterns + # are like the fields, but they contain mainly token types instead + # of tokens, so they have fewer characters. They are used to be + # sure we are matching fields of similar type. + # + # In this example, there will be 4 column indexes being adjusted. The + # first one is always at zero. The interior columns are at the start of + # the matching tokens, and the last one tracks the maximum line length. + # + # Each time a new line comes in, it joins the current vertical + # group if possible. Otherwise it causes the current group to be dumped + # and a new group is started. + # + # For each new group member, the column locations are increased, as + # necessary, to make room for the new fields. When the group is finally + # output, these column numbers are used to compute the amount of spaces of + # padding needed for each field. + # + # Programming note: the fields are assumed not to have any tab characters. + # Tabs have been previously removed except for tabs in quoted strings and + # side comments. Tabs in these fields can mess up the column counting. + # The log file warns the user if there are any such tabs. + +## my ( +## $level, $level_end, +## $indentation, $rfields, +## $rtokens, $rpatterns, +## $is_forced_break, $outdent_long_lines, +## $is_terminal_ternary, $is_terminal_statement, +## $do_not_pad, $rvertical_tightness_flags, +## $level_jump, +## ) = @_; + + my ( $rline_hash, $rfields, $rtokens, $rpatterns ) = @_; + my $level = $rline_hash->{level}; + my $level_end = $rline_hash->{level_end}; + my $indentation = $rline_hash->{indentation}; + my $is_forced_break = $rline_hash->{is_forced_break}; + my $outdent_long_lines = $rline_hash->{outdent_long_lines}; + my $is_terminal_ternary = $rline_hash->{is_terminal_ternary}; + my $is_terminal_statement = $rline_hash->{is_terminal_statement}; + my $do_not_pad = $rline_hash->{do_not_pad}; + my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags}; + my $level_jump = $rline_hash->{level_jump}; + + # number of fields is $jmax + # number of tokens between fields is $jmax-1 + my $jmax = $#{$rfields}; + + my $leading_space_count = get_spaces($indentation); + + # set outdented flag to be sure we either align within statements or + # across statement boundaries, but not both. + my $is_outdented = $last_leading_space_count > $leading_space_count; + $last_leading_space_count = $leading_space_count; + + # Patch: undo for hanging side comment + my $is_hanging_side_comment = + ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ ); + $is_outdented = 0 if $is_hanging_side_comment; + + # Forget side comment alignment after seeing 2 or more block comments + my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ ); + if ($is_block_comment) { + $consecutive_block_comments++; + } + else { + if ( $consecutive_block_comments > 1 ) { forget_side_comment() } + $consecutive_block_comments = 0; + } + + VALIGN_DEBUG_FLAG_APPEND0 && do { + print STDOUT +"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n"; + }; + + # Validate cached line if necessary: If we can produce a container + # with just 2 lines total by combining an existing cached opening + # token with the closing token to follow, then we will mark both + # cached flags as valid. + if ($rvertical_tightness_flags) { + if ( $maximum_line_index <= 0 + && $cached_line_type + && $cached_seqno + && $rvertical_tightness_flags->[2] + && $rvertical_tightness_flags->[2] == $cached_seqno ) + { + $rvertical_tightness_flags->[3] ||= 1; + $cached_line_valid ||= 1; + } + } + + # do not join an opening block brace with an unbalanced line + # unless requested with a flag value of 2 + if ( $cached_line_type == 3 + && $maximum_line_index < 0 + && $cached_line_flag < 2 + && $level_jump != 0 ) + { + $cached_line_valid = 0; + } + + # patch until new aligner is finished + if ($do_not_pad) { my_flush() } + + # shouldn't happen: + if ( $level < 0 ) { $level = 0 } + + # do not align code across indentation level changes + # or if vertical alignment is turned off for debugging + if ( $level != $group_level || $is_outdented || !$rOpts_valign ) { + + # we are allowed to shift a group of lines to the right if its + # level is greater than the previous and next group + $extra_indent_ok = + ( $level < $group_level && $last_level_written < $group_level ); + + my_flush(); + + # If we know that this line will get flushed out by itself because + # of level changes, we can leave the extra_indent_ok flag set. + # That way, if we get an external flush call, we will still be + # able to do some -lp alignment if necessary. + $extra_indent_ok = ( $is_terminal_statement && $level > $group_level ); + + $group_level = $level; + + # wait until after the above flush to get the leading space + # count because it may have been changed if the -icp flag is in + # effect + $leading_space_count = get_spaces($indentation); + + } + + # -------------------------------------------------------------------- + # Patch to collect outdentable block COMMENTS + # -------------------------------------------------------------------- + my $is_blank_line = ""; + if ( $group_type eq 'COMMENT' ) { + if ( + ( + $is_block_comment + && $outdent_long_lines + && $leading_space_count == $comment_leading_space_count + ) + || $is_blank_line + ) + { + $group_lines[ ++$maximum_line_index ] = $rfields->[0]; + return; + } + else { + my_flush(); + } + } + + # -------------------------------------------------------------------- + # add dummy fields for terminal ternary + # -------------------------------------------------------------------- + my $j_terminal_match; + if ( $is_terminal_ternary && $current_line ) { + $j_terminal_match = + fix_terminal_ternary( $rfields, $rtokens, $rpatterns ); + $jmax = @{$rfields} - 1; + } + + # -------------------------------------------------------------------- + # add dummy fields for else statement + # -------------------------------------------------------------------- + if ( $rfields->[0] =~ /^else\s*$/ + && $current_line + && $level_jump == 0 ) + { + $j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns ); + $jmax = @{$rfields} - 1; + } + + # -------------------------------------------------------------------- + # Step 1. Handle simple line of code with no fields to match. + # -------------------------------------------------------------------- + if ( $jmax <= 0 ) { + $zero_count++; + + if ( $maximum_line_index >= 0 + && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) ) + { + + # flush the current group if it has some aligned columns.. + if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() } + + # flush current group if we are just collecting side comments.. + elsif ( + + # ...and we haven't seen a comment lately + ( $zero_count > 3 ) + + # ..or if this new line doesn't fit to the left of the comments + || ( ( $leading_space_count + length( $rfields->[0] ) ) > + $group_lines[0]->get_column(0) ) + ) + { + my_flush(); + } + } + + # patch to start new COMMENT group if this comment may be outdented + if ( $is_block_comment + && $outdent_long_lines + && $maximum_line_index < 0 ) + { + $group_type = 'COMMENT'; + $comment_leading_space_count = $leading_space_count; + $group_lines[ ++$maximum_line_index ] = $rfields->[0]; + return; + } + + # just write this line directly if no current group, no side comment, + # and no space recovery is needed. + if ( $maximum_line_index < 0 && !get_recoverable_spaces($indentation) ) + { + valign_output_step_B( $leading_space_count, $rfields->[0], 0, + $outdent_long_lines, $rvertical_tightness_flags, $level ); + return; + } + } + else { + $zero_count = 0; + } + + # programming check: (shouldn't happen) + # an error here implies an incorrect call was made + if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) { + warning( +"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n" + ); + report_definite_bug(); + } + + # -------------------------------------------------------------------- + # create an object to hold this line + # -------------------------------------------------------------------- + ##my $new_line = new Perl::Tidy::VerticalAligner::Line( + my $new_line = Perl::Tidy::VerticalAligner::Line->new( + jmax => $jmax, + jmax_original_line => $jmax, + rtokens => $rtokens, + rfields => $rfields, + rpatterns => $rpatterns, + indentation => $indentation, + leading_space_count => $leading_space_count, + outdent_long_lines => $outdent_long_lines, + list_type => "", + is_hanging_side_comment => $is_hanging_side_comment, + maximum_line_length => maximum_line_length_for_level($level), + rvertical_tightness_flags => $rvertical_tightness_flags, + ); + + # Initialize a global flag saying if the last line of the group should + # match end of group and also terminate the group. There should be no + # returns between here and where the flag is handled at the bottom. + my $col_matching_terminal = 0; + if ( defined($j_terminal_match) ) { + + # remember the column of the terminal ? or { to match with + $col_matching_terminal = $current_line->get_column($j_terminal_match); + + # set global flag for sub decide_if_aligned + $is_matching_terminal_line = 1; + } + + # -------------------------------------------------------------------- + # It simplifies things to create a zero length side comment + # if none exists. + # -------------------------------------------------------------------- + make_side_comment( $new_line, $level_end ); + + # -------------------------------------------------------------------- + # Decide if this is a simple list of items. + # There are 3 list types: none, comma, comma-arrow. + # We use this below to be less restrictive in deciding what to align. + # -------------------------------------------------------------------- + if ($is_forced_break) { + decide_if_list($new_line); + } + + if ($current_line) { + + # -------------------------------------------------------------------- + # Allow hanging side comment to join current group, if any + # This will help keep side comments aligned, because otherwise we + # will have to start a new group, making alignment less likely. + # -------------------------------------------------------------------- + join_hanging_comment( $new_line, $current_line ) + if $is_hanging_side_comment; + + # -------------------------------------------------------------------- + # If there is just one previous line, and it has more fields + # than the new line, try to join fields together to get a match with + # the new line. At the present time, only a single leading '=' is + # allowed to be compressed out. This is useful in rare cases where + # a table is forced to use old breakpoints because of side comments, + # and the table starts out something like this: + # my %MonthChars = ('0', 'Jan', # side comment + # '1', 'Feb', + # '2', 'Mar', + # Eliminating the '=' field will allow the remaining fields to line up. + # This situation does not occur if there are no side comments + # because scan_list would put a break after the opening '('. + # -------------------------------------------------------------------- + eliminate_old_fields( $new_line, $current_line ); + + # -------------------------------------------------------------------- + # If the new line has more fields than the current group, + # see if we can match the first fields and combine the remaining + # fields of the new line. + # -------------------------------------------------------------------- + eliminate_new_fields( $new_line, $current_line ); + + # -------------------------------------------------------------------- + # Flush previous group unless all common tokens and patterns match.. + # -------------------------------------------------------------------- + check_match( $new_line, $current_line ); + + # -------------------------------------------------------------------- + # See if there is space for this line in the current group (if any) + # -------------------------------------------------------------------- + if ($current_line) { + check_fit( $new_line, $current_line ); + } + } + + # -------------------------------------------------------------------- + # Append this line to the current group (or start new group) + # -------------------------------------------------------------------- + add_to_group($new_line); + + # Future update to allow this to vary: + $current_line = $new_line if ( $maximum_line_index == 0 ); + + # output this group if it ends in a terminal else or ternary line + if ( defined($j_terminal_match) ) { + + # if there is only one line in the group (maybe due to failure to match + # perfectly with previous lines), then align the ? or { of this + # terminal line with the previous one unless that would make the line + # too long + if ( $maximum_line_index == 0 ) { + my $col_now = $current_line->get_column($j_terminal_match); + my $pad = $col_matching_terminal - $col_now; + my $padding_available = + $current_line->get_available_space_on_right(); + if ( $pad > 0 && $pad <= $padding_available ) { + $current_line->increase_field_width( $j_terminal_match, $pad ); + } + } + my_flush(); + $is_matching_terminal_line = 0; + } + + # -------------------------------------------------------------------- + # Step 8. Some old debugging stuff + # -------------------------------------------------------------------- + VALIGN_DEBUG_FLAG_APPEND && do { + print STDOUT "APPEND fields:"; + dump_array( @{$rfields} ); + print STDOUT "APPEND tokens:"; + dump_array( @{$rtokens} ); + print STDOUT "APPEND patterns:"; + dump_array( @{$rpatterns} ); + dump_alignments(); + }; + + return; +} + +sub join_hanging_comment { + + my $line = shift; + my $jmax = $line->get_jmax(); + return 0 unless $jmax == 1; # must be 2 fields + my $rtokens = $line->get_rtokens(); + return 0 unless $rtokens->[0] eq '#'; # the second field is a comment.. + my $rfields = $line->get_rfields(); + return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty... + my $old_line = shift; + my $maximum_field_index = $old_line->get_jmax(); + return 0 + unless $maximum_field_index > $jmax; # the current line has more fields + my $rpatterns = $line->get_rpatterns(); + + $line->set_is_hanging_side_comment(1); + $jmax = $maximum_field_index; + $line->set_jmax($jmax); + $rfields->[$jmax] = $rfields->[1]; + $rtokens->[ $jmax - 1 ] = $rtokens->[0]; + $rpatterns->[ $jmax - 1 ] = $rpatterns->[0]; + foreach my $j ( 1 .. $jmax - 1 ) { + $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why? + $rtokens->[ $j - 1 ] = ""; + $rpatterns->[ $j - 1 ] = ""; + } + return 1; +} + +sub eliminate_old_fields { + + my $new_line = shift; + my $jmax = $new_line->get_jmax(); + if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax } + if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax } + + # there must be one previous line + return unless ( $maximum_line_index == 0 ); + + my $old_line = shift; + my $maximum_field_index = $old_line->get_jmax(); + + ############################################### + # Moved below to allow new coding for => matches + # return unless $maximum_field_index > $jmax; + ############################################### + + # Identify specific cases where field elimination is allowed: + # case=1: both lines have comma-separated lists, and the first + # line has an equals + # case=2: both lines have leading equals + + # case 1 is the default + my $case = 1; + + # See if case 2: both lines have leading '=' + # We'll require similar leading patterns in this case + my $old_rtokens = $old_line->get_rtokens(); + my $rtokens = $new_line->get_rtokens(); + my $rpatterns = $new_line->get_rpatterns(); + my $old_rpatterns = $old_line->get_rpatterns(); + if ( $rtokens->[0] =~ /^=>?\d*$/ + && $old_rtokens->[0] eq $rtokens->[0] + && $old_rpatterns->[0] eq $rpatterns->[0] ) + { + $case = 2; + } + + # not too many fewer fields in new line for case 1 + return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax ); + + # case 1 must have side comment + my $old_rfields = $old_line->get_rfields(); + return + if ( $case == 1 + && length( $old_rfields->[$maximum_field_index] ) == 0 ); + + my $rfields = $new_line->get_rfields(); + + my $hid_equals = 0; + + my @new_alignments = (); + my @new_fields = (); + my @new_matching_patterns = (); + my @new_matching_tokens = (); + + my $j = 0; + my $current_field = ''; + my $current_pattern = ''; + + # loop over all old tokens + my $in_match = 0; + foreach my $k ( 0 .. $maximum_field_index - 1 ) { + $current_field .= $old_rfields->[$k]; + $current_pattern .= $old_rpatterns->[$k]; + last if ( $j > $jmax - 1 ); + + if ( $old_rtokens->[$k] eq $rtokens->[$j] ) { + $in_match = 1; + $new_fields[$j] = $current_field; + $new_matching_patterns[$j] = $current_pattern; + $current_field = ''; + $current_pattern = ''; + $new_matching_tokens[$j] = $old_rtokens->[$k]; + $new_alignments[$j] = $old_line->get_alignment($k); + $j++; + } + else { + + if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) { + last if ( $case == 2 ); # avoid problems with stuff + # like: $a=$b=$c=$d; + $hid_equals = 1; + } + last + if ( $in_match && $case == 1 ) + ; # disallow gaps in matching field types in case 1 + } + } + + # Modify the current state if we are successful. + # We must exactly reach the ends of the new list for success, and the old + # pattern must have more fields. Here is an example where the first and + # second lines have the same number, and we should not align: + # my @a = map chr, 0 .. 255; + # my @b = grep /\W/, @a; + # my @c = grep /[^\w]/, @a; + + # Otherwise, we would get all of the commas aligned, which doesn't work as + # well: + # my @a = map chr, 0 .. 255; + # my @b = grep /\W/, @a; + # my @c = grep /[^\w]/, @a; + + if ( ( $j == $jmax ) + && ( $current_field eq '' ) + && ( $case != 1 || $hid_equals ) + && ( $maximum_field_index > $jmax ) ) + { + my $k = $maximum_field_index; + $current_field .= $old_rfields->[$k]; + $current_pattern .= $old_rpatterns->[$k]; + $new_fields[$j] = $current_field; + $new_matching_patterns[$j] = $current_pattern; + + $new_alignments[$j] = $old_line->get_alignment($k); + $maximum_field_index = $j; + + $old_line->set_alignments(@new_alignments); + $old_line->set_jmax($jmax); + $old_line->set_rtokens( \@new_matching_tokens ); + $old_line->set_rfields( \@new_fields ); + $old_line->set_rpatterns( \@{$rpatterns} ); + } + + # Dumb Down starting match if necessary: + # + # Consider the following two lines: + # + # { + # $a => 20 > 3 ? 1 : 0, + # $xyz => 5, + # } + +# We would like to get alignment regardless of the order of the two lines. +# If the lines come in in this order, then we will simplify the patterns of the first line +# in sub eliminate_new_fields. +# If the lines come in reverse order, then we achieve this with eliminate_new_fields. + + # This update is currently restricted to leading '=>' matches. Although we + # could do this for both '=' and '=>', overall the results for '=' come out + # better without this step because this step can eliminate some other good + # matches. For example, with the '=' we get: + +# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); +# my @dsf = map "$_\x{FFFE}Fred", @disilva; +# my @dsj = map "$_\x{FFFE}John", @disilva; +# my @dsJ = map "$_ John", @disilva; + + # without including '=' we get: + +# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" ); +# my @dsf = map "$_\x{FFFE}Fred", @disilva; +# my @dsj = map "$_\x{FFFE}John", @disilva; +# my @dsJ = map "$_ John", @disilva; + elsif ( + $case == 2 + + && @new_matching_tokens == 1 + ##&& $new_matching_tokens[0] =~ /^=/ # see note above + && $new_matching_tokens[0] =~ /^=>/ + && $maximum_field_index > 2 + ) + { + my $jmaxm = $jmax - 1; + my $kmaxm = $maximum_field_index - 1; + my $have_side_comment = $old_rtokens->[$kmaxm] eq '#'; + + # We need to reduce the group pattern to be just two tokens, + # the leading equality or => and the final side comment + + my $mid_field = join "", + @{$old_rfields}[ 1 .. $maximum_field_index - 1 ]; + my $mid_patterns = join "", + @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ]; + my @new_alignments = ( + $old_line->get_alignment(0), + $old_line->get_alignment( $maximum_field_index - 1 ) + ); + my @new_tokens = + ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] ); + my @new_fields = ( + $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index] + ); + my @new_patterns = ( + $old_rpatterns->[0], $mid_patterns, + $old_rpatterns->[$maximum_field_index] + ); + + $maximum_field_index = 2; + $old_line->set_jmax($maximum_field_index); + $old_line->set_rtokens( \@new_tokens ); + $old_line->set_rfields( \@new_fields ); + $old_line->set_rpatterns( \@new_patterns ); + + initialize_for_new_group(); + add_to_group($old_line); + $current_line = $old_line; + } + return; +} + +# create an empty side comment if none exists +sub make_side_comment { + my ( $new_line, $level_end ) = @_; + my $jmax = $new_line->get_jmax(); + my $rtokens = $new_line->get_rtokens(); + + # if line does not have a side comment... + if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) { + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + $rtokens->[$jmax] = '#'; + $rfields->[ ++$jmax ] = ''; + $rpatterns->[$jmax] = '#'; + $new_line->set_jmax($jmax); + $new_line->set_jmax_original_line($jmax); + } + + # line has a side comment.. + else { + + # don't remember old side comment location for very long + my $line_number = $vertical_aligner_self->get_output_line_number(); + my $rfields = $new_line->get_rfields(); + if ( + $line_number - $last_side_comment_line_number > 12 + + # and don't remember comment location across block level changes + || ( $level_end < $last_side_comment_level + && $rfields->[0] =~ /^}/ ) + ) + { + forget_side_comment(); + } + $last_side_comment_line_number = $line_number; + $last_side_comment_level = $level_end; + } + return; +} + +sub decide_if_list { + + my $line = shift; + + # A list will be taken to be a line with a forced break in which all + # of the field separators are commas or comma-arrows (except for the + # trailing #) + + # List separator tokens are things like ',3' or '=>2', + # where the trailing digit is the nesting depth. Allow braces + # to allow nested list items. + my $rtokens = $line->get_rtokens(); + my $test_token = $rtokens->[0]; + if ( $test_token =~ /^(\,|=>)/ ) { + my $list_type = $test_token; + my $jmax = $line->get_jmax(); + + foreach ( 1 .. $jmax - 2 ) { + if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) { + $list_type = ""; + last; + } + } + $line->set_list_type($list_type); + } + return; +} + +sub eliminate_new_fields { + + my ( $new_line, $old_line ) = @_; + return unless ( $maximum_line_index >= 0 ); + my $jmax = $new_line->get_jmax(); + + my $old_rtokens = $old_line->get_rtokens(); + my $rtokens = $new_line->get_rtokens(); + my $is_assignment = + ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) ); + + # must be monotonic variation + return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax ); + + # must be more fields in the new line + my $maximum_field_index = $old_line->get_jmax(); + return unless ( $maximum_field_index < $jmax ); + + unless ($is_assignment) { + return + unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen ) + ; # only if monotonic + + # never combine fields of a comma list + return + unless ( $maximum_field_index > 1 ) + && ( $new_line->get_list_type() !~ /^,/ ); + } + + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + my $old_rpatterns = $old_line->get_rpatterns(); + + # loop over all OLD tokens except comment and check match + my $match = 1; + foreach my $k ( 0 .. $maximum_field_index - 2 ) { + if ( ( $old_rtokens->[$k] ne $rtokens->[$k] ) + || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) ) + { + $match = 0; + last; + } + } + + # first tokens agree, so combine extra new tokens + if ($match) { + ##for my $k ( $maximum_field_index .. $jmax - 1 ) { + foreach my $k ( $maximum_field_index .. $jmax - 1 ) { + + $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k]; + $rfields->[$k] = ""; + $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k]; + $rpatterns->[$k] = ""; + } + + $rtokens->[ $maximum_field_index - 1 ] = '#'; + $rfields->[$maximum_field_index] = $rfields->[$jmax]; + $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax]; + $jmax = $maximum_field_index; + } + $new_line->set_jmax($jmax); + return; +} + +sub fix_terminal_ternary { + + # Add empty fields as necessary to align a ternary term + # like this: + # + # my $leapyear = + # $year % 4 ? 0 + # : $year % 100 ? 1 + # : $year % 400 ? 0 + # : 1; + # + # returns 1 if the terminal item should be indented + + my ( $rfields, $rtokens, $rpatterns ) = @_; + + my $jmax = @{$rfields} - 1; + my $old_line = $group_lines[$maximum_line_index]; + my $rfields_old = $old_line->get_rfields(); + + my $rpatterns_old = $old_line->get_rpatterns(); + my $rtokens_old = $old_line->get_rtokens(); + my $maximum_field_index = $old_line->get_jmax(); + + # look for the question mark after the : + my ($jquestion); + my $depth_question; + my $pad = ""; + foreach my $j ( 0 .. $maximum_field_index - 1 ) { + my $tok = $rtokens_old->[$j]; + if ( $tok =~ /^\?(\d+)$/ ) { + $depth_question = $1; + + # depth must be correct + next unless ( $depth_question eq $group_level ); + + $jquestion = $j; + if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) { + $pad = " " x length($1); + } + else { + return; # shouldn't happen + } + last; + } + } + return unless ( defined($jquestion) ); # shouldn't happen + + # Now splice the tokens and patterns of the previous line + # into the else line to insure a match. Add empty fields + # as necessary. + my $jadd = $jquestion; + + # Work on copies of the actual arrays in case we have + # to return due to an error + my @fields = @{$rfields}; + my @patterns = @{$rpatterns}; + my @tokens = @{$rtokens}; + + VALIGN_DEBUG_FLAG_TERNARY && do { + local $" = '><'; + print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n"; + print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n"; + print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n"; + print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n"; + print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n"; + print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n"; + }; + + # handle cases of leading colon on this line + if ( $fields[0] =~ /^(:\s*)(.*)$/ ) { + + my ( $colon, $therest ) = ( $1, $2 ); + + # Handle sub-case of first field with leading colon plus additional code + # This is the usual situation as at the '1' below: + # ... + # : $year % 400 ? 0 + # : 1; + if ($therest) { + + # Split the first field after the leading colon and insert padding. + # Note that this padding will remain even if the terminal value goes + # out on a separate line. This does not seem to look to bad, so no + # mechanism has been included to undo it. + my $field1 = shift @fields; + unshift @fields, ( $colon, $pad . $therest ); + + # change the leading pattern from : to ? + return unless ( $patterns[0] =~ s/^\:/?/ ); + + # install leading tokens and patterns of existing line + unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); + unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); + + # insert appropriate number of empty fields + splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + } + + # handle sub-case of first field just equal to leading colon. + # This can happen for example in the example below where + # the leading '(' would create a new alignment token + # : ( $name =~ /[]}]$/ ) ? ( $mname = $name ) + # : ( $mname = $name . '->' ); + else { + + return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen + + # prepend a leading ? onto the second pattern + $patterns[1] = "?b" . $patterns[1]; + + # pad the second field + $fields[1] = $pad . $fields[1]; + + # install leading tokens and patterns of existing line, replacing + # leading token and inserting appropriate number of empty fields + splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] ); + splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] ); + splice( @fields, 1, 0, ('') x $jadd ) if $jadd; + } + } + + # Handle case of no leading colon on this line. This will + # be the case when -wba=':' is used. For example, + # $year % 400 ? 0 : + # 1; + else { + + # install leading tokens and patterns of existing line + $patterns[0] = '?' . 'b' . $patterns[0]; + unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] ); + unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] ); + + # insert appropriate number of empty fields + $jadd = $jquestion + 1; + $fields[0] = $pad . $fields[0]; + splice( @fields, 0, 0, ('') x $jadd ) if $jadd; + } + + VALIGN_DEBUG_FLAG_TERNARY && do { + local $" = '><'; + print STDOUT "MODIFIED TOKENS=<@tokens>\n"; + print STDOUT "MODIFIED PATTERNS=<@patterns>\n"; + print STDOUT "MODIFIED FIELDS=<@fields>\n"; + }; + + # all ok .. update the arrays + @{$rfields} = @fields; + @{$rtokens} = @tokens; + @{$rpatterns} = @patterns; + + # force a flush after this line + return $jquestion; +} + +sub fix_terminal_else { + + # Add empty fields as necessary to align a balanced terminal + # else block to a previous if/elsif/unless block, + # like this: + # + # if ( 1 || $x ) { print "ok 13\n"; } + # else { print "not ok 13\n"; } + # + # returns 1 if the else block should be indented + # + my ( $rfields, $rtokens, $rpatterns ) = @_; + my $jmax = @{$rfields} - 1; + return unless ( $jmax > 0 ); + + # check for balanced else block following if/elsif/unless + my $rfields_old = $current_line->get_rfields(); + + # TBD: add handling for 'case' + return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ ); + + # look for the opening brace after the else, and extract the depth + my $tok_brace = $rtokens->[0]; + my $depth_brace; + if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; } + + # probably: "else # side_comment" + else { return } + + my $rpatterns_old = $current_line->get_rpatterns(); + my $rtokens_old = $current_line->get_rtokens(); + my $maximum_field_index = $current_line->get_jmax(); + + # be sure the previous if/elsif is followed by an opening paren + my $jparen = 0; + my $tok_paren = '(' . $depth_brace; + my $tok_test = $rtokens_old->[$jparen]; + return unless ( $tok_test eq $tok_paren ); # shouldn't happen + + # Now find the opening block brace + my ($jbrace); + foreach my $j ( 1 .. $maximum_field_index - 1 ) { + my $tok = $rtokens_old->[$j]; + if ( $tok eq $tok_brace ) { + $jbrace = $j; + last; + } + } + return unless ( defined($jbrace) ); # shouldn't happen + + # Now splice the tokens and patterns of the previous line + # into the else line to insure a match. Add empty fields + # as necessary. + my $jadd = $jbrace - $jparen; + splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] ); + splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] ); + splice( @{$rfields}, 1, 0, ('') x $jadd ); + + # force a flush after this line if it does not follow a case + if ( $rfields_old->[0] =~ /^case\s*$/ ) { return } + else { return $jbrace } +} + +{ # sub check_match + my %is_good_alignment; + + BEGIN { + + # Vertically aligning on certain "good" tokens is usually okay + # so we can be less restrictive in marginal cases. + my @q = qw( { ? => = ); + push @q, (','); + @is_good_alignment{@q} = (1) x scalar(@q); + } + + sub check_match { + + # See if the current line matches the current vertical alignment group. + # If not, flush the current group. + my ( $new_line, $old_line ) = @_; + + # uses global variables: + # $previous_minimum_jmax_seen + # $maximum_jmax_seen + # $maximum_line_index + # $marginal_match + my $jmax = $new_line->get_jmax(); + my $maximum_field_index = $old_line->get_jmax(); + + # flush if this line has too many fields + if ( $jmax > $maximum_field_index ) { goto NO_MATCH } + + # flush if adding this line would make a non-monotonic field count + if ( + ( $maximum_field_index > $jmax ) # this has too few fields + && ( + ( $previous_minimum_jmax_seen < + $jmax ) # and wouldn't be monotonic + || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen ) + ) + ) + { + goto NO_MATCH; + } + + # otherwise see if this line matches the current group + my $jmax_original_line = $new_line->get_jmax_original_line(); + my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); + my $rtokens = $new_line->get_rtokens(); + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + my $list_type = $new_line->get_list_type(); + + my $group_list_type = $old_line->get_list_type(); + my $old_rpatterns = $old_line->get_rpatterns(); + my $old_rtokens = $old_line->get_rtokens(); + + my $jlimit = $jmax - 1; + if ( $maximum_field_index > $jmax ) { + $jlimit = $jmax_original_line; + --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) ); + } + + # handle comma-separated lists .. + if ( $group_list_type && ( $list_type eq $group_list_type ) ) { + for my $j ( 0 .. $jlimit ) { + my $old_tok = $old_rtokens->[$j]; + next unless $old_tok; + my $new_tok = $rtokens->[$j]; + next unless $new_tok; + + # lists always match ... + # unless they would align any '=>'s with ','s + goto NO_MATCH + if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ + || $new_tok =~ /^=>/ && $old_tok =~ /^,/ ); + } + } + + # do detailed check for everything else except hanging side comments + elsif ( !$is_hanging_side_comment ) { + + my $leading_space_count = $new_line->get_leading_space_count(); + + my $max_pad = 0; + my $min_pad = 0; + my $saw_good_alignment; + + for my $j ( 0 .. $jlimit ) { + + my $old_tok = $old_rtokens->[$j]; + my $new_tok = $rtokens->[$j]; + + # Note on encoding used for alignment tokens: + # ------------------------------------------- + # Tokens are "decorated" with information which can help + # prevent unwanted alignments. Consider for example the + # following two lines: + # local ( $xn, $xd ) = split( '/', &'rnorm(@_) ); + # local ( $i, $f ) = &'bdiv( $xn, $xd ); + # There are three alignment tokens in each line, a comma, + # an =, and a comma. In the first line these three tokens + # are encoded as: + # ,4+local-18 =3 ,4+split-7 + # and in the second line they are encoded as + # ,4+local-18 =3 ,4+&'bdiv-8 + # Tokens always at least have token name and nesting + # depth. So in this example the ='s are at depth 3 and + # the ,'s are at depth 4. This prevents aligning tokens + # of different depths. Commas contain additional + # information, as follows: + # , {depth} + {container name} - {spaces to opening paren} + # This allows us to reject matching the rightmost commas + # in the above two lines, since they are for different + # function calls. This encoding is done in + # 'sub send_lines_to_vertical_aligner'. + + # Pick off actual token. + # Everything up to the first digit is the actual token. + my $alignment_token = $new_tok; + if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 } + + # see if the decorated tokens match + my $tokens_match = $new_tok eq $old_tok + + # Exception for matching terminal : of ternary statement.. + # consider containers prefixed by ? and : a match + || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ ); + + # No match if the alignment tokens differ... + if ( !$tokens_match ) { + + # ...Unless this is a side comment + if ( + $j == $jlimit + + # and there is either at least one alignment token + # or this is a single item following a list. This + # latter rule is required for 'December' to join + # the following list: + # my (@months) = ( + # '', 'January', 'February', 'March', + # 'April', 'May', 'June', 'July', + # 'August', 'September', 'October', 'November', + # 'December' + # ); + # If it doesn't then the -lp formatting will fail. + && ( $j > 0 || $old_tok =~ /^,/ ) + ) + { + $marginal_match = 1 + if ( $marginal_match == 0 + && $maximum_line_index == 0 ); + last; + } + + goto NO_MATCH; + } + + # Calculate amount of padding required to fit this in. + # $pad is the number of spaces by which we must increase + # the current field to squeeze in this field. + my $pad = + length( $rfields->[$j] ) - $old_line->current_field_width($j); + if ( $j == 0 ) { $pad += $leading_space_count; } + + # remember max pads to limit marginal cases + if ( $alignment_token ne '#' ) { + if ( $pad > $max_pad ) { $max_pad = $pad } + if ( $pad < $min_pad ) { $min_pad = $pad } + } + if ( $is_good_alignment{$alignment_token} ) { + $saw_good_alignment = 1; + } + + # If patterns don't match, we have to be careful... + if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) { + + # flag this as a marginal match since patterns differ + $marginal_match = 1 + if ( $marginal_match == 0 && $maximum_line_index == 0 ); + + # We have to be very careful about aligning commas + # when the pattern's don't match, because it can be + # worse to create an alignment where none is needed + # than to omit one. Here's an example where the ','s + # are not in named containers. The first line below + # should not match the next two: + # ( $a, $b ) = ( $b, $r ); + # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 ); + # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 ); + if ( $alignment_token eq ',' ) { + + # do not align commas unless they are in named containers + goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ ); + } + + # do not align parens unless patterns match; + # large ugly spaces can occur in math expressions. + elsif ( $alignment_token eq '(' ) { + + # But we can allow a match if the parens don't + # require any padding. + if ( $pad != 0 ) { goto NO_MATCH } + } + + # Handle an '=' alignment with different patterns to + # the left. + elsif ( $alignment_token eq '=' ) { + + # It is best to be a little restrictive when + # aligning '=' tokens. Here is an example of + # two lines that we will not align: + # my $variable=6; + # $bb=4; + # The problem is that one is a 'my' declaration, + # and the other isn't, so they're not very similar. + # We will filter these out by comparing the first + # letter of the pattern. This is crude, but works + # well enough. + if ( + substr( $old_rpatterns->[$j], 0, 1 ) ne + substr( $rpatterns->[$j], 0, 1 ) ) + { + goto NO_MATCH; + } + + # If we pass that test, we'll call it a marginal match. + # Here is an example of a marginal match: + # $done{$$op} = 1; + # $op = compile_bblock($op); + # The left tokens are both identifiers, but + # one accesses a hash and the other doesn't. + # We'll let this be a tentative match and undo + # it later if we don't find more than 2 lines + # in the group. + elsif ( $maximum_line_index == 0 ) { + $marginal_match = + 2; # =2 prevents being undone below + } + } + } + + # Don't let line with fewer fields increase column widths + # ( align3.t ) + if ( $maximum_field_index > $jmax ) { + + # Exception: suspend this rule to allow last lines to join + if ( $pad > 0 ) { goto NO_MATCH; } + } + } ## end for my $j ( 0 .. $jlimit) + + # Turn off the "marginal match" flag in some cases... + # A "marginal match" occurs when the alignment tokens agree + # but there are differences in the other tokens (patterns). + # If we leave the marginal match flag set, then the rule is that we + # will align only if there are more than two lines in the group. + # We will turn of the flag if we almost have a match + # and either we have seen a good alignment token or we + # just need a small pad (2 spaces) to fit. These rules are + # the result of experimentation. Tokens which misaligned by just + # one or two characters are annoying. On the other hand, + # large gaps to less important alignment tokens are also annoying. + if ( $marginal_match == 1 + && $jmax == $maximum_field_index + && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) ) + ) + { + $marginal_match = 0; + } + ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n"; + } + + # We have a match (even if marginal). + # If the current line has fewer fields than the current group + # but otherwise matches, copy the remaining group fields to + # make it a perfect match. + if ( $maximum_field_index > $jmax ) { + + ########################################################## + # FIXME: The previous version had a bug which made side comments + # become regular fields, so for now the program does not allow a + # line with side comment to match. This should eventually be done. + # The best test file for experimenting is 'lista.t' + ########################################################## + + my $comment = $rfields->[$jmax]; + goto NO_MATCH if ($comment); + + # Corrected loop + for my $jj ( $jlimit .. $maximum_field_index ) { + $rtokens->[$jj] = $old_rtokens->[$jj]; + $rfields->[ $jj + 1 ] = ''; + $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ]; + } + +## THESE DO NOT GIVE CORRECT RESULTS +## $rfields->[$jmax] = $comment; +## $new_line->set_jmax($jmax); + + } + return; + + NO_MATCH: + ##print "no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$old_rtokens->[0]\n"; + my_flush(); + return; + } +} + +sub check_fit { + + my ( $new_line, $old_line ) = @_; + return unless ( $maximum_line_index >= 0 ); + + my $jmax = $new_line->get_jmax(); + my $leading_space_count = $new_line->get_leading_space_count(); + my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment(); + my $rtokens = $new_line->get_rtokens(); + my $rfields = $new_line->get_rfields(); + my $rpatterns = $new_line->get_rpatterns(); + + my $group_list_type = $group_lines[0]->get_list_type(); + + my $padding_so_far = 0; + my $padding_available = $old_line->get_available_space_on_right(); + + # save current columns in case this doesn't work + save_alignment_columns(); + + my $maximum_field_index = $old_line->get_jmax(); + for my $j ( 0 .. $jmax ) { + + my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j); + + if ( $j == 0 ) { + $pad += $leading_space_count; + } + + # remember largest gap of the group, excluding gap to side comment + if ( $pad < 0 + && $group_maximum_gap < -$pad + && $j > 0 + && $j < $jmax - 1 ) + { + $group_maximum_gap = -$pad; + } + + next if $pad < 0; + + ## OLD NOTES: + ## This patch helps sometimes, but it doesn't check to see if + ## the line is too long even without the side comment. It needs + ## to be reworked. + ##don't let a long token with no trailing side comment push + ##side comments out, or end a group. (sidecmt1.t) + ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0); + + # BEGIN PATCH for keith1.txt. + # If the group began matching multiple tokens but later this got + # reduced to a fewer number of matching tokens, then the fields + # of the later lines will still have to fit into their corresponding + # fields. So a large later field will "push" the other fields to + # the right, including previous side comments, and if there is no room + # then there is no match. + # For example, look at the last line in the following snippet: + + # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false; + # my $env = ($b_prod_db) ? "prd" : "val"; + # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL"; + # my $task = $OPT{t}; + # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash"; + + # The long term will push the '?' to the right to fit in, and in this + # case there is not enough room so it will not match the equals unless + # we do something special. + + # Usually it looks good to keep an initial alignment of '=' going, and + # we can do this if the long term can fit in the space taken up by the + # remaining fields (the ? : fields here). + + # Allowing any matching token for now, but it could be restricted + # to an '='-like token if necessary. + + if ( + $pad > $padding_available + && $jmax == 2 # matching one thing (plus #) + && $j == $jmax - 1 # at last field + && $maximum_line_index > 0 # more than 1 line in group now + && $jmax < $maximum_field_index # other lines have more fields + && length( $rfields->[$jmax] ) == 0 # no side comment + + # Uncomment to match only equals (but this does not seem necessary) + # && $rtokens->[0] =~ /^=\d/ # matching an equals + ) + { + my $extra_padding = 0; + foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) { + $extra_padding += $old_line->current_field_width($jj); + } + + next if ( $pad <= $padding_available + $extra_padding ); + } + + # END PATCH for keith1.pl + + # This line will need space; lets see if we want to accept it.. + if ( + + # not if this won't fit + ( $pad > $padding_available ) + + # previously, there were upper bounds placed on padding here + # (maximum_whitespace_columns), but they were not really helpful + + ) + { + + # revert to starting state then flush; things didn't work out + restore_alignment_columns(); + my_flush(); + last; + } + + # patch to avoid excessive gaps in previous lines, + # due to a line of fewer fields. + # return join( ".", + # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"}, + # $self->{"area"}, $self->{"id"}, $self->{"sel"} ); + next if ( $jmax < $maximum_field_index && $j == $jmax - 1 ); + + # looks ok, squeeze this field in + $old_line->increase_field_width( $j, $pad ); + $padding_available -= $pad; + + # remember largest gap of the group, excluding gap to side comment + if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) { + $group_maximum_gap = $pad; + } + } + return; +} + +sub add_to_group { + + # The current line either starts a new alignment group or is + # accepted into the current alignment group. + my $new_line = shift; + $group_lines[ ++$maximum_line_index ] = $new_line; + + # initialize field lengths if starting new group + if ( $maximum_line_index == 0 ) { + + my $jmax = $new_line->get_jmax(); + my $rfields = $new_line->get_rfields(); + my $rtokens = $new_line->get_rtokens(); + my $col = $new_line->get_leading_space_count(); + + for my $j ( 0 .. $jmax ) { + $col += length( $rfields->[$j] ); + + # create initial alignments for the new group + my $token = ""; + if ( $j < $jmax ) { $token = $rtokens->[$j] } + my $alignment = make_alignment( $col, $token ); + $new_line->set_alignment( $j, $alignment ); + } + + $maximum_jmax_seen = $jmax; + $minimum_jmax_seen = $jmax; + } + + # use previous alignments otherwise + else { + my @new_alignments = + $group_lines[ $maximum_line_index - 1 ]->get_alignments(); + $new_line->set_alignments(@new_alignments); + } + + # remember group jmax extremes for next call to valign_input + $previous_minimum_jmax_seen = $minimum_jmax_seen; + $previous_maximum_jmax_seen = $maximum_jmax_seen; + return; +} + +sub dump_array { + + # debug routine to dump array contents + local $" = ')('; + print STDOUT "(@_)\n"; + return; +} + +# flush() sends the current Perl::Tidy::VerticalAligner group down the +# pipeline to Perl::Tidy::FileWriter. + +# This is the external flush, which also empties the buffer and cache +sub flush { + + # the buffer must be emptied first, then any cached text + dump_valign_buffer(); + + if ( $maximum_line_index < 0 ) { + if ($cached_line_type) { + $seqno_string = $cached_seqno_string; + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + $cached_line_type = 0; + $cached_line_text = ""; + $cached_seqno_string = ""; + } + } + else { + my_flush(); + } + return; +} + +sub reduce_valign_buffer_indentation { + + my ($diff) = @_; + if ( $valign_buffer_filling && $diff ) { + my $max_valign_buffer = @valign_buffer; + foreach my $i ( 0 .. $max_valign_buffer - 1 ) { + my ( $line, $leading_space_count, $level ) = + @{ $valign_buffer[$i] }; + my $ws = substr( $line, 0, $diff ); + if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { + $line = substr( $line, $diff ); + } + if ( $leading_space_count >= $diff ) { + $leading_space_count -= $diff; + $level = level_change( $leading_space_count, $diff, $level ); + } + $valign_buffer[$i] = [ $line, $leading_space_count, $level ]; + } + } + return; +} + +sub level_change { + + # compute decrease in level when we remove $diff spaces from the + # leading spaces + my ( $leading_space_count, $diff, $level ) = @_; + if ($rOpts_indent_columns) { + my $olev = + int( ( $leading_space_count + $diff ) / $rOpts_indent_columns ); + my $nlev = int( $leading_space_count / $rOpts_indent_columns ); + $level -= ( $olev - $nlev ); + if ( $level < 0 ) { $level = 0 } + } + return $level; +} + +sub dump_valign_buffer { + if (@valign_buffer) { + foreach (@valign_buffer) { + valign_output_step_D( @{$_} ); + } + @valign_buffer = (); + } + $valign_buffer_filling = ""; + return; +} + +# This is the internal flush, which leaves the cache intact +sub my_flush { + + return if ( $maximum_line_index < 0 ); + + # handle a group of comment lines + if ( $group_type eq 'COMMENT' ) { + + VALIGN_DEBUG_FLAG_APPEND0 && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n"; + + }; + my $leading_space_count = $comment_leading_space_count; + my $leading_string = get_leading_string($leading_space_count); + + # zero leading space count if any lines are too long + my $max_excess = 0; + for my $i ( 0 .. $maximum_line_index ) { + my $str = $group_lines[$i]; + my $excess = + length($str) + + $leading_space_count - + maximum_line_length_for_level($group_level); + if ( $excess > $max_excess ) { + $max_excess = $excess; + } + } + + if ( $max_excess > 0 ) { + $leading_space_count -= $max_excess; + if ( $leading_space_count < 0 ) { $leading_space_count = 0 } + $last_outdented_line_at = + $file_writer_object->get_output_line_number(); + unless ($outdented_line_count) { + $first_outdented_line_at = $last_outdented_line_at; + } + $outdented_line_count += ( $maximum_line_index + 1 ); + } + + # write the group of lines + my $outdent_long_lines = 0; + for my $i ( 0 .. $maximum_line_index ) { + valign_output_step_B( $leading_space_count, $group_lines[$i], 0, + $outdent_long_lines, "", $group_level ); + } + } + + # handle a group of code lines + else { + + VALIGN_DEBUG_FLAG_APPEND0 && do { + my $group_list_type = $group_lines[0]->get_list_type(); + my ( $a, $b, $c ) = caller(); + my $maximum_field_index = $group_lines[0]->get_jmax(); + print STDOUT +"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n"; + + }; + + # some small groups are best left unaligned + my $do_not_align = decide_if_aligned(); + + # optimize side comment location + $do_not_align = adjust_side_comment($do_not_align); + + # recover spaces for -lp option if possible + my $extra_leading_spaces = get_extra_leading_spaces(); + + # all lines of this group have the same basic leading spacing + my $group_leader_length = $group_lines[0]->get_leading_space_count(); + + # add extra leading spaces if helpful + # NOTE: Use zero; this did not work well + my $min_ci_gap = 0; + + # loop to output all lines + for my $i ( 0 .. $maximum_line_index ) { + my $line = $group_lines[$i]; + valign_output_step_A( $line, $min_ci_gap, $do_not_align, + $group_leader_length, $extra_leading_spaces ); + } + } + initialize_for_new_group(); + return; +} + +sub decide_if_aligned { + + # Do not try to align two lines which are not really similar + return unless $maximum_line_index == 1; + return if ($is_matching_terminal_line); + + my $group_list_type = $group_lines[0]->get_list_type(); + + my $do_not_align = ( + + # always align lists + !$group_list_type + + && ( + + # don't align if it was just a marginal match + $marginal_match + + # don't align two lines with big gap + || $group_maximum_gap > 12 + + # or lines with differing number of alignment tokens + # TODO: this could be improved. It occasionally rejects + # good matches. + || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen + ) + ); + + # But try to convert them into a simple comment group if the first line + # a has side comment + my $rfields = $group_lines[0]->get_rfields(); + my $maximum_field_index = $group_lines[0]->get_jmax(); + if ( $do_not_align + && ( $maximum_line_index > 0 ) + && ( length( $rfields->[$maximum_field_index] ) > 0 ) ) + { + combine_fields(); + $do_not_align = 0; + } + return $do_not_align; +} + +sub adjust_side_comment { + + my $do_not_align = shift; + + # let's see if we can move the side comment field out a little + # to improve readability (the last field is always a side comment field) + my $have_side_comment = 0; + my $first_side_comment_line = -1; + my $maximum_field_index = $group_lines[0]->get_jmax(); + for my $i ( 0 .. $maximum_line_index ) { + my $line = $group_lines[$i]; + + if ( length( $line->get_rfields()->[$maximum_field_index] ) ) { + $have_side_comment = 1; + $first_side_comment_line = $i; + last; + } + } + + my $kmax = $maximum_field_index + 1; + + if ($have_side_comment) { + + my $line = $group_lines[0]; + + # the maximum space without exceeding the line length: + my $avail = $line->get_available_space_on_right(); + + # try to use the previous comment column + my $side_comment_column = $line->get_column( $kmax - 2 ); + my $move = $last_comment_column - $side_comment_column; + +## my $sc_line0 = $side_comment_history[0]->[0]; +## my $sc_col0 = $side_comment_history[0]->[1]; +## my $sc_line1 = $side_comment_history[1]->[0]; +## my $sc_col1 = $side_comment_history[1]->[1]; +## my $sc_line2 = $side_comment_history[2]->[0]; +## my $sc_col2 = $side_comment_history[2]->[1]; +## +## # FUTURE UPDATES: +## # Be sure to ignore 'do not align' and '} # end comments' +## # Find first $move > 0 and $move <= $avail as follows: +## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12 +## # 2. try sc_col2 if (line-sc_line2) < 12 +## # 3. try min possible space, plus up to 8, +## # 4. try min possible space + + if ( $kmax > 0 && !$do_not_align ) { + + # but if this doesn't work, give up and use the minimum space + if ( $move > $avail ) { + $move = $rOpts_minimum_space_to_comment - 1; + } + + # but we want some minimum space to the comment + my $min_move = $rOpts_minimum_space_to_comment - 1; + if ( $move >= 0 + && $last_side_comment_length > 0 + && ( $first_side_comment_line == 0 ) + && $group_level == $last_level_written ) + { + $min_move = 0; + } + + if ( $move < $min_move ) { + $move = $min_move; + } + + # previously, an upper bound was placed on $move here, + # (maximum_space_to_comment), but it was not helpful + + # don't exceed the available space + if ( $move > $avail ) { $move = $avail } + + # we can only increase space, never decrease + if ( $move > 0 ) { + $line->increase_field_width( $maximum_field_index - 1, $move ); + } + + # remember this column for the next group + $last_comment_column = $line->get_column( $kmax - 2 ); + } + else { + + # try to at least line up the existing side comment location + if ( $kmax > 0 && $move > 0 && $move < $avail ) { + $line->increase_field_width( $maximum_field_index - 1, $move ); + $do_not_align = 0; + } + + # reset side comment column if we can't align + else { + forget_side_comment(); + } + } + } + return $do_not_align; +} + +sub valign_output_step_A { + + ############################################################### + # This is Step A in writing vertically aligned lines. + # The line is prepared according to the alignments which have + # been found and shipped to the next step. + ############################################################### + + my ( $line, $min_ci_gap, $do_not_align, $group_leader_length, + $extra_leading_spaces ) + = @_; + my $rfields = $line->get_rfields(); + my $leading_space_count = $line->get_leading_space_count(); + my $outdent_long_lines = $line->get_outdent_long_lines(); + my $maximum_field_index = $line->get_jmax(); + my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags(); + + # add any extra spaces + if ( $leading_space_count > $group_leader_length ) { + $leading_space_count += $min_ci_gap; + } + + my $str = $rfields->[0]; + + # loop to concatenate all fields of this line and needed padding + my $total_pad_count = 0; + for my $j ( 1 .. $maximum_field_index ) { + + # skip zero-length side comments + last + if ( + ( $j == $maximum_field_index ) + && ( !defined( $rfields->[$j] ) + || ( length( $rfields->[$j] ) == 0 ) ) + ); + + # compute spaces of padding before this field + my $col = $line->get_column( $j - 1 ); + my $pad = $col - ( length($str) + $leading_space_count ); + + if ($do_not_align) { + $pad = + ( $j < $maximum_field_index ) + ? 0 + : $rOpts_minimum_space_to_comment - 1; + } + + # if the -fpsc flag is set, move the side comment to the selected + # column if and only if it is possible, ignoring constraints on + # line length and minimum space to comment + if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index ) + { + my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1; + if ( $newpad >= 0 ) { $pad = $newpad; } + } + + # accumulate the padding + if ( $pad > 0 ) { $total_pad_count += $pad; } + + # add this field + if ( !defined $rfields->[$j] ) { + write_diagnostics("UNDEFined field at j=$j\n"); + } + + # only add padding when we have a finite field; + # this avoids extra terminal spaces if we have empty fields + if ( length( $rfields->[$j] ) > 0 ) { + $str .= ' ' x $total_pad_count; + $total_pad_count = 0; + $str .= $rfields->[$j]; + } + else { + $total_pad_count = 0; + } + + # update side comment history buffer + if ( $j == $maximum_field_index ) { + my $lineno = $file_writer_object->get_output_line_number(); + shift @side_comment_history; + push @side_comment_history, [ $lineno, $col ]; + } + } + + my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) ); + + # ship this line off + valign_output_step_B( $leading_space_count + $extra_leading_spaces, + $str, $side_comment_length, $outdent_long_lines, + $rvertical_tightness_flags, $group_level ); + return; +} + +sub get_extra_leading_spaces { + + #---------------------------------------------------------- + # Define any extra indentation space (for the -lp option). + # Here is why: + # If a list has side comments, sub scan_list must dump the + # list before it sees everything. When this happens, it sets + # the indentation to the standard scheme, but notes how + # many spaces it would have liked to use. We may be able + # to recover that space here in the event that all of the + # lines of a list are back together again. + #---------------------------------------------------------- + + my $extra_leading_spaces = 0; + if ($extra_indent_ok) { + my $object = $group_lines[0]->get_indentation(); + if ( ref($object) ) { + my $extra_indentation_spaces_wanted = + get_recoverable_spaces($object); + + # all indentation objects must be the same + for my $i ( 1 .. $maximum_line_index ) { + if ( $object != $group_lines[$i]->get_indentation() ) { + $extra_indentation_spaces_wanted = 0; + last; + } + } + + if ($extra_indentation_spaces_wanted) { + + # the maximum space without exceeding the line length: + my $avail = $group_lines[0]->get_available_space_on_right(); + $extra_leading_spaces = + ( $avail > $extra_indentation_spaces_wanted ) + ? $extra_indentation_spaces_wanted + : $avail; + + # update the indentation object because with -icp the terminal + # ');' will use the same adjustment. + $object->permanently_decrease_available_spaces( + -$extra_leading_spaces ); + } + } + } + return $extra_leading_spaces; +} + +sub combine_fields { + + # combine all fields except for the comment field ( sidecmt.t ) + # Uses global variables: + # @group_lines + # $maximum_line_index + my $maximum_field_index = $group_lines[0]->get_jmax(); + foreach my $j ( 0 .. $maximum_line_index ) { + my $line = $group_lines[$j]; + my $rfields = $line->get_rfields(); + foreach ( 1 .. $maximum_field_index - 1 ) { + $rfields->[0] .= $rfields->[$_]; + } + $rfields->[1] = $rfields->[$maximum_field_index]; + + $line->set_jmax(1); + $line->set_column( 0, 0 ); + $line->set_column( 1, 0 ); + + } + $maximum_field_index = 1; + + for my $j ( 0 .. $maximum_line_index ) { + my $line = $group_lines[$j]; + my $rfields = $line->get_rfields(); + for my $k ( 0 .. $maximum_field_index ) { + my $pad = length( $rfields->[$k] ) - $line->current_field_width($k); + if ( $k == 0 ) { + $pad += $group_lines[$j]->get_leading_space_count(); + } + + if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) } + + } + } + return; +} + +sub get_output_line_number { + + # the output line number reported to a caller is the number of items + # written plus the number of items in the buffer + my $self = shift; + return 1 + $maximum_line_index + + $file_writer_object->get_output_line_number(); +} + +sub valign_output_step_B { + + ############################################################### + # This is Step B in writing vertically aligned lines. + # Vertical tightness is applied according to preset flags. + # In particular this routine handles stacking of opening + # and closing tokens. + ############################################################### + + my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines, + $rvertical_tightness_flags, $level ) + = @_; + + # handle outdenting of long lines: + if ($outdent_long_lines) { + my $excess = + length($str) - + $side_comment_length + + $leading_space_count - + maximum_line_length_for_level($level); + if ( $excess > 0 ) { + $leading_space_count = 0; + $last_outdented_line_at = + $file_writer_object->get_output_line_number(); + + unless ($outdented_line_count) { + $first_outdented_line_at = $last_outdented_line_at; + } + $outdented_line_count++; + } + } + + # Make preliminary leading whitespace. It could get changed + # later by entabbing, so we have to keep track of any changes + # to the leading_space_count from here on. + my $leading_string = + $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : ""; + + # Unpack any recombination data; it was packed by + # sub send_lines_to_vertical_aligner. Contents: + # + # [0] type: 1=opening non-block 2=closing non-block + # 3=opening block brace 4=closing block brace + # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok + # if closing: spaces of padding to use + # [2] sequence number of container + # [3] valid flag: do not append if this flag is false + # + my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, + $seqno_end ); + if ($rvertical_tightness_flags) { + ( + $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg, + $seqno_end + ) = @{$rvertical_tightness_flags}; + } + + $seqno_string = $seqno_end; + + # handle any cached line .. + # either append this line to it or write it out + if ( length($cached_line_text) ) { + + # Dump an invalid cached line + if ( !$cached_line_valid ) { + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + } + + # Handle cached line ending in OPENING tokens + elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) { + + my $gap = $leading_space_count - length($cached_line_text); + + # handle option of just one tight opening per line: + if ( $cached_line_flag == 1 ) { + if ( defined($open_or_close) && $open_or_close == 1 ) { + $gap = -1; + } + } + + if ( $gap >= 0 && defined($seqno_beg) ) { + $leading_string = $cached_line_text . ' ' x $gap; + $leading_space_count = $cached_line_leading_space_count; + $seqno_string = $cached_seqno_string . ':' . $seqno_beg; + $level = $last_level_written; + } + else { + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + } + } + + # Handle cached line ending in CLOSING tokens + else { + my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str; + if ( + + # The new line must start with container + $seqno_beg + + # The container combination must be okay.. + && ( + + # okay to combine like types + ( $open_or_close == $cached_line_type ) + + # closing block brace may append to non-block + || ( $cached_line_type == 2 && $open_or_close == 4 ) + + # something like ');' + || ( !$open_or_close && $cached_line_type == 2 ) + + ) + + # The combined line must fit + && ( + length($test_line) <= + maximum_line_length_for_level($last_level_written) ) + ) + { + + $seqno_string = $cached_seqno_string . ':' . $seqno_beg; + + # Patch to outdent closing tokens ending # in ');' + # If we are joining a line like ');' to a previous stacked + # set of closing tokens, then decide if we may outdent the + # combined stack to the indentation of the ');'. Since we + # should not normally outdent any of the other tokens more than + # the indentation of the lines that contained them, we will + # only do this if all of the corresponding opening + # tokens were on the same line. This can happen with + # -sot and -sct. For example, it is ok here: + # __PACKAGE__->load_components( qw( + # PK::Auto + # Core + # )); + # + # But, for example, we do not outdent in this example because + # that would put the closing sub brace out farther than the + # opening sub brace: + # + # perltidy -sot -sct + # $c->Tk::bind( + # '' => sub { + # my ($c) = @_; + # my $e = $c->XEvent; + # itemsUnderArea $c; + # } ); + # + if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) { + + # The way to tell this is if the stacked sequence numbers + # of this output line are the reverse of the stacked + # sequence numbers of the previous non-blank line of + # sequence numbers. So we can join if the previous + # nonblank string of tokens is the mirror image. For + # example if stack )}] is 13:8:6 then we are looking for a + # leading stack like [{( which is 6:8:13 We only need to + # check the two ends, because the intermediate tokens must + # fall in order. Note on speed: having to split on colons + # and eliminate multiple colons might appear to be slow, + # but it's not an issue because we almost never come + # through here. In a typical file we don't. + $seqno_string =~ s/^:+//; + $last_nonblank_seqno_string =~ s/^:+//; + $seqno_string =~ s/:+/:/g; + $last_nonblank_seqno_string =~ s/:+/:/g; + + # how many spaces can we outdent? + my $diff = + $cached_line_leading_space_count - $leading_space_count; + if ( $diff > 0 + && length($seqno_string) + && length($last_nonblank_seqno_string) == + length($seqno_string) ) + { + my @seqno_last = + ( split /:/, $last_nonblank_seqno_string ); + my @seqno_now = ( split /:/, $seqno_string ); + if ( $seqno_now[-1] == $seqno_last[0] + && $seqno_now[0] == $seqno_last[-1] ) + { + + # OK to outdent .. + # for absolute safety, be sure we only remove + # whitespace + my $ws = substr( $test_line, 0, $diff ); + if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) { + + $test_line = substr( $test_line, $diff ); + $cached_line_leading_space_count -= $diff; + $last_level_written = + level_change( + $cached_line_leading_space_count, + $diff, $last_level_written ); + reduce_valign_buffer_indentation($diff); + } + + # shouldn't happen, but not critical: + ##else { + ## ERROR transferring indentation here + ##} + } + } + } + + $str = $test_line; + $leading_string = ""; + $leading_space_count = $cached_line_leading_space_count; + $level = $last_level_written; + } + else { + valign_output_step_C( $cached_line_text, + $cached_line_leading_space_count, + $last_level_written ); + } + } + } + $cached_line_type = 0; + $cached_line_text = ""; + + # make the line to be written + my $line = $leading_string . $str; + + # write or cache this line + if ( !$open_or_close || $side_comment_length > 0 ) { + valign_output_step_C( $line, $leading_space_count, $level ); + } + else { + $cached_line_text = $line; + $cached_line_type = $open_or_close; + $cached_line_flag = $tightness_flag; + $cached_seqno = $seqno; + $cached_line_valid = $valid; + $cached_line_leading_space_count = $leading_space_count; + $cached_seqno_string = $seqno_string; + } + + $last_level_written = $level; + $last_side_comment_length = $side_comment_length; + $extra_indent_ok = 0; + return; +} + +sub valign_output_step_C { + + ############################################################### + # This is Step C in writing vertically aligned lines. + # Lines are either stored in a buffer or passed along to the next step. + # The reason for storing lines is that we may later want to reduce their + # indentation when -sot and -sct are both used. + ############################################################### + my @args = @_; + + # Dump any saved lines if we see a line with an unbalanced opening or + # closing token. + dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling ); + + # Either store or write this line + if ($valign_buffer_filling) { + push @valign_buffer, [@args]; + } + else { + valign_output_step_D(@args); + } + + # For lines starting or ending with opening or closing tokens.. + if ($seqno_string) { + $last_nonblank_seqno_string = $seqno_string; + + # Start storing lines when we see a line with multiple stacked opening + # tokens. + # patch for RT #94354, requested by Colin Williams + if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ ) + { + + # This test is efficient but a little subtle: The first test says + # that we have multiple sequence numbers and hence multiple opening + # or closing tokens in this line. The second part of the test + # rejects stacked closing and ternary tokens. So if we get here + # then we should have stacked unbalanced opening tokens. + + # Here is a complex example: + + # Foo($Bar[0], { # (side comment) + # baz => 1, + # }); + + # The first line has sequence 6::4. It does not begin with + # a closing token or ternary, so it passes the test and must be + # stacked opening tokens. + + # The last line has sequence 4:6 but is a stack of closing tokens, + # so it gets rejected. + + # Note that the sequence number of an opening token for a qw quote + # is a negative number and will be rejected. + # For example, for the following line: + # skip_symbols([qw( + # $seqno_string='10:5:-1'. It would be okay to accept it but + # I decided not to do this after testing. + + $valign_buffer_filling = $seqno_string; + + } + } + return; +} + +sub valign_output_step_D { + + ############################################################### + # This is Step D in writing vertically aligned lines. + # Write one vertically aligned line of code to the output object. + ############################################################### + + my ( $line, $leading_space_count, $level ) = @_; + + # The line is currently correct if there is no tabbing (recommended!) + # We may have to lop off some leading spaces and replace with tabs. + if ( $leading_space_count > 0 ) { + + # Nothing to do if no tabs + if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) + || $rOpts_indent_columns <= 0 ) + { + + # nothing to do + } + + # Handle entab option + elsif ($rOpts_entab_leading_whitespace) { + my $space_count = + $leading_space_count % $rOpts_entab_leading_whitespace; + my $tab_count = + int( $leading_space_count / $rOpts_entab_leading_whitespace ); + my $leading_string = "\t" x $tab_count . ' ' x $space_count; + if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { + substr( $line, 0, $leading_space_count ) = $leading_string; + } + else { + + # shouldn't happen - program error counting whitespace + # - skip entabbing + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" + ); + } + } + + # Handle option of one tab per level + else { + my $leading_string = ( "\t" x $level ); + my $space_count = + $leading_space_count - $level * $rOpts_indent_columns; + + # shouldn't happen: + if ( $space_count < 0 ) { + + # But it could be an outdented comment + if ( $line !~ /^\s*#/ ) { + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n" + ); + } + $leading_string = ( ' ' x $leading_space_count ); + } + else { + $leading_string .= ( ' ' x $space_count ); + } + if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) { + substr( $line, 0, $leading_space_count ) = $leading_string; + } + else { + + # shouldn't happen - program error counting whitespace + # we'll skip entabbing + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n" + ); + } + } + } + $file_writer_object->write_code_line( $line . "\n" ); + return; +} + +{ # begin get_leading_string + + my @leading_string_cache; + + sub get_leading_string { + + # define the leading whitespace string for this line.. + my $leading_whitespace_count = shift; + + # Handle case of zero whitespace, which includes multi-line quotes + # (which may have a finite level; this prevents tab problems) + if ( $leading_whitespace_count <= 0 ) { + return ""; + } + + # look for previous result + elsif ( $leading_string_cache[$leading_whitespace_count] ) { + return $leading_string_cache[$leading_whitespace_count]; + } + + # must compute a string for this number of spaces + my $leading_string; + + # Handle simple case of no tabs + if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace ) + || $rOpts_indent_columns <= 0 ) + { + $leading_string = ( ' ' x $leading_whitespace_count ); + } + + # Handle entab option + elsif ($rOpts_entab_leading_whitespace) { + my $space_count = + $leading_whitespace_count % $rOpts_entab_leading_whitespace; + my $tab_count = int( + $leading_whitespace_count / $rOpts_entab_leading_whitespace ); + $leading_string = "\t" x $tab_count . ' ' x $space_count; + } + + # Handle option of one tab per level + else { + $leading_string = ( "\t" x $group_level ); + my $space_count = + $leading_whitespace_count - $group_level * $rOpts_indent_columns; + + # shouldn't happen: + if ( $space_count < 0 ) { + VALIGN_DEBUG_FLAG_TABS + && warning( +"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n" + ); + + # -- skip entabbing + $leading_string = ( ' ' x $leading_whitespace_count ); + } + else { + $leading_string .= ( ' ' x $space_count ); + } + } + $leading_string_cache[$leading_whitespace_count] = $leading_string; + return $leading_string; + } +} # end get_leading_string + +sub report_anything_unusual { + my $self = shift; + if ( $outdented_line_count > 0 ) { + write_logfile_entry( + "$outdented_line_count long lines were outdented:\n"); + write_logfile_entry( + " First at output line $first_outdented_line_at\n"); + + if ( $outdented_line_count > 1 ) { + write_logfile_entry( + " Last at output line $last_outdented_line_at\n"); + } + write_logfile_entry( + " use -noll to prevent outdenting, -l=n to increase line length\n" + ); + write_logfile_entry("\n"); + } + return; +} +1; + diff --git a/lib/Perl/Tidy/VerticalAligner/Alignment.pm b/lib/Perl/Tidy/VerticalAligner/Alignment.pm new file mode 100644 index 00000000..194694e1 --- /dev/null +++ b/lib/Perl/Tidy/VerticalAligner/Alignment.pm @@ -0,0 +1,122 @@ +##################################################################### +# +# the Perl::Tidy::VerticalAligner::Alignment class holds information +# on a single column being aligned +# +##################################################################### +package Perl::Tidy::VerticalAligner::Alignment; +use strict; +use warnings; + +{ + + #use Carp; + + # _column # the current column number + # _starting_column # column number when created + # _matching_token # what token we are matching + # _starting_line # the line index of creation + # _ending_line + # the most recent line to use it + # _saved_column + # _serial_number # unique number for this alignment + + my %default_data = ( + column => undef, + starting_column => undef, + matching_token => undef, + starting_line => undef, + ending_line => undef, + saved_column => undef, + serial_number => undef, + ); + + # class population count + { + my $_count = 0; + sub get_count { return $_count } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } + } + + # constructor + sub new { + my ( $caller, %arg ) = @_; + my $caller_is_obj = ref($caller); + my $class = $caller_is_obj || $caller; + ##no strict "refs"; + my $self = bless {}, $class; + + foreach my $key ( keys %default_data ) { + my $_key = '_' . $key; + if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} } + elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} } + else { $self->{$_key} = $default_data{$_key} } + } + $self->_increment_count(); + return $self; + } + + sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; + } + + sub get_column { my $self = shift; return $self->{_column} } + + sub get_starting_column { + my $self = shift; + return $self->{_starting_column}; + } + sub get_matching_token { my $self = shift; return $self->{_matching_token} } + sub get_starting_line { my $self = shift; return $self->{_starting_line} } + sub get_ending_line { my $self = shift; return $self->{_ending_line} } + sub get_serial_number { my $self = shift; return $self->{_serial_number} } + + sub set_column { my ( $self, $val ) = @_; $self->{_column} = $val; return } + + sub set_starting_column { + my ( $self, $val ) = @_; + $self->{_starting_column} = $val; + return; + } + + sub set_matching_token { + my ( $self, $val ) = @_; + $self->{_matching_token} = $val; + return; + } + + sub set_starting_line { + my ( $self, $val ) = @_; + $self->{_starting_line} = $val; + return; + } + + sub set_ending_line { + my ( $self, $val ) = @_; + $self->{_ending_line} = $val; + return; + } + + sub increment_column { + my ( $self, $val ) = @_; + $self->{_column} += $val; + return; + } + + sub save_column { + my $self = shift; + $self->{_saved_column} = $self->{_column}; + return; + } + + sub restore_column { + my $self = shift; + $self->{_column} = $self->{_saved_column}; + return; + } +} + +1; diff --git a/lib/Perl/Tidy/VerticalAligner/Line.pm b/lib/Perl/Tidy/VerticalAligner/Line.pm new file mode 100644 index 00000000..7cd7cc7e --- /dev/null +++ b/lib/Perl/Tidy/VerticalAligner/Line.pm @@ -0,0 +1,242 @@ +##################################################################### +# +# the Perl::Tidy::VerticalAligner::Line class supplies an object to +# contain a single output line +# +##################################################################### + +package Perl::Tidy::VerticalAligner::Line; +use strict; +use warnings; + +{ + + ##use Carp; + + my %default_data = ( + jmax => undef, + jmax_original_line => undef, + rtokens => undef, + rfields => undef, + rpatterns => undef, + indentation => undef, + leading_space_count => undef, + outdent_long_lines => undef, + list_type => undef, + is_hanging_side_comment => undef, + ralignments => [], + maximum_line_length => undef, + rvertical_tightness_flags => undef + ); + { + + # methods to count object population + my $_count = 0; + sub get_count { return $_count; } + sub _increment_count { return ++$_count } + sub _decrement_count { return --$_count } + } + + # Constructor may be called as a class method + sub new { + my ( $caller, %arg ) = @_; + my $caller_is_obj = ref($caller); + my $class = $caller_is_obj || $caller; + ##no strict "refs"; + my $self = bless {}, $class; + + $self->{_ralignments} = []; + + foreach my $key ( keys %default_data ) { + my $_key = '_' . $key; + + # Caller keys do not have an underscore + if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} } + elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} } + else { $self->{$_key} = $default_data{$_key} } + } + + $self->_increment_count(); + return $self; + } + + sub DESTROY { + my $self = shift; + $self->_decrement_count(); + return; + } + + sub get_jmax { my $self = shift; return $self->{_jmax} } + + sub get_jmax_original_line { + my $self = shift; + return $self->{_jmax_original_line}; + } + sub get_rtokens { my $self = shift; return $self->{_rtokens} } + sub get_rfields { my $self = shift; return $self->{_rfields} } + sub get_rpatterns { my $self = shift; return $self->{_rpatterns} } + sub get_indentation { my $self = shift; return $self->{_indentation} } + + sub get_leading_space_count { + my $self = shift; + return $self->{_leading_space_count}; + } + + sub get_outdent_long_lines { + my $self = shift; + return $self->{_outdent_long_lines}; + } + sub get_list_type { my $self = shift; return $self->{_list_type} } + + sub get_is_hanging_side_comment { + my $self = shift; + return $self->{_is_hanging_side_comment}; + } + + sub get_rvertical_tightness_flags { + my $self = shift; + return $self->{_rvertical_tightness_flags}; + } + + sub set_column { + ## FIXME: does caller ever supply $val?? + my ( $self, $j, $val ) = @_; + return $self->{_ralignments}->[$j]->set_column($val); + } + + sub get_alignment { + my ( $self, $j ) = @_; + return $self->{_ralignments}->[$j]; + } + sub get_alignments { my $self = shift; return @{ $self->{_ralignments} } } + + sub get_column { + my ( $self, $j ) = @_; + return $self->{_ralignments}->[$j]->get_column(); + } + + sub get_starting_column { + my ( $self, $j ) = @_; + return $self->{_ralignments}->[$j]->get_starting_column(); + } + + sub increment_column { + my ( $self, $k, $pad ) = @_; + $self->{_ralignments}->[$k]->increment_column($pad); + return; + } + + sub set_alignments { + my ( $self, @args ) = @_; + @{ $self->{_ralignments} } = @args; + return; + } + + sub current_field_width { + my ( $self, $j ) = @_; + if ( $j == 0 ) { + return $self->get_column($j); + } + else { + return $self->get_column($j) - $self->get_column( $j - 1 ); + } + } + + sub field_width_growth { + my ( $self, $j ) = @_; + return $self->get_column($j) - $self->get_starting_column($j); + } + + sub starting_field_width { + my ( $self, $j ) = @_; + if ( $j == 0 ) { + return $self->get_starting_column($j); + } + else { + return $self->get_starting_column($j) - + $self->get_starting_column( $j - 1 ); + } + } + + sub increase_field_width { + + my ( $self, $j, $pad ) = @_; + my $jmax = $self->get_jmax(); + for my $k ( $j .. $jmax ) { + $self->increment_column( $k, $pad ); + } + return; + } + + sub get_available_space_on_right { + my $self = shift; + my $jmax = $self->get_jmax(); + return $self->{_maximum_line_length} - $self->get_column($jmax); + } + + sub set_jmax { my ( $self, $val ) = @_; $self->{_jmax} = $val; return } + + sub set_jmax_original_line { + my ( $self, $val ) = @_; + $self->{_jmax_original_line} = $val; + return; + } + + sub set_rtokens { + my ( $self, $val ) = @_; + $self->{_rtokens} = $val; + return; + } + + sub set_rfields { + my ( $self, $val ) = @_; + $self->{_rfields} = $val; + return; + } + + sub set_rpatterns { + my ( $self, $val ) = @_; + $self->{_rpatterns} = $val; + return; + } + + sub set_indentation { + my ( $self, $val ) = @_; + $self->{_indentation} = $val; + return; + } + + sub set_leading_space_count { + my ( $self, $val ) = @_; + $self->{_leading_space_count} = $val; + return; + } + + sub set_outdent_long_lines { + my ( $self, $val ) = @_; + $self->{_outdent_long_lines} = $val; + return; + } + + sub set_list_type { + my ( $self, $val ) = @_; + $self->{_list_type} = $val; + return; + } + + sub set_is_hanging_side_comment { + my ( $self, $val ) = @_; + $self->{_is_hanging_side_comment} = $val; + return; + } + + sub set_alignment { + my ( $self, $j, $val ) = @_; + $self->{_ralignments}->[$j] = $val; + return; + } + +} + +1; + diff --git a/local-docs/BUGS.md b/local-docs/BUGS.md new file mode 100644 index 00000000..1ec2b487 --- /dev/null +++ b/local-docs/BUGS.md @@ -0,0 +1,45 @@ +# Perltidy open BUGS and LIMITATIONS + +You can help perltidy evolve into a better program. If you think you +have hit a bug or weird behavior, or have a suggested improvement, +please send a note to perltidy at users.sourceforge.net. + +This file only lists open bugs. For bugs which have been fixed, +see the ChangeLog. + +## The --extrude and --mangle options can produce code with syntax errors + +The --extrude tries to put as many newlines in the formatted code as possible. +The --mangle tries to remove as many newlines as possible. These options are +very useful for stress testing perltidy (and Perl) but not so much for normal +formatting. Occasionally they will produce code which Perl considers to have a +syntax error. These problems often involve code where Perl is having to guess +the tokenization based on whitespace. The given/when and switch/case +statements are also particularly vulnerable to unusual line breaks and +whitespace. This type of error should not normally occur in practice, but if +it does it should be easy to fix the problem by rerunning perltidy with more +normal parameters or by manually changing whitespace or newlines. + +## The Pod:Html module has some bugs + +For the most part Pod::Html works very well and is very convenient because +it part of the standard Perl distribution. But for example the following line + + =item B<< = Session->new_cflt_deck; >> + +which uses double brackets to contain single brackets does not render correctly. + +## Two iterations are sometimes needed + +Usually the code produced by perltidy on the first pass does not change if it +is run again, but sometimes a second pass will produce some small additional +change. This mainly happens if a major style change is made, particularly when +perltidy is untangling complex ternary statements. Use the iteration parameter +**-it=2** if it is important that the results be unchanged on subsequent passes, +but note that this doubles the run time. + +## Latest Bug and Wishlist at CPAN: + +For the latest list of bugs and feature requests at CPAN see: + +https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy diff --git a/local-docs/BUGS.pod b/local-docs/BUGS.pod new file mode 100644 index 00000000..92a17a24 --- /dev/null +++ b/local-docs/BUGS.pod @@ -0,0 +1,45 @@ +=head1 Perltidy open BUGS and LIMITATIONS + +You can help perltidy evolve into a better program. If you think you +have hit a bug or weird behavior, or have a suggested improvement, +please send a note to perltidy at users.sourceforge.net. + +This file only lists open bugs. For bugs which have been fixed, +see the ChangeLog. + +=head2 The --extrude and --mangle options can produce code with syntax errors + +The --extrude tries to put as many newlines in the formatted code as possible. +The --mangle tries to remove as many newlines as possible. These options are +very useful for stress testing perltidy (and Perl) but not so much for normal +formatting. Occasionally they will produce code which Perl considers to have a +syntax error. These problems often involve code where Perl is having to guess +the tokenization based on whitespace. The given/when and switch/case +statements are also particularly vulnerable to unusual line breaks and +whitespace. This type of error should not normally occur in practice, but if +it does it should be easy to fix the problem by rerunning perltidy with more +normal parameters or by manually changing whitespace or newlines. + +=head2 The Pod:Html module has some bugs + +For the most part Pod::Html works very well and is very convenient because +it part of the standard Perl distribution. But for example the following line + + =item B<< = Session->new_cflt_deck; >> + +which uses double brackets to contain single brackets does not render correctly. + +=head2 Two iterations are sometimes needed + +Usually the code produced by perltidy on the first pass does not change if it +is run again, but sometimes a second pass will produce some small additional +change. This mainly happens if a major style change is made, particularly when +perltidy is untangling complex ternary statements. Use the iteration parameter +B<-it=2> if it is important that the results be unchanged on subsequent passes, +but note that this doubles the run time. + +=head2 Latest Bug and Wishlist at CPAN: + +For the latest list of bugs and feature requests at CPAN see: + +https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy diff --git a/local-docs/ChangeLog.pod b/local-docs/ChangeLog.pod new file mode 100644 index 00000000..208b2ae0 --- /dev/null +++ b/local-docs/ChangeLog.pod @@ -0,0 +1,3285 @@ +=head1 Perltidy Change Log + +=head2 2018 02 20.01 + + - Fixed RT #125012: bug in -mangle --delete-all-comments + A needed blank space bareword tokens was being removed when comments were + deleted + + - Fixed RT #81852: Stacked containers and quoting operators. Quoted words + (qw) delimited by container tokens ('{', '[', '(', '<') are now included in + the --weld-nested (-wn) flag: + + # perltidy -wn + use_all_ok( qw{ + PPI + PPI::Tokenizer + PPI::Lexer + PPI::Dumper + PPI::Find + PPI::Normal + PPI::Util + PPI::Cache + } ); + + - The cuddled-else (-ce) coding was merged with the new cuddled-block (-cb) + coding. The change is backward compatible and simplifies input. + The --cuddled-block-option=n (-cbo=n) flag now applies to both -ce and -cb + formatting. In fact the -cb flag is just an alias for -ce now. + + - Fixed RT #124594, license text desc. changed from 'GPL-2.0+' to 'gpl_2' + + - Numerous installation test snippets have been added. + + - Fixed bug in which a warning about a possible code bug was issued in a + script with brace errors. + + - added option --notimestamp or -nts to eliminate any time stamps in output + files. This is used to prevent differences in test scripts from causing + failure at installation. For example, the -cscw option will put a date + stamp on certain closing side comments. We need to avoid this in order + to test this feature in an installation test. + + - The packaging for this version has changed. The Tidy.pm file has + been split into a smaller Tidy.pm file plus supporting modules in the path + Perl/Tidy/*. + +=head2 2018 02 20 + + + - RT #124469, #124494, perltidy often making empty files. The previous had + an index error causing it to fail, particularly in version 5.18 of Perl. + + Please avoid version 20180219. + +=head2 2018 02 19 + + - RT #79947, cuddled-else generalization. A new flag -cb provides + 'cuddled-else' type formatting for an arbitrary type of block chain. The + default is try-catch-finally, but this can be modified with the + parameter -cbl. + + - Fixed RT #124298: add space after ! operator without breaking !! secret + operator + + - RT #123749: numerous minor improvements to the -wn flag were made. + + - Fixed a problem with convergence tests in which iterations were stopping + prematurely. + + - Here doc targets for <<~ type here-docs may now have leading whitespace. + + - Fixed RT #124354. The '-indent-only' flag was not working correctly in the + previous release. A bug in version 20180101 caused extra blank lines + to be output. + + - Issue RT #124114. Some improvements were made in vertical alignment + involving 'fat commas'. + +=head2 2018 01 01 + + - Added new flag -wn (--weld-nested-containers) which addresses these issues: + RT #123749: Problem with promises; + RT #119970: opening token stacking strange behavior; + RT #81853: Can't stack block braces + + This option causes closely nested pairs of opening and closing containers + to be "welded" together and essentially be formatted as a single unit, + with just one level of indentation. + + Since this is a new flag it is set to be "off" by default but it has given + excellent results in testing. + + EXAMPLE 1, multiple blocks, default formatting: + do { + { + next if $x == $y; # do something here + } + } until $x++ > $z; + + perltidy -wn + do { { + next if $x == $y; + } } until $x++ > $z; + + EXAMPLE 2, three levels of wrapped function calls, default formatting: + p( + em( + conjug( + translate( param('verb') ), param('tense'), + param('person') + ) + ) + ); + + # perltidy -wn + p( em( conjug( + translate( param('verb') ), + param('tense'), param('person') + ) ) ); + + # EXAMPLE 3, chained method calls, default formatting: + get('http://mojolicious.org')->then( + sub { + my $mojo = shift; + say $mojo->res->code; + return get('http://metacpan.org'); + } + )->then( + sub { + my $cpan = shift; + say $cpan->res->code; + } + )->catch( + sub { + my $err = shift; + warn "Something went wrong: $err"; + } + )->wait; + + # perltidy -wn + get('http://mojolicious.org')->then( sub { + my $mojo = shift; + say $mojo->res->code; + return get('http://metacpan.org'); + } )->then( sub { + my $cpan = shift; + say $cpan->res->code; + } )->catch( sub { + my $err = shift; + warn "Something went wrong: $err"; + } )->wait; + + + - Fixed RT #114359: Missparsing of "print $x ** 0.5; + + - Deactivated the --check-syntax flag for better security. It will be + ignored if set. + + - Corrected minimum perl version from 5.004 to 5.008 based on perlver + report. The change is required for coding involving wide characters. + + - For certain severe errors, the source file will be copied directly to the + output without formatting. These include ending in a quote, ending in a + here doc, and encountering an unidentified character. + +=head2 2017 12 14 + + - RT #123749, partial fix. "Continuation indentation" is removed from lines + with leading closing parens which are part of a call chain. + For example, the call to pack() is is now outdented to the starting + indentation in the following experession: + + # OLD + $mw->Button( + -text => "New Document", + -command => \&new_document + )->pack( + -side => 'bottom', + -anchor => 'e' + ); + + # NEW + $mw->Button( + -text => "New Document", + -command => \&new_document + )->pack( + -side => 'bottom', + -anchor => 'e' + ); + + This modification improves readability of complex expressions, especially + when the user uses the same value for continuation indentation (-ci=n) and + normal indentation (-i=n). Perltidy was already programmed to + do this but a minor bug was preventing it. + + - RT #123774, added flag to control space between a backslash and a single or + double quote, requested by Robert Rothenberg. The issue is that lines like + + $str1=\"string1"; + $str2=\'string2'; + + confuse syntax highlighters unless a space is left between the backslash and + the quote. + + The new flag to control this is -sbq=n (--space-backslash-quote=n), + where n=0 means no space, n=1 means follow existing code, n=2 means always + space. The default is n=1, meaning that a space will be retained if there + is one in the source code. + + - Fixed RT #123492, support added for indented here doc operator <<~ added + in v5.26. Thanks to Chris Weyl for the report. + + - Fixed docs; --closing-side-comment-list-string should have been just + --closing-side-comment-list. Thanks to F.Li. + + - Added patch RT #122030] Perl::Tidy sometimes does not call binmode. + Thanks to Irilis Aelae. + + - Fixed RT #121959, PERLTIDY doesn't honor the 'three dot' notation for + locating a config file using environment variables. Thanks to John + Wittkowski. + + - Minor improvements to formatting, in which some additional vertical + aligmnemt is done. Thanks to Keith Neargarder. + + - RT #119588. Vertical alignment is no longer done for // operator. + + +=head2 2017 05 21 + + - Fixed debian #862667: failure to check for perltidy.ERR deletion can lead + to overwriting abritrary files by symlink attack. Perltidy was continuing + to write files after an unlink failure. Thanks to Don Armstrong + for a patch. + + - Fixed RT #116344, perltidy fails on certain anonymous hash references: + in the following code snippet the '?' was misparsed as a pattern + delimiter rather than a ternary operator. + return ref {} ? 1 : 0; + + - Fixed RT #113792: misparsing of a fat comma (=>) right after + the __END__ or __DATA__ tokens. These keywords were getting + incorrectly quoted by the following => operator. + + - Fixed RT #118558. Custom Getopt::Long configuration breaks parsing + of perltidyrc. Perltidy was resetting the users configuration too soon. + + - Fixed RT #119140, failure to parse double diamond operator. Code to + handle this new operator has been added. + + - Fixed RT #120968. Fixed problem where -enc=utf8 didn't work + with --backup-and-modify-in-place. Thanks to Heinz Knutzen for this patch. + + - Fixed minor formatting issue where one-line blocks for subs with signatures + were unnecesarily broken + + - RT #32905, patch to fix utf-8 error when output was STDOUT. + + - RT #79947, improved spacing of try/catch/finally blocks. Thanks to qsimpleq + for a patch. + + - Fixed #114909, Anonymous subs with signatures and prototypes misparsed as + broken ternaries, in which a statement such as this was not being parsed + correctly: + return sub ( $fh, $out ) : prototype(*$) { ... } + + - Implemented RT #113689, option to introduces spaces after an opening block + brace and before a closing block brace. Four new optional controls are + added. The first two define the minimum number of blank lines to be + inserted + + -blao=i or --blank-lines-after-opening-block=i + -blbc=i or --blank-lines-before-closing-block=i + + where i is an integer, the number of lines (the default is 0). + + The second two define the types of blocks to which the first two apply + + -blaol=s or --blank-lines-after-opening-block-list=s + -blbcl=s or --blank-lines-before-closing-block-list=s + + where s is a string of possible block keywords (default is just 'sub', + meaning a named subroutine). + + For more information please see the documentation. + + - The method for specifying block types for certain input parameters has + been generalized to distinguish between normal named subroutines and + anonymous subs. The keyword for normal subroutines remains 'sub', and + the new keyword for anonymous subs is 'asub'. + + - Minor documentation changes. The BUGS sections now have a link + to CPAN where most open bugs and issues can be reviewed and bug reports + can be submitted. The information in the AUTHOR and CREDITS sections of + the man pages have been removed from the man pages to streamline the + documentation. This information is still in the source code. + +=head2 2016 03 02 + + - RT #112534. Corrected a minor problem in which an unwanted newline + was placed before the closing brace of an anonymous sub with + a signature, if it was in a list. Thanks to Dmytro Zagashev. + + - Corrected a minor problem in which occasional extra indentation was + given to the closing brace of an anonymous sub in a list when the -lp + parameter was set. + +=head2 2016 03 01 + + - RT #104427. Added support for signatures. + + - RT #111512. Changed global warning flag $^W = 1 to use warnings; + Thanks to Dmytro Zagashev. + + - RT #110297, added support for new regexp modifier /n + Thanks to Dmytro Zagashev. + + - RT #111519. The -io (--indent-only) and -dac (--delete-all-comments) + can now both be used in one pass. Thanks to Dmitry Veltishev. + + - Patch to avoid error message with 'catch' used by TryCatch, as in + catch($err){ + # do something + } + Thanks to Nick Tonkin. + + - RT #32905, UTF-8 coding is now more robust. Thanks to qsimpleq + and Dmytro for patches. + + - RT #106885. Added string bitwise operators ^. &. |. ~. ^.= &.= |.= + + - Fixed RT #107832 and #106492, lack of vertical alignment of two lines + when -boc flag (break at old commas) is set. This bug was + inadvertantly introduced in previous bug fix RT #98902. + + - Some common extensions to Perl syntax are handled better. + In particular, the following snippet is now foratted cleanly: + + method deposit( Num $amount) { + $self->balance( $self->balance + $amount ); + } + + A new flag -xs (--extended-syntax) was added to enable this, and the default + is to use -xs. + + In previous versions, and now only when -nxs is set, this snippet of code + generates the following error message: + + "syntax error at ') {', didn't see one of: case elsif for foreach given if switch unless until when while" + + +=head2 2015 08 15 + + - Fixed RT# 105484, Invalid warning about 'else' in 'switch' statement. The + warning happened if a 'case' statement did not use parens. + + - Fixed RT# 101547, misparse of // caused error message. Also.. + + - Fixed RT# 102371, misparse of // caused unwated space in //= + + - Fixed RT# 100871, "silent failure of HTML Output on Windows". + Changed calls to tempfile() from: + my ( $fh_tmp, $tmpfile ) = tempfile(); + to have the full path name: + my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile() + because of problems in the Windows version reported by Dean Pearce. + + - Fixed RT# 99514, calling the perltidy module multiple times with + a .perltidyrc file containing the parameter --output-line-ending + caused a crash. This was a glitch in the memoization logic. + + - Fixed RT#99961, multiple lines inside a cast block caused unwanted + continuation indentation. + + - RT# 32905, broken handling of UTF-8 strings. + A new flag -utf8 causes perltidy assume UTF-8 encoding for input and + output of an io stream. Thanks to Sebastian Podjasek for a patch. + This feature may not work correctly in older versions of Perl. + It worked in a linux version 5.10.1 but not in a Windows version 5.8.3 (but + otherwise perltidy ran correctly). + + - Warning files now report perltidy VERSION. Suggested by John Karr. + + - Fixed long flag --nostack-closing-tokens (-nsct has always worked though). + This was due to a typo. This also fixed --nostack-opening-tokens to + behave correctly. Thanks to Rob Dixon. + + +=head2 2014 07 11 + + - Fixed RT #94902: abbreviation parsing in .perltidyrc files was not + working for multi-line abbreviations. Thanks to Eric Fung for + supplying a patch. + + - Fixed RT #95708, misparsing of a hash when the first key was a perl + keyword, causing a semicolon to be incorrectly added. + + - Fixed RT #94338 for-loop in a parenthesized block-map. A code block within + parentheses of a map, sort, or grep function was being mistokenized. In + rare cases this could produce in an incorrect error message. The fix will + produce some minor formatting changes. Thanks to Daniel Trizen + discovering and documenting this. + + - Fixed RT #94354, excess indentation for stacked tokens. Thanks to + Colin Williams for supplying a patch. + + - Added support for experimental postfix dereferencing notation introduced in + perl 5.20. RT #96021. + + - Updated documentation to clarify the behavior of the -io flag + in response to RT #95709. You can add -noll or -l=0 to prevent + long comments from being outdented when -io is used. + + - Added a check to prevent a problem reported in RT #81866, where large + scripts which had been compressed to a single line could not be formatted + because of a check for VERSION for MakeMaker. The workaround was to + use -nvpl, but this shouldn't be necessary now. + + - Fixed RT #96101; Closing brace of anonymous sub in a list was being + indented. For example, the closing brace of the anonymous sub below + will now be lined up with the word 'callback'. This problem + occured if there was no comma after the closing brace of the anonymous sub. + This update may cause minor changes to formatting of code with lists + of anonymous subs, especially TK code. + + # OLD + my @menu_items = ( + + #... + { + path => '/_Operate/Transcode and split', + callback => sub { + return 1 if not $self->project_opened; + $self->comp('project')->transcode( split => 1 ); + } + } + ); + + # NEW + my @menu_items = ( + + #... + { + path => '/_Operate/Transcode and split', + callback => sub { + return 1 if not $self->project_opened; + $self->comp('project')->transcode( split => 1 ); + } + } + ); + +=head2 2014 03 28 + + - Fixed RT #94190 and debian Bug #742004: perltidy.LOG file left behind. + Thanks to George Hartzell for debugging this. The problem was + caused by the memoization speedup patch in version 20121207. An + unwanted flag was being set which caused a LOG to be written if + perltidy was called multiple times. + + - New default behavior for LOG files: If the source is from an array or + string (through a call to the perltidy module) then a LOG output is only + possible if a logfile stream is specified. This is to prevent + unexpected perltidy.LOG files. + + - Fixed debian Bug #740670, insecure temporary file usage. File::Temp is now + used to get a temporary file. Thanks to Don Anderson for a patch. + + - Any -b (--backup-and-modify-in-place) flag is silently ignored when a + source stream, destination stream, or standard output is used. + This is because the -b flag may have been in a .perltidyrc file and + warnings break Test::NoWarnings. Thanks to Marijn Brand. + +=head2 2013 09 22 + + - Fixed RT #88020. --converge was not working with wide characters. + + - Fixed RT #78156. package NAMESPACE VERSION syntax not accepted. + + - First attempt to fix RT #88588. INDEX END tag change in pod2html breaks + perltidy -html. I put in a patch which should work but I don't yet have + a way of testing it. + + +=head2 2013 08 06 + + - Fixed RT #87107, spelling + +=head2 2013 08 05 + + - Fixed RT #87502, incorrect of parsing of smartmatch before hash brace + + - Added feature request RT #87330, trim whitespace after POD. + The flag -trp (--trim-pod) will trim trailing whitespace from lines of POD + +=head2 2013 07 17 + + - Fixed RT #86929, #86930, missing lhs of assignment. + + - Fixed RT #84922, moved pod from Tidy.pm into Tidy.pod + +=head2 2012 12 07 + + + - The flag -cab=n or --comma-arrow-breakpoints=n has been generalized + to give better control over breaking open short containers. The + possible values are now: + + n=0 break at all commas after => + n=1 stable: break at all commas after => if container is open, + EXCEPT FOR one-line containers + n=2 break at all commas after =>, BUT try to form the maximum + maximum one-line container lengths + n=3 do not treat commas after => specially at all + n=4 break everything: like n=0 but also break a short container with + a => not followed by a comma + n=5 stable: like n=1 but ALSO break at open one-line containers (default) + + New values n=4 and n=5 have been added to allow short blocks to be + broken open. The new default is n=5, stable. It should more closely + follow the breaks in the input file, and previously formatted code + should remain unchanged. If this causes problems use -cab=1 to recover + the former behavior. Thanks to Tony Maszeroski for the suggestion. + + To illustrate the need for the new options, if perltidy is given + the following code, then the old default (-cab=1) was to close up + the 'index' container even if it was open in the source. The new + default (-cab=5) will keep it open if it was open in the source. + + our $fancypkg = { + 'ALL' => { + 'index' => { + 'key' => 'value', + }, + 'alpine' => { + 'one' => '+', + 'two' => '+', + 'three' => '+', + }, + } + }; + + - New debug flag --memoize (-mem). This version contains a + patch supplied by Jonathan Swartz which can significantly speed up + repeated calls to Perl::Tidy::perltidy in a single process by caching + the result of parsing the formatting parameters. A factor of up to 10 + speedup was achieved for masontidy (https://metacpan.org/module/masontidy). + The memoization patch is on by default but can be deactivated for + testing with -nmem (or --no-memoize). + + - New flag -tso (--tight-secret-operators) causes certain perl operator + sequences (secret operators) to be formatted "tightly" (without spaces). + The most common of these are 0 + and + 0 which become 0+ and +0. The + operators currently modified by this flag are: + =( )= 0+ +0 ()x!! ~~<> ,=> + Suggested by by Philippe Bruhat. See https://metacpan.org/module/perlsecret + This flag is off by default. + + - New flag -vmll (--variable-maximum-line-length) makes the maximum + line length increase with the nesting depth of a line of code. + Basically, it causes the length of leading whitespace to be ignored when + setting line breaks, so the formatting of a block of code is independent + of its nesting depth. Try this option if you have deeply nested + code or data structures, perhaps in conjunction with the -wc flag + described next. The default is not todo this. + + - New flag -wc=n (--whitespace-cycle=n) also addresses problems with + very deeply nested code and data structures. When this parameter is + used and the nesting depth exceeds the value n, the leading whitespace + will be reduced and start at 1 again. The result is that deeply + nested blocks of code will shift back to the left. This occurs cyclically + to any nesting depth. This flag may be used either with or without -vmll. + The default is not to use this (-wc=0). + + - Fixed RT #78764, error parsing smartmatch operator followed by anonymous + hash or array and then a ternary operator; two examples: + + qr/3/ ~~ ['1234'] ? 1 : 0; + map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a; + + - Fixed problem with specifying spaces around arrows using -wls='->' + and -wrs='->'. Thanks to Alain Valleton for documenting this problem. + + - Implemented RT #53183, wishlist, lines of code with the same indentation + level which are contained with multiple stacked opening and closing tokens + (requested with flags -sot -sct) now have reduced indentation. + + # Default + $sender->MailMsg( + { + to => $addr, + subject => $subject, + msg => $body + } + ); + + # OLD: perltidy -sot -sct + $sender->MailMsg( { + to => $addr, + subject => $subject, + msg => $body + } ); + + # NEW: perltidy -sot -sct + $sender->MailMsg( { + to => $addr, + subject => $subject, + msg => $body + } ); + + - New flag -act=n (--all-containers-tightness=n) is an abbreviation for + -pt=n -sbt=n -bt=n -bbt=n, where n=0,1, or 2. It simplifies input when all + containers have the same tightness. Using the same example: + + # NEW: perltidy -sot -sct -act=2 + $sender->MailMsg({ + to => $addr, + subject => $subject, + msg => $body + }); + + - New flag -sac (--stack-all-containers) is an abbreviation for -sot -sct + This is part of wishlist item RT #53183. Using the same example again: + + # NEW: perltidy -sac -act=2 + $sender->MailMsg({ + to => $addr, + subject => $subject, + msg => $body + }); + + - new flag -scbb (--stack-closing-block-brace) causes isolated closing + block braces to stack as in the following example. (Wishlist item RT#73788) + + DEFAULT: + for $w1 (@w1) { + for $w2 (@w2) { + for $w3 (@w3) { + for $w4 (@w4) { + push( @lines, "$w1 $w2 $w3 $w4\n" ); + } + } + } + } + + perltidy -scbb: + for $w1 (@w1) { + for $w2 (@w2) { + for $w3 (@w3) { + for $w4 (@w4) { + push( @lines, "$w1 $w2 $w3 $w4\n" ); + } } } } + + There is, at present, no flag to place these closing braces at the end + of the previous line. It seems difficult to develop good rules for + doing this for a wide variety of code and data structures. + + - Parameters defining block types may use a wildcard '*' to indicate + all block types. Previously it was not possible to include bare blocks. + + - A flag -sobb (--stack-opening-block-brace) has been introduced as an + alias for -bbvt=2 -bbvtl='*'. So for example the following test code: + + {{{{{{{ $testing }}}}}}} + + cannot be formatted as above but can at least be kept vertically compact + using perltidy -sobb -scbb + + { { { { { { { $testing + } } } } } } } + + Or even, perltidy -sobb -scbb -i=1 -bbt=2 + {{{{{{{$testing + }}}}}}} + + + - Error message improved for conflicts due to -pbp; thanks to Djun Kim. + + - Fixed RT #80645, error parsing special array name '@$' when used as + @{$} or $#{$} + + - Eliminated the -chk debug flag which was included in version 20010406 to + do a one-time check for a bug with multi-line quotes. It has not been + needed since then. + + - Numerous other minor formatting improvements. + +=head2 2012 07 14 + + - Added flag -iscl (--ignore-side-comment-lengths) which causes perltidy + to ignore the length of side comments when setting line breaks, + RT #71848. The default is to include the length of side comments when + breaking lines to stay within the length prescribed by the -l=n + maximum line length parameter. For example, + + Default behavior on a single line with long side comment: + $vmsfile =~ s/;[\d\-]*$// + ; # Clip off version number; we can use a newer version as well + + perltidy -iscl leaves the line intact: + + $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well + + - Fixed RT #78182, side effects with STDERR. Error handling has been + revised and the documentation has been updated. STDERR can now be + redirected to a string reference, and perltidy now returns an + error flag instead of calling die when input errors are detected. + If the error flag is set then no tidied output was produced. + See man Perl::Tidy for an example. + + - Fixed RT #78156, erroneous warning message for package VERSION syntax. + + - Added abbreviations -conv (--converge) to simplify iteration control. + -conv is equivalent to -it=4 and will insure that the tidied code is + converged to its final state with the minimum number of iterations. + + - Minor formatting modifications have been made to insure convergence. + + - Simplified and hopefully improved the method for guessing the starting + indentation level of entabbed code. Added flag -dt=n (--default_tabsize=n) + which might be helpful if the guessing method does not work well for + some editors. + + - Added support for stacked labels, upper case X/B in hex and binary, and + CORE:: namespace. + + - Eliminated warning messages for using keyword names as constants. + + +=head2 2012 07 01 + + - Corrected problem introduced by using a chomp on scalar references, RT #77978 + + - Added support for Perl 5.14 package block syntax, RT #78114. + + - A convergence test is made if three or more iterations are requested with + the -it=n parameter to avoid wasting computer time. Several hundred Mb of + code gleaned from the internet were searched with the results that: + - It is unusual for two iterations to be required unless a major + style change is being made. + - Only one case has been found where three iterations were required. + - No cases requiring four iterations have been found with this version. + For the previous version several cases where found the results could + oscillate between two semi-stable states. This version corrects this. + + So if it is important that the code be converged it is okay to set -it=4 + with this version and it will probably stop after the second iteration. + + - Improved ability to identify and retain good line break points in the + input stream, such as at commas and equals. You can always tell + perltidy to ignore old breakpoints with -iob. + + - Fixed glitch in which a terminal closing hash brace followed by semicolon + was not outdented back to the leading line depth like other closing + tokens. Thanks to Keith Neargarder for noting this. + + OLD: + my ( $pre, $post ) = @{ + { + "pp_anonlist" => [ "[", "]" ], + "pp_anonhash" => [ "{", "}" ] + }->{ $kid->ppaddr } + }; # terminal brace + + NEW: + my ( $pre, $post ) = @{ + { + "pp_anonlist" => [ "[", "]" ], + "pp_anonhash" => [ "{", "}" ] + }->{ $kid->ppaddr } + }; # terminal brace + + - Removed extra indentation given to trailing 'if' and 'unless' clauses + without parentheses because this occasionally produced undesirable + results. This only applies where parens are not used after the if or + unless. + + OLD: + return undef + unless my ( $who, $actions ) = + $clause =~ /^($who_re)((?:$action_re)+)$/o; + + NEW: + return undef + unless my ( $who, $actions ) = + $clause =~ /^($who_re)((?:$action_re)+)$/o; + + +=head2 2012 06 19 + + - Updated perltidy to handle all quote modifiers defined for perl 5 version 16. + + - Side comment text in perltidyrc configuration files must now begin with + at least one space before the #. Thus: + + OK: + -l=78 # Max line width is 78 cols + BAD: + -l=78# Max line width is 78 cols + + This is probably true of almost all existing perltidyrc files, + but if you get an error message about bad parameters + involving a '#' the first time you run this version, please check the side + comments in your perltidyrc file, and add a space before the # if necessary. + You can quickly see the contents your perltidyrc file, if any, with the + command: + + perltidy -dpro + + The reason for this change is that some parameters naturally involve + the # symbol, and this can get interpreted as a side comment unless the + parameter is quoted. For example, to define -sphb=# it used to be necessary + to write + -sbcp='#' + to keep the # from becoming part of a comment. This was causing + trouble for new users. Now it can also be written without quotes: + -sbcp=# + + - Fixed bug in processing some .perltidyrc files containing parameters with + an opening brace character, '{'. For example the following was + incorrectly processed: + --static-block-comment-prefix="^#{2,}[^\s#]" + Thanks to pdagosto. + + - Added flag -boa (--break-at-old-attribute-breakpoints) which retains + any existing line breaks at attribute separation ':'. This is now the + default, use -nboa to deactivate. Thanks to Daphne Phister for the patch. + For example, given the following code, the line breaks at the ':'s will be + retained: + + my @field + : field + : Default(1) + : Get('Name' => 'foo') : Set('Name'); + + whereas the previous version would have output a single line. If + the attributes are on a single line then they will remain on a single line. + + - Added new flags --blank-lines-before-subs=n (-blbs=n) and + --blank-lines-before-packages=n (-blbp=n) to put n blank lines before + subs and packages. The old flag -bbs is now equivalent to -blbs=1 -blbp=1. + and -nbbs is equivalent to -blbs=0 -blbp=0. Requested by M. Schwern and + several others. + + - Added feature -nsak='*' meaning no space between any keyword and opening + paren. This avoids listing entering a long list of keywords. Requested + by M. Schwern. + + - Added option to delete a backup of original file with in-place-modify (-b) + if there were no errors. This can be requested with the flag -bext='/'. + See documentation for details. Requested by M. Schwern and others. + + - Fixed bug where the module postfilter parameter was not applied when -b + flag was used. This was discovered during testing. + + - Fixed in-place-modify (-b) to work with symbolic links to source files. + Thanks to Ted Johnson. + + - Fixed bug where the Perl::Tidy module did not allow -b to be used + in some cases. + + - No extra blank line is added before a comment which follows + a short line ending in an opening token, for example like this: + OLD: + if ( + + # unless we follow a blank or comment line + $last_line_leading_type !~ /^[#b]$/ + ... + + NEW: + if ( + # unless we follow a blank or comment line + $last_line_leading_type !~ /^[#b]$/ + ... + + The blank is not needed for readability in these cases because there + already is already space above the comment. If a blank already + exists there it will not be removed, so this change should not + change code which has previously been formatted with perltidy. + Thanks to R.W.Stauner. + + - Likewise, no extra blank line is added above a comment consisting of a + single #, since nothing is gained in readability. + + - Fixed error in which a blank line was removed after a #>>> directive. + Thanks to Ricky Morse. + + - Unnecessary semicolons after given/when/default blocks are now removed. + + - Fixed bug where an unwanted blank line could be added before + pod text in __DATA__ or __END__ section. Thanks to jidani. + + - Changed exit flags from 1 to 0 to indicate success for -help, -version, + and all -dump commands. Also added -? as another way to dump the help. + Requested by Keith Neargarder. + + - Fixed bug where .ERR and .LOG files were not written except for -it=2 or more + + - Fixed bug where trailing blank lines at the end of a file were dropped when + -it>1. + + - Fixed bug where a line occasionally ended with an extra space. This reduces + rhe number of instances where a second iteration gives a result different + from the first. + + - Updated documentation to note that the Tidy.pm module parameter may + not be a reference to SCALAR or ARRAY; it must be a file. + + - Syntax check with perl now work when the Tidy.pm module is processing + references to arrays and strings. Thanks to Charles Alderman. + + - Zero-length files are no longer processed due to concerns for data loss + due to side effects in some scenarios. + + - block labels, if any, are now included in closing side comment text + when the -csc flag is used. Suggested by Aaron. For example, + the label L102 in the following block is now included in the -csc text: + + L102: for my $i ( 1 .. 10 ) { + ... + } ## end L102: for my $i ( 1 .. 10 ) + + +=head2 2010 12 17 + + - added new flag -it=n or --iterations=n + This flag causes perltidy to do n complete iterations. + For most purposes the default of n=1 should be satisfactory. However n=2 + can be useful when a major style change is being made, or when code is being + beautified on check-in to a source code control system. The run time will be + approximately proportional to n, and it should seldom be necessary to use a + value greater than n=2. Thanks to Jonathan Swartz + + - A configuration file pathname begins with three dots, e.g. + ".../.perltidyrc", indicates that the file should be searched for starting + in the current directory and working upwards. This makes it easier to have + multiple projects each with their own .perltidyrc in their root directories. + Thanks to Jonathan Swartz for this patch. + + - Added flag --notidy which disables all formatting and causes the input to be + copied unchanged. This can be useful in conjunction with hierarchical + F<.perltidyrc> files to prevent unwanted tidying. + Thanks to Jonathan Swartz for this patch. + + - Added prefilters and postfilters in the call to the Tidy.pm module. + Prefilters and postfilters. The prefilter is a code reference that + will be applied to the source before tidying, and the postfilter + is a code reference to the result before outputting. + + Thanks to Jonathan Swartz for this patch. He writes: + This is useful for all manner of customizations. For example, I use + it to convert the 'method' keyword to 'sub' so that perltidy will work for + Method::Signature::Simple code: + + Perl::Tidy::perltidy( + prefilter => sub { $_ = $_[0]; s/^method (.*)/sub $1 \#__METHOD/gm; return $_ }, + postfilter => sub { $_ = $_[0]; s/^sub (.*?)\s* \#__METHOD/method $1/gm; return $_ } + ); + + - The starting indentation level of sections of code entabbed with -et=n + is correctly guessed if it was also produced with the same -et=n flag. This + keeps the indentation stable on repeated formatting passes within an editor. + Thanks to Sam Kington and Glenn. + + - Functions with prototype '&' had a space between the function and opening + peren. This space now only occurs if the flag --space-function-paren (-sfp) + is set. Thanks to Zrajm Akfohg. + + - Patch to never put spaces around a bare word in braces beginning with ^ as in: + my $before = ${^PREMATCH}; + even if requested with the -bt=0 flag because any spaces cause a syntax error in perl. + Thanks to Fabrice Dulanoy. + +=head2 2009 06 16 + + - Allow configuration file to be 'perltidy.ini' for Windows systems. + i.e. C:\Documents and Settings\User\perltidy.ini + and added documentation for setting configuation file under Windows in man + page. Thanks to Stuart Clark. + + - Corrected problem of unwanted semicolons in hash ref within given/when code. + Thanks to Nelo Onyiah. + + - added new flag -cscb or --closing-side-comments-balanced + When using closing-side-comments, and the closing-side-comment-maximum-text + limit is exceeded, then the comment text must be truncated. Previous + versions of perltidy terminate with three dots, and this can still be + achieved with -ncscb: + + perltidy -csc -ncscb + + } ## end foreach my $foo (sort { $b cmp $a ... + + However this causes a problem with older editors which cannot recognize + comments or are not configured to doso because they cannot "bounce" around in + the text correctly. The B<-cscb> flag tries to help them by + appending appropriate terminal balancing structure: + + perltidy -csc -cscb + + } ## end foreach my $foo (sort { $b cmp $a ... }) + + Since there is much to be gained and little to be lost by doing this, + the default is B<-cscb>. Use B<-ncscb> if you do not want this. + + Thanks to Daniel Becker for suggesting this option. + + - After an isolated closing eval block the continuation indentation will be + removed so that the braces line up more like other blocks. Thanks to Yves Orton. + + OLD: + eval { + #STUFF; + 1; # return true + } + or do { + #handle error + }; + + NEW: + eval { + #STUFF; + 1; # return true + } or do { + #handle error + }; + + -A new flag -asbl (or --opening-anonymous-sub-brace-on-new-line) has + been added to put the opening brace of anonymous sub's on a new line, + as in the following snippet: + + my $code = sub + { + my $arg = shift; + return $arg->(@_); + }; + + This was not possible before because the -sbl flag only applies to named + subs. Thanks to Benjamin Krupp. + + -Fix tokenization bug with the following snippet + print 'hi' if { x => 1, }->{x}; + which resulted in a semicolon being added after the comma. The workaround + was to use -nasc, but this is no longer necessary. Thanks to Brian Duggan. + + -Fixed problem in which an incorrect error message could be triggered + by the (unusual) combination of parameters -lp -i=0 -l=2 -ci=0 for + example. Thanks to Richard Jelinek. + + -A new flag --keep-old-blank-lines=n has been added to + give more control over the treatment of old blank lines in + a script. The manual has been revised to discuss the new + flag and clarify the treatment of old blank lines. Thanks + to Oliver Schaefer. + +=head2 2007 12 05 + + -Improved support for perl 5.10: New quote modifier 'p', new block type UNITCHECK, + new keyword break, improved formatting of given/when. + + -Corrected tokenization bug of something like $var{-q}. + + -Numerous minor formatting improvements. + + -Corrected list of operators controlled by -baao -bbao to include + . : ? && || and or err xor + + -Corrected very minor error in log file involving incorrect comment + regarding need for upper case of labels. + + -Fixed problem where perltidy could run for a very long time + when given certain non-perl text files. + + -Line breaks in un-parenthesized lists now try to follow + line breaks in the input file rather than trying to fill + lines. This usually works better, but if this causes + trouble you can use -iob to ignore any old line breaks. + Example for the following input snippet: + + print + "conformability (Not the same dimension)\n", + "\t", $have, " is ", text_unit($hu), "\n", + "\t", $want, " is ", text_unit($wu), "\n", + ; + + OLD: + print "conformability (Not the same dimension)\n", "\t", $have, " is ", + text_unit($hu), "\n", "\t", $want, " is ", text_unit($wu), "\n",; + + NEW: + print "conformability (Not the same dimension)\n", + "\t", $have, " is ", text_unit($hu), "\n", + "\t", $want, " is ", text_unit($wu), "\n", + ; + +=head2 2007 08 01 + + -Added -fpsc option (--fixed-position-side-comment). Thanks to Ueli Hugenschmidt. + For example -fpsc=40 tells perltidy to put side comments in column 40 + if possible. + + -Added -bbao and -baao options (--break-before-all-operators and + --break-after-all-operators) to simplify command lines and configuration + files. These define an initial preference for breaking at operators which can + be modified with -wba and -wbb flags. For example to break before all operators + except an = one could use --bbao -wba='=' rather than listing every + single perl operator (except =) on a -wbb flag. + + -Added -kis option (--keep-interior-semicolons). Use the B<-kis> flag + to prevent breaking at a semicolon if there was no break there in the + input file. To illustrate, consider the following input lines: + + dbmclose(%verb_delim); undef %verb_delim; + dbmclose(%expanded); undef %expanded; + dbmclose(%global); undef %global; + + Normally these would be broken into six lines, but + perltidy -kis gives: + + dbmclose(%verb_delim); undef %verb_delim; + dbmclose(%expanded); undef %expanded; + dbmclose(%global); undef %global; + + -Improved formatting of complex ternary statements, with indentation + of nested statements. + OLD: + return defined( $cw->{Selected} ) + ? (wantarray) + ? @{ $cw->{Selected} } + : $cw->{Selected}[0] + : undef; + + NEW: + return defined( $cw->{Selected} ) + ? (wantarray) + ? @{ $cw->{Selected} } + : $cw->{Selected}[0] + : undef; + + -Text following un-parenthesized if/unless/while/until statements get a + full level of indentation. Suggested by Jeff Armstorng and others. + OLD: + return $ship->chargeWeapons("phaser-canon") + if $encounter->description eq 'klingon' + and $ship->firepower >= $encounter->firepower + and $location->status ne 'neutral'; + NEW: + return $ship->chargeWeapons("phaser-canon") + if $encounter->description eq 'klingon' + and $ship->firepower >= $encounter->firepower + and $location->status ne 'neutral'; + +=head2 2007 05 08 + + -Fixed bug where #line directives were being indented. Thanks to + Philippe Bruhat. + +=head2 2007 05 04 + + -Fixed problem where an extra blank line was added after an =cut when either + (a) the =cut started (not stopped) a POD section, or (b) -mbl > 1. + Thanks to J. Robert Ray and Bill Moseley. + +=head2 2007 04 24 + + -ole (--output-line-ending) and -ple (--preserve-line-endings) should + now work on all systems rather than just unix systems. Thanks to Dan + Tyrell. + + -Fixed problem of a warning issued for multiple subs for BEGIN subs + and other control subs. Thanks to Heiko Eissfeldt. + + -Fixed problem where no space was introduced between a keyword or + bareword and a colon, such as: + + ( ref($result) eq 'HASH' && !%$result ) ? undef: $result; + + Thanks to Niek. + + -Added a utility program 'break_long_quotes.pl' to the examples directory of + the distribution. It breaks long quoted strings into a chain of concatenated + sub strings no longer than a selected length. Suggested by Michael Renner as + a perltidy feature but was judged to be best done in a separate program. + + -Updated docs to remove extra < and >= from list of tokens + after which breaks are made by default. Thanks to Bob Kleemann. + + -Removed improper uses of $_ to avoid conflicts with external calls, giving + error message similar to: + Modification of a read-only value attempted at + /usr/share/perl5/Perl/Tidy.pm line 6907. + Thanks to Michael Renner. + + -Fixed problem when errorfile was not a plain filename or filehandle + in a call to Tidy.pm. The call + perltidy(source => \$input, destination => \$output, errorfile => \$err); + gave the following error message: + Not a GLOB reference at /usr/share/perl5/Perl/Tidy.pm line 3827. + Thanks to Michael Renner and Phillipe Bruhat. + + -Fixed problem where -sot would not stack an opening token followed by + a side comment. Thanks to Jens Schicke. + + -improved breakpoints in complex math and other long statements. Example: + OLD: + return + log($n) + 0.577215664901532 + ( 1 / ( 2 * $n ) ) - + ( 1 / ( 12 * ( $n**2 ) ) ) + ( 1 / ( 120 * ( $n**4 ) ) ); + NEW: + return + log($n) + 0.577215664901532 + + ( 1 / ( 2 * $n ) ) - + ( 1 / ( 12 * ( $n**2 ) ) ) + + ( 1 / ( 120 * ( $n**4 ) ) ); + + -more robust vertical alignment of complex terminal else blocks and ternary + statements. + +=head2 2006 07 19 + + -Eliminated bug where a here-doc invoked through an 'e' modifier on a pattern + replacement text was not recognized. The tokenizer now recursively scans + replacement text (but does not reformat it). + + -improved vertical alignment of terminal else blocks and ternary statements. + Thanks to Chris for the suggestion. + + OLD: + if ( IsBitmap() ) { return GetBitmap(); } + elsif ( IsFiles() ) { return GetFiles(); } + else { return GetText(); } + + NEW: + if ( IsBitmap() ) { return GetBitmap(); } + elsif ( IsFiles() ) { return GetFiles(); } + else { return GetText(); } + + OLD: + $which_search = + $opts{"t"} ? 'title' + : $opts{"s"} ? 'subject' + : $opts{"a"} ? 'author' + : 'title'; + + NEW: + $which_search = + $opts{"t"} ? 'title' + : $opts{"s"} ? 'subject' + : $opts{"a"} ? 'author' + : 'title'; + + -improved indentation of try/catch blocks and other externally defined + functions accepting a block argument. Thanks to jae. + + -Added support for Perl 5.10 features say and smartmatch. + + -Added flag -pbp (--perl-best-practices) as an abbreviation for parameters + suggested in Damian Conway's "Perl Best Practices". -pbp is the same as: + + -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq + -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = + **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=" + + Please note that the -st here restricts input to standard input; use + -nst if necessary to override. + + -Eliminated some needless breaks at equals signs in -lp indentation. + + OLD: + $c = + Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE, + TOP + $y * (BOTTOM - TOP) / SIZE); + NEW: + $c = Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE, + TOP + $y * (BOTTOM - TOP) / SIZE); + + A break at an equals is sometimes useful for preventing complex statements + from hitting the line length limit. The decision to do this was + over-eager in some cases and has been improved. Thanks to Royce Reece. + + -qw quotes contained in braces, square brackets, and parens are being + treated more like those containers as far as stacking of tokens. Also + stack of closing tokens ending ');' will outdent to where the ');' would + have outdented if the closing stack is matched with a similar opening stack. + + OLD: perltidy -soc -sct + __PACKAGE__->load_components( + qw( + PK::Auto + Core + ) + ); + NEW: perltidy -soc -sct + __PACKAGE__->load_components( qw( + PK::Auto + Core + ) ); + Thanks to Aran Deltac + + -Eliminated some undesirable or marginally desirable vertical alignments. + These include terminal colons, opening braces, and equals, and particularly + when just two lines would be aligned. + + OLD: + my $accurate_timestamps = $Stamps{lnk}; + my $has_link = + ... + NEW: + my $accurate_timestamps = $Stamps{lnk}; + my $has_link = + + -Corrected a problem with -mangle in which a space would be removed + between a keyword and variable beginning with ::. + +=head2 2006 06 14 + + -Attribute argument lists are now correctly treated as quoted strings + and not formatted. This is the most important update in this version. + Thanks to Borris Zentner, Greg Ferguson, Steve Kirkup. + + -Updated to recognize the defined or operator, //, to be released in Perl 10. + Thanks to Sebastien Aperghis-Tramoni. + + -A useful utility perltidyrc_dump.pl is included in the examples section. It + will read any perltidyrc file and write it back out in a standard format + (though comments are lost). + + -Added option to have perltidy read and return a hash with the contents of a + perltidyrc file. This may be used by Leif Eriksen's tidyview code. This + feature is used by the demonstration program 'perltidyrc_dump.pl' in the + examples directory. + + -Improved error checking in perltidyrc files. Unknown bare words were not + being caught. + + -The --dump-options parameter now dumps parameters in the format required by a + perltidyrc file. + + -V-Strings with underscores are now recognized. + For example: $v = v1.2_3; + + -cti=3 option added which gives one extra indentation level to closing + tokens always. This provides more predictable closing token placement + than cti=2. If you are using cti=2 you might want to try cti=3. + + -To identify all left-adjusted comments as static block comments, use C<-sbcp='^#'>. + + -New parameters -fs, -fsb, -fse added to allow sections of code between #<<< + and #>>> to be passed through verbatim. This is enabled by default and turned + off by -nfs. Flags -fsb and -fse allow other beginning and ending markers. + Thanks to Wolfgang Werner and Marion Berryman for suggesting this. + + -added flag -skp to put a space between all Perl keywords and following paren. + The default is to only do this for certain keywords. Suggested by + H.Merijn Brand. + + -added flag -sfp to put a space between a function name and following paren. + The default is not to do this. Suggested by H.Merijn Brand. + + -Added patch to avoid breaking GetOpt::Long::Configure set by calling program. + Thanks to Philippe Bruhat. + + -An error was fixed in which certain parameters in a .perltidyrc file given + without the equals sign were not recognized. That is, + '--brace-tightness 0' gave an error but '--brace-tightness=0' worked + ok. Thanks to Zac Hansen. + + -An error preventing the -nwrs flag from working was corrected. Thanks to + Greg Ferguson. + + -Corrected some alignment problems with entab option. + + -A bug with the combination of -lp and -extrude was fixed (though this + combination doesn't really make sense). The bug was that a line with + a single zero would be dropped. Thanks to Cameron Hayne. + + -Updated Windows detection code to avoid an undefined variable. + Thanks to Joe Yates and Russ Jones. + + -Improved formatting for short trailing statements following a closing paren. + Thanks to Joe Matarazzo. + + -The handling of the -icb (indent closing block braces) flag has been changed + slightly to provide more consistent and predictable formatting of complex + structures. Instead of giving a closing block brace the indentation of the + previous line, it is now given one extra indentation level. The two methods + give the same result if the previous line was a complete statement, as in this + example: + + if ($task) { + yyy(); + } # -icb + else { + zzz(); + } + The change also fixes a problem with empty blocks such as: + + OLD, -icb: + elsif ($debug) { + } + + NEW, -icb: + elsif ($debug) { + } + + -A problem with -icb was fixed in which a closing brace was misplaced when + it followed a quote which spanned multiple lines. + + -Some improved breakpoints for -wba='&& || and or' + + -Fixed problem with misaligned cuddled else in complex statements + when the -bar flag was also used. Thanks to Alex and Royce Reese. + + -Corrected documentation to show that --outdent-long-comments is the default. + Thanks to Mario Lia. + + -New flag -otr (opening-token-right) is similar to -bar (braces-always-right) + but applies to non-structural opening tokens. + + -new flags -sot (stack-opening-token), -sct (stack-closing-token). + Suggested by Tony. + +=head2 2003 10 21 + + -The default has been changed to not do syntax checking with perl. + Use -syn if you want it. Perltidy is very robust now, and the -syn + flag now causes more problems than it's worth because of BEGIN blocks + (which get executed with perl -c). For example, perltidy will never + return when trying to beautify this code if -syn is used: + + BEGIN { 1 while { }; } + + Although this is an obvious error, perltidy is often run on untested + code which is more likely to have this sort of problem. A more subtle + example is: + + BEGIN { use FindBin; } + + which may hang on some systems using -syn if a shared file system is + unavailable. + + -Changed style -gnu to use -cti=1 instead of -cti=2 (see next item). + In most cases it looks better. To recover the previous format, use + '-gnu -cti=2' + + -Added flags -cti=n for finer control of closing token indentation. + -cti = 0 no extra indentation (default; same as -nicp) + -cti = 1 enough indentation so that the closing token + aligns with its opening token. + -cti = 2 one extra indentation level if the line has the form + ); ]; or }; (same as -icp). + + The new option -cti=1 works well with -lp: + + EXAMPLES: + + # perltidy -lp -cti=1 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + + # perltidy -lp -cti=2 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + This is backwards compatible with -icp. See revised manual for + details. Suggested by Mike Pennington. + + -Added flag '--preserve-line-endings' or '-ple' to cause the output + line ending to be the same as in the input file, for unix, dos, + or mac line endings. Only works under unix. Suggested by + Rainer Hochschild. + + -Added flag '--output-line-ending=s' or '-ole=s' where s=dos or win, + unix, or mac. Only works under unix. + + -Files with Mac line endings should now be handled properly under unix + and dos without being passed through a converter. + + -You may now include 'and', 'or', and 'xor' in the list following + '--want-break-after' to get line breaks after those keywords rather than + before them. Suggested by Rainer Hochschild. + + -Corrected problem with command line option for -vtc=n and -vt=n. The + equals sign was being eaten up by the Windows shell so perltidy didn't + see it. + +=head2 2003 07 26 + + -Corrected cause of warning message with recent versions of Perl: + "Possible precedence problem on bitwise & operator at ..." + Thanks to Jim Files. + + -fixed bug with -html with '=for pod2html' sections, in which code/pod + output order was incorrect. Thanks to Tassilo von Parseval. + + -fixed bug when the -html flag is used, in which the following error + message, plus others, appear: + did not see in pod2html output + This was caused by a change in the format of html output by pod2html + VERSION 1.04 (included with perl 5.8). Thanks to Tassilo von Parseval. + + -Fixed bug where an __END__ statement would be mistaken for a label + if it is immediately followed by a line with a leading colon. Thanks + to John Bayes. + + -Implemented guessing logic for brace types when it is ambiguous. This + has been on the TODO list a long time. Thanks to Boris Zentner for + an example. + + -Long options may now be negated either as '--nolong-option' + or '--no-long-option'. Thanks to Philip Newton for the suggestion. + + -added flag --html-entities or -hent which controls the use of + Html::Entities for html formatting. Use --nohtml-entities or -nhent to + prevent the use of Html::Entities to encode special symbols. The + default is -hent. Html::Entities when formatting perl text to escape + special symbols. This may or may not be the right thing to do, + depending on browser/language combinations. Thanks to Burak Gursoy for + this suggestion. + + -Bareword strings with leading '-', like, '-foo' now count as 1 token + for horizontal tightness. This way $a{'-foo'}, $a{foo}, and $a{-foo} + are now all treated similarly. Thus, by default, OLD: $a{ -foo } will + now be NEW: $a{-foo}. Suggested by Mark Olesen. + + -added 2 new flags to control spaces between keywords and opening parens: + -sak=s or --space-after-keyword=s, and + -nsak=s or --nospace-after-keyword=s, where 's' is a list of keywords. + + The new default list of keywords which get a space is: + + "my local our and or eq ne if else elsif until unless while for foreach + return switch case given when" + + Use -sak=s and -nsak=s to add and remove keywords from this list, + respectively. + + Explanation: Stephen Hildrey noted that perltidy was being inconsistent + in placing spaces between keywords and opening parens, and sent a patch + to give user control over this. The above list was selected as being + a reasonable default keyword list. Previously, perltidy + had a hardwired list which also included these keywords: + + push pop shift unshift join split die + + but did not have 'our'. Example: if you prefer to make perltidy behave + exactly as before, you can include the following two lines in your + .perltidyrc file: + + -sak="push pop local shift unshift join split die" + -nsak="our" + + -Corrected html error in .toc file when -frm -html is used (extra "); + browsers were tolerant of it. + + -Improved alignment of chains of binary and ?/: operators. Example: + OLD: + $leapyear = + $year % 4 ? 0 + : $year % 100 ? 1 + : $year % 400 ? 0 + : 1; + NEW: + $leapyear = + $year % 4 ? 0 + : $year % 100 ? 1 + : $year % 400 ? 0 + : 1; + + -improved breakpoint choices involving '->' + + -Corrected tokenization of things like ${#}. For example, + ${#} is valid, but ${# } is a syntax error. + + -Corrected minor tokenization errors with indirect object notation. + For example, 'new A::()' works now. + + -Minor tokenization improvements; all perl code distributed with perl 5.8 + seems to be parsed correctly except for one instance (lextest.t) + of the known bug. + +=head2 2002 11 30 + + -Implemented scalar attributes. Thanks to Sean Tobin for noting this. + + -Fixed glitch introduced in previous release where -pre option + was not outputting a leading html
 tag.
+
+ -Numerous minor improvements in vertical alignment, including the following:
+
+ -Improved alignment of opening braces in many cases.  Needed for improved
+ switch/case formatting, and also suggested by Mark Olesen for sort/map/grep
+ formatting.  For example:
+
+  OLD:
+    @modified =
+      map { $_->[0] }
+      sort { $a->[1] <=> $b->[1] }
+      map { [ $_, -M ] } @filenames;
+
+  NEW:
+    @modified =
+      map  { $_->[0] }
+      sort { $a->[1] <=> $b->[1] }
+      map  { [ $_, -M ] } @filenames;
+
+ -Eliminated alignments across unrelated statements. Example:
+  OLD:
+    $borrowerinfo->configure( -state => 'disabled' );
+    $borrowerinfo->grid( -col        => 1, -row => 0, -sticky => 'w' );
+
+  NEW:  
+    $borrowerinfo->configure( -state => 'disabled' );
+    $borrowerinfo->grid( -col => 1, -row => 0, -sticky => 'w' );
+
+  Thanks to Mark Olesen for suggesting this.
+
+ -Improved alignement of '='s in certain cases.
+  Thanks to Norbert Gruener for sending an example.
+
+ -Outdent-long-comments (-olc) has been re-instated as a default, since
+  it works much better now.  Use -nolc if you want to prevent it.
+
+ -Added check for 'perltidy file.pl -o file.pl', which causes file.pl
+ to be lost. (The -b option should be used instead). Thanks to mreister
+ for reporting this problem.
+
+=head2 2002 11 06
+
+ -Switch/case or given/when syntax is now recognized.  Its vertical alignment
+ is not great yet, but it parses ok.  The words 'switch', 'case', 'given',
+ and 'when' are now treated as keywords.  If this causes trouble with older
+ code, we could introduce a switch to deactivate it.  Thanks to Stan Brown
+ and Jochen Schneider for recommending this.
+
+ -Corrected error parsing sub attributes with call parameters.
+ Thanks to Marc Kerr for catching this.
+
+ -Sub prototypes no longer need to be on the same line as sub names.  
+
+ -a new flag -frm or --frames will cause html output to be in a
+ frame, with table of contents in the left panel and formatted source
+ in the right panel.  Try 'perltidy -html -frm somemodule.pm' for example.
+
+ -The new default for -html formatting is to pass the pod through Pod::Html.
+ The result is syntax colored code within your pod documents. This can be
+ deactivated with -npod.  Thanks to those who have written to discuss this,
+ particularly Mark Olesen and Hugh Myers.
+
+ -the -olc (--outdent-long-comments) option works much better.  It now outdents
+ groups of consecutive comments together, and by just the amount needed to
+ avoid having any one line exceeding the maximum line length.
+
+ -block comments are now trimmed of trailing whitespace.
+
+ -if a directory specified with -opath does not exist, it will be created.
+
+ -a table of contents to packages and subs is output when -html is used.
+ Use -ntoc to prevent this. 
+
+ -fixed an unusual bug in which a 'for' statement following a 'format'
+ statement was not correctly tokenized.  Thanks to Boris Zentner for
+ catching this.
+
+ -Tidy.pm is no longer dependent on modules IO::Scalar and IO::ScalarArray.  
+ There were some speed issues.  Suggested by Joerg Walter.
+
+ -The treatment of quoted wildcards (file globs) is now system-independent. 
+ For example
+
+    perltidy 'b*x.p[lm]'
+
+ would match box.pl, box.pm, brinx.pm under any operating system.  Of
+ course, anything unquoted will be subject to expansion by any shell.
+
+ -default color for keywords under -html changed from 
+ SaddleBrown (#8B4513) to magenta4 (#8B008B).
+
+ -fixed an arg parsing glitch in which something like:
+   perltidy quick-help
+ would trigger the help message and exit, rather than operate on the
+ file 'quick-help'.
+
+=head2 2002 09 22
+ 
+ -New option '-b' or '--backup-and-modify-in-place' will cause perltidy to
+ overwrite the original file with the tidied output file.  The original
+ file will be saved with a '.bak' extension (which can be changed with
+ -bext=s).  Thanks to Rudi Farkas for the suggestion.
+
+ -An index to all subs is included at the top of -html output, unless
+ only the 
 section is written.
+
+ -Anchor lines of the form  are now inserted at key points
+ in html output, such as before sub definitions, for the convenience of
+ postprocessing scripts.  Suggested by Howard Owen.
+
+ -The cuddled-else (-ce) flag now also makes cuddled continues, like
+ this:
+
+    while ( ( $pack, $file, $line ) = caller( $i++ ) ) {
+       # bla bla
+    } continue {
+        $prevpack = $pack;
+    }
+
+ Suggested by Simon Perreault.  
+
+ -Fixed bug in which an extra blank line was added before an =head or 
+ similar pod line after an __END__ or __DATA__ line each time 
+ perltidy was run.  Also, an extra blank was being added after
+ a terminal =cut.  Thanks to Mike Birdsall for reporting this.
+
+=head2 2002 08 26
+
+ -Fixed bug in which space was inserted in a hyphenated hash key:
+    my $val = $myhash{USER-NAME};
+  was converted to:
+    my $val = $myhash{USER -NAME}; 
+  Thanks to an anonymous bug reporter at sourceforge.
+
+ -Fixed problem with the '-io' ('--indent-only') where all lines 
+  were double spaced.  Thanks to Nick Andrew for reporting this bug.
+
+ -Fixed tokenization error in which something like '-e1' was 
+  parsed as a number. 
+
+ -Corrected a rare problem involving older perl versions, in which 
+  a line break before a bareword caused problems with 'use strict'.
+  Thanks to Wolfgang Weisselberg for noting this.
+
+ -More syntax error checking added.
+
+ -Outdenting labels (-ola) has been made the default, in order to follow the
+  perlstyle guidelines better.  It's probably a good idea in general, but
+  if you do not want this, use -nola in your .perltidyrc file.
+  
+ -Updated rules for padding logical expressions to include more cases.
+  Thanks to Wolfgang Weisselberg for helpful discussions.
+
+ -Added new flag -osbc (--outdent-static-block-comments) which will
+  outdent static block comments by 2 spaces (or whatever -ci equals).
+  Requested by Jon Robison.
+
+=head2 2002 04 25
+ 
+ -Corrected a bug, introduced in the previous release, in which some
+  closing side comments (-csc) could have incorrect text.  This is
+  annoying but will be correct the next time perltidy is run with -csc.
+
+ -Fixed bug where whitespace was being removed between 'Bar' and '()' 
+  in a use statement like:
+
+       use Foo::Bar ();
+
+ -Whenever possible, if a logical expression is broken with leading
+  '&&', '||', 'and', or 'or', then the leading line will be padded
+  with additional space to produce alignment.  This has been on the
+  todo list for a long time; thanks to Frank Steinhauer for reminding
+  me to do it.  Notice the first line after the open parens here:
+
+        OLD: perltidy -lp
+        if (
+             !param("rules.to.$linecount")
+             && !param("rules.from.$linecount")
+             && !param("rules.subject.$linecount")
+             && !(
+                   param("rules.fieldname.$linecount")
+                   && param("rules.fieldval.$linecount")
+             )
+             && !param("rules.size.$linecount")
+             && !param("rules.custom.$linecount")
+          )
+
+        NEW: perltidy -lp
+        if (
+                !param("rules.to.$linecount")
+             && !param("rules.from.$linecount")
+             && !param("rules.subject.$linecount")
+             && !(
+                      param("rules.fieldname.$linecount")
+                   && param("rules.fieldval.$linecount")
+             )
+             && !param("rules.size.$linecount")
+             && !param("rules.custom.$linecount")
+          )
+
+=head2 2002 04 16
+
+ -Corrected a mistokenization of variables for a package with a name
+  equal to a perl keyword.  For example: 
+
+     my::qx();
+     package my;
+     sub qx{print "Hello from my::qx\n";}
+
+  In this case, the leading 'my' was mistokenized as a keyword, and a
+  space was being place between 'my' and '::'.  This has been
+  corrected.  Thanks to Martin Sluka for discovering this. 
+
+ -A new flag -bol (--break-at-old-logic-breakpoints)
+  has been added to control whether containers with logical expressions
+  should be broken open.  This is the default.
+
+ -A new flag -bok (--break-at-old-keyword-breakpoints)
+  has been added to follow breaks at old keywords which return lists,
+  such as sort and map.  This is the default.
+
+ -A new flag -bot (--break-at-old-trinary-breakpoints) has been added to
+  follow breaks at trinary (conditional) operators.  This is the default.
+
+ -A new flag -cab=n has been added to control breaks at commas after
+  '=>' tokens.  The default is n=1, meaning break unless this breaks
+  open an existing on-line container.
+
+ -A new flag -boc has been added to allow existing list formatting
+  to be retained.  (--break-at-old-comma-breakpoints).  See updated manual.
+
+ -A new flag -iob (--ignore-old-breakpoints) has been added to
+  prevent the locations of old breakpoints from influencing the output
+  format.
+
+ -Corrected problem where nested parentheses were not getting full
+  indentation.  This has been on the todo list for some time; thanks 
+  to Axel Rose for a snippet demonstrating this issue.
+
+            OLD: inner list is not indented
+            $this->sendnumeric(
+                $this->server,
+                (
+                  $ret->name,        $user->username, $user->host,
+                $user->server->name, $user->nick,     "H"
+                ),
+            );
+
+            NEW:
+            $this->sendnumeric(
+                $this->server,
+                (
+                    $ret->name,          $user->username, $user->host,
+                    $user->server->name, $user->nick,     "H"
+                ),
+            );
+
+ -Code cleaned up by removing the following unused, undocumented flags.
+  They should not be in any .perltidyrc files because they were just
+  experimental flags which were never documented.  Most of them placed
+  artificial limits on spaces, and Wolfgang Weisselberg convinced me that
+  most of them they do more harm than good by causing unexpected results.
+
+  --maximum-continuation-indentation (-mci)
+  --maximum-whitespace-columns
+  --maximum-space-to-comment (-xsc)
+  --big-space-jump (-bsj)
+
+ -Pod file 'perltidy.pod' has been appended to the script 'perltidy', and
+  Tidy.pod has been append to the module 'Tidy.pm'.  Older MakeMaker's
+  were having trouble.
+ 
+ -A new flag -isbc has been added for more control on comments. This flag
+  has the effect that if there is no leading space on the line, then the
+  comment will not be indented, and otherwise it may be.  If both -ibc and
+  -isbc are set, then -isbc takes priority.  Thanks to Frank Steinhauer
+  for suggesting this.
+
+ -A new document 'stylekey.pod' has been created to quickly guide new users
+  through the maze of perltidy style parameters.  An html version is 
+  on the perltidy web page.  Take a look! It should be very helpful.
+
+ -Parameters for controlling 'vertical tightness' have been added:
+  -vt and -vtc are the main controls, but finer control is provided
+  with -pvt, -pcvt, -bvt, -bcvt, -sbvt, -sbcvt.  Block brace vertical
+  tightness controls have also been added.
+  See updated manual and also see 'stylekey.pod'. Simple examples:
+
+    # perltidy -lp -vt=1 -vtc=1
+    @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
+
+    # perltidy -lp -vt=1 -vtc=0
+    @month_of_year = ( 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+
+ -Lists which do not format well in uniform columns are now better
+  identified and formated.
+
+    OLD:
+    return $c->create( 'polygon', $x, $y, $x + $ruler_info{'size'},
+        $y + $ruler_info{'size'}, $x - $ruler_info{'size'},
+        $y + $ruler_info{'size'} );
+
+    NEW:
+    return $c->create(
+        'polygon', $x, $y,
+        $x + $ruler_info{'size'},
+        $y + $ruler_info{'size'},
+        $x - $ruler_info{'size'},
+        $y + $ruler_info{'size'}
+    );
+
+    OLD:
+      radlablist($f1, pad('Initial', $p), $b->{Init}->get_panel_ref, 'None ',
+                 'None', 'Default', 'Default', 'Simple', 'Simple');
+    NEW:
+      radlablist($f1,
+                 pad('Initial', $p),
+                 $b->{Init}->get_panel_ref,
+                 'None ', 'None', 'Default', 'Default', 'Simple', 'Simple');
+
+ -Corrected problem where an incorrect html filename was generated for 
+  external calls to Tidy.pm module.  Fixed incorrect html title when
+  Tidy.pm is called with IO::Scalar or IO::Array source.
+
+ -Output file permissons are now set as follows.  An output script file
+  gets the same permission as the input file, except that owner
+  read/write permission is added (otherwise, perltidy could not be
+  rerun).  Html output files use system defaults.  Previously chmod 0755
+  was used in all cases.  Thanks to Mark Olesen for bringing this up.
+
+ -Missing semicolons will not be added in multi-line blocks of type
+  sort, map, or grep.  This brings perltidy into closer agreement
+  with common practice.  Of course, you can still put semicolons 
+  there if you like.  Thanks to Simon Perreault for a discussion of this.
+
+ -Most instances of extra semicolons are now deleted.  This is
+  particularly important if the -csc option is used.  Thanks to Wolfgang
+  Weisselberg for noting this.  For example, the following line
+  (produced by 'h2xs' :) has an extra semicolon which will now be
+  removed:
+
+     BEGIN { plan tests => 1 };
+
+ -New parameter -csce (--closing-side-comment-else-flag) can be used
+  to control what text is appended to 'else' and 'elsif' blocks.
+  Default is to just add leading 'if' text to an 'else'.  See manual.
+
+ -The -csc option now labels 'else' blocks with additinal information
+  from the opening if statement and elsif statements, if space.
+  Thanks to Wolfgang Weisselberg for suggesting this.
+
+ -The -csc option will now remove any old closing side comments
+  below the line interval threshold. Thanks to Wolfgang Weisselberg for
+  suggesting this.
+
+ -The abbreviation feature, which was broken in the previous version,
+  is now fixed.  Thanks to Michael Cartmell for noting this.
+
+ -Vertical alignment is now done for '||='  .. somehow this was 
+  overlooked.
+
+=head2 2002 02 25
+
+ -This version uses modules for the first time, and a standard perl
+  Makefile.PL has been supplied.  However, perltidy may still be
+  installed as a single script, without modules.  See INSTALL for
+  details.
+
+ -The man page 'perl2web' has been merged back into the main 'perltidy'
+  man page to simplify installation.  So you may remove that man page
+  if you have an older installation.
+
+ -Added patch from Axel Rose for MacPerl.  The patch prompts the user
+  for command line arguments before calling the module 
+  Perl::Tidy::perltidy.
+
+ -Corrected bug with '-bar' which was introduced in the previous
+  version.  A closing block brace was being indented.  Thanks to
+  Alexandros M Manoussakis for reporting this.
+
+ -New parameter '--entab-leading-whitespace=n', or '-et=n', has been
+  added for those who prefer tabs.  This behaves different from the
+  existing '-t' parameter; see updated man page.  Suggested by Mark
+  Olesen.
+
+ -New parameter '--perl-syntax-check-flags=s'  or '-pcsf=s' can be
+  used to change the flags passed to perltidy in a syntax check.
+  See updated man page.  Suggested by Mark Olesen. 
+
+ -New parameter '--output-path=s'  or '-opath=s' will cause output
+  files to be placed in directory s.  See updated man page.  Thanks for
+  Mark Olesen for suggesting this.
+
+ -New parameter --dump-profile (or -dpro) will dump to
+  standard output information about the search for a
+  configuration file, the name of whatever configuration file
+  is selected, and its contents.  This should help debugging
+  config files, especially on different Windows systems.
+
+ -The -w parameter now notes possible errors of the form:
+
+	$comment = s/^\s*(\S+)\..*/$1/;   # trim whitespace
+
+ -Corrections added for a leading ':' and for leaving a leading 'tcsh'
+  line untouched.  Mark Olesen reported that lines of this form were
+  accepted by perl but not by perltidy:
+
+        : # use -*- perl -*-
+        eval 'exec perl -wS $0 "$@"'  # shell should exec 'perl'
+        unless 1;                     # but Perl should skip this one
+
+  Perl will silently swallow a leading colon on line 1 of a
+  script, and now perltidy will do likewise.  For example,
+  this is a valid script, provided that it is the first line,
+  but not otherwise:
+
+        : print "Hello World\n";
+  
+  Also, perltidy will now mark a first line with leading ':' followed by
+  '#' as type SYSTEM (just as a #!  line), not to be formatted.
+
+ -List formatting improved for certain lists with special
+  initial terms, such as occur with 'printf', 'sprintf',
+  'push', 'pack', 'join', 'chmod'.  The special initial term is
+  now placed on a line by itself.  For example, perltidy -gnu
+
+     OLD:
+        $Addr = pack(
+                     "C4",                hex($SourceAddr[0]),
+                     hex($SourceAddr[1]), hex($SourceAddr[2]),
+                     hex($SourceAddr[3])
+                     );
+
+     NEW:
+        $Addr = pack("C4",
+                     hex($SourceAddr[0]), hex($SourceAddr[1]),
+                     hex($SourceAddr[2]), hex($SourceAddr[3]));
+
+      OLD:
+            push (
+                  @{$$self{states}}, '64', '66', '68',
+                  '70',              '72', '74', '76',
+                  '78',              '80', '82', '84',
+                  '86',              '88', '90', '92',
+                  '94',              '96', '98', '100',
+                  '102',             '104'
+                  );
+
+      NEW:
+            push (
+                  @{$$self{states}},
+                  '64', '66', '68', '70', '72',  '74',  '76',
+                  '78', '80', '82', '84', '86',  '88',  '90',
+                  '92', '94', '96', '98', '100', '102', '104'
+                  );
+
+ -Lists of complex items, such as matricies, are now detected
+  and displayed with just one item per row:
+
+    OLD:
+    $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+        [ 1, tan( deg2rad($a) ), 0 ], [ tan( deg2rad($b) ), 1, 0 ],
+        [ 0, 0, 1 ]
+    );
+
+    NEW:
+    $this->{'CURRENT'}{'gfx'}{'MatrixSkew'} = Text::PDF::API::Matrix->new(
+        [ 1,                  tan( deg2rad($a) ), 0 ],
+        [ tan( deg2rad($b) ), 1,                  0 ],
+        [ 0,                  0,                  1 ]
+    );
+
+ -The perl syntax check will be turned off for now when input is from
+  standard input or standard output.  The reason is that this requires
+  temporary files, which has produced far too many problems during
+  Windows testing.  For example, the POSIX module under Windows XP/2000
+  creates temporary names in the root directory, to which only the
+  administrator should have permission to write.
+
+ -Merged patch sent by Yves Orton to handle appropriate
+  configuration file locations for different Windows varieties
+  (2000, NT, Me, XP, 95, 98).
+
+ -Added patch to properly handle a for/foreach loop without
+  parens around a list represented as a qw.  I didn't know this
+  was possible until Wolfgang Weisselberg pointed it out:
+
+        foreach my $key qw\Uno Due Tres Quadro\ {
+            print "Set $key\n";
+        }
+
+  But Perl will give a syntax error without the $ variable; ie this will
+  not work:
+
+        foreach qw\Uno Due Tres Quadro\ {
+            print "Set $_\n";
+        }
+
+ -Merged Windows version detection code sent by Yves Orton.  Perltidy
+  now automatically turns off syntax checking for Win 9x/ME versions,
+  and this has solved a lot of robustness problems.  These systems 
+  cannot reliably handle backtick operators.  See man page for
+  details.
+  
+ -Merged VMS filename handling patch sent by Michael Cartmell.  (Invalid
+  output filenames were being created in some cases). 
+
+ -Numerous minor improvements have been made for -lp style indentation.
+
+ -Long C-style 'for' expressions will be broken after each ';'.   
+
+  'perltidy -gnu' gives:
+
+    OLD:
+    for ($status = $db->seq($key, $value, R_CURSOR()) ; $status == 0
+         and $key eq $origkey ; $status = $db->seq($key, $value, R_NEXT())) 
+
+    NEW:
+    for ($status = $db->seq($key, $value, R_CURSOR()) ;
+         $status == 0 and $key eq $origkey ;
+         $status = $db->seq($key, $value, R_NEXT()))
+
+ -For the -lp option, a single long term within parens
+  (without commas) now has better alignment.  For example,
+  perltidy -gnu
+
+            OLD:
+            $self->throw("Must specify a known host, not $location,"
+                  . " possible values ("
+                  . join (",", sort keys %hosts) . ")");
+
+            NEW:
+            $self->throw("Must specify a known host, not $location,"
+                         . " possible values ("
+                         . join (",", sort keys %hosts) . ")");
+
+
+=head2 2001 12 31
+
+ -This version is about 20 percent faster than the previous
+  version as a result of optimization work.  The largest gain
+  came from switching to a dispatch hash table in the
+  tokenizer.
+
+ -perltidy -html will check to see if HTML::Entities is
+  installed, and if so, it will use it to encode unsafe
+  characters.
+
+ -Added flag -oext=ext to change the output file extension to
+  be different from the default ('tdy' or 'html').  For
+  example:
+
+    perltidy -html -oext=htm filename
+
+ will produce filename.htm
+
+ -Added flag -cscw to issue warnings if a closing side comment would replace
+ an existing, different side comments.  See the man page for details.
+ Thanks to Peter Masiar for helpful discussions.
+
+ -Corrected tokenization error of signed hex/octal/binary numbers. For
+ example, the first hex number below would have been parsed correctly
+ but the second one was not:
+    if ( ( $tmp >= 0x80_00_00 ) || ( $tmp < -0x80_00_00 ) ) { }
+
+ -'**=' was incorrectly tokenized as '**' and '='.  This only
+     caused a problem with the -extrude opton.
+
+ -Corrected a divide by zero when -extrude option is used
+
+ -The flag -w will now contain all errors reported by 'perl -c' on the
+ input file, but otherwise they are not reported.  The reason is that
+ perl will report lots of problems and syntax errors which are not of
+ interest when only a small snippet is being formatted (such as missing
+ modules and unknown bare words).  Perltidy will always report all
+ significant syntax errors that it finds, such as unbalanced braces,
+ unless the -q (quiet) flag is set.
+
+ -Merged modifications created by Hugh Myers into perltidy.
+  These include a 'streamhandle' routine which allows perltidy
+  as a module to operate on input and output arrays and strings
+  in addition to files.  Documentation and new packaging as a
+  module should be ready early next year; This is an elegant,
+  powerful update; many thanks to Hugh for contributing it.
+
+=head2 2001 11 28
+
+ -added a tentative patch which tries to keep any existing breakpoints
+ at lines with leading keywords map,sort,eval,grep. The idea is to
+ improve formatting of sequences of list operations, as in a schwartzian
+ transform.  Example:
+
+    INPUT:
+    my @sorted = map { $_->[0] }
+                 sort { $a->[1] <=> $b->[1] }
+                 map { [ $_, rand ] } @list;
+
+    OLD:
+    my @sorted =
+      map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, rand ] } @list;
+
+    NEW:
+    my @sorted = map { $_->[0] }
+      sort { $a->[1] <=> $b->[1] }
+      map { [ $_, rand ] } @list;
+
+  The new alignment is not as nice as the input, but this is an improvement.
+  Thanks to Yves Orton for this suggestion.
+
+ -modified indentation logic so that a line with leading opening paren,
+ brace, or square bracket will never have less indentation than the
+ line with the corresponding opening token.  Here's a simple example:
+
+    OLD:
+        $mw->Button(
+            -text    => "New Document",
+            -command => \&new_document
+          )->pack(
+            -side   => 'bottom',
+            -anchor => 'e'
+        );
+
+    Note how the closing ');' is lined up with the first line, even
+    though it closes a paren in the 'pack' line.  That seems wrong.
+ 
+    NEW:
+        $mw->Button(
+            -text    => "New Document",
+            -command => \&new_document
+          )->pack(
+            -side   => 'bottom',
+            -anchor => 'e'
+          );
+
+   This seems nicer: you can up-arrow with an editor and arrive at the
+   opening 'pack' line.
+ 
+ -corrected minor glitch in which cuddled else (-ce) did not get applied
+ to an 'unless' block, which should look like this:
+
+        unless ($test) {
+
+        } else {
+
+        }
+
+  Thanks to Jeremy Mates for reporting this.
+
+ -The man page has been reorganized to parameters easier to find.
+ 
+ -Added check for multiple definitions of same subroutine.  It is easy
+  to introduce this problem when cutting and pasting. Perl does not
+  complain about it, but it can lead to disaster.
+
+ -The command -pro=filename  or -profile=filename may be used to specify a
+  configuration file which will override the default name of .perltidyrc.
+  There must not be a space on either side of the '=' sign.  I needed
+  this to be able to easily test perltidy with a variety of different
+  configuration files.
+
+ -Side comment alignment has been improved somewhat across frequent level
+  changes, as in short if/else blocks.  Thanks to Wolfgang Weisselberg 
+  for pointing out this problem.  For example:
+    
+    OLD:
+    if ( ref $self ) {    # Called as a method
+        $format = shift;
+    }
+    else {    # Regular procedure call
+        $format = $self;
+        undef $self;
+    }
+
+    NEW:
+    if ( ref $self ) {    # Called as a method
+        $format = shift;
+    }
+    else {                # Regular procedure call
+        $format = $self;
+        undef $self;
+    }
+
+ -New command -ssc (--static-side-comment) and related command allows
+  side comments to be spaced close to preceding character.  This is
+  useful for displaying commented code as side comments.
+
+ -New command -csc (--closing-side-comment) and several related
+  commands allow comments to be added to (and deleted from) any or all
+  closing block braces.  This can be useful if you have to maintain large
+  programs, especially those that you didn't write.  See updated man page.
+  Thanks to Peter Masiar for this suggestion.  For a simple example:
+
+        perltidy -csc
+
+        sub foo {
+            if ( !defined( $_[0] ) ) {
+                print("Hello, World\n");
+            }
+            else {
+                print( $_[0], "\n" );
+            }
+        } ## end sub foo
+
+  This added '## end sub foo' to the closing brace.  
+  To remove it, perltidy -ncsc.
+
+ -New commands -ola, for outdenting labels, and -okw, for outdenting
+  selected control keywords, were implemented.  See the perltidy man
+  page for details.  Thanks to Peter Masiar for this suggestion.
+
+ -Hanging side comment change: a comment will not be considered to be a
+  hanging side comment if there is no leading whitespace on the line.
+  This should improve the reliability of identifying hanging side comments.
+  Thanks to Peter Masiar for this suggestion.
+
+ -Two new commands for outdenting, -olq (outdent-long-quotes) and -olc
+  (outdent-long-comments), have been added.  The original -oll
+  (outdent-long-lines) remains, and now is an abbreviation for -olq and -olc.
+  The new default is just -olq.  This was necessary to avoid inconsistency with
+  the new static block comment option.
+
+ -Static block comments:  to provide a way to display commented code
+  better, the convention is used that comments with a leading '##' should
+  not be formatted as usual.  Please see '-sbc' (or '--static-block-comment')
+  for documentation.  It can be deactivated with with -nsbc, but
+  should not normally be necessary. Thanks to Peter Masiar for this 
+  suggestion.
+
+ -Two changes were made to help show structure of complex lists:
+  (1) breakpoints are forced after every ',' in a list where any of
+  the list items spans multiple lines, and
+  (2) List items which span multiple lines now get continuation indentation.
+
+  The following example illustrates both of these points.  Many thanks to
+  Wolfgang Weisselberg for this snippet and a discussion of it; this is a
+  significant formatting improvement. Note how it is easier to see the call
+  parameters in the NEW version:
+
+    OLD:
+    assert( __LINE__, ( not defined $check )
+        or ref $check
+        or $check eq "new"
+        or $check eq "old", "Error in parameters",
+        defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+        defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+        defined $old_db ? ( ref $old_db ? ref $old_db : $old_db ) : "undef" );
+
+    NEW: 
+    assert(
+        __LINE__,
+        ( not defined $check )
+          or ref $check
+          or $check eq "new"
+          or $check eq "old",
+        "Error in parameters",
+        defined $old_new ? ( ref $old_new ? ref $old_new : $old_new ) : "undef",
+        defined $db_new  ? ( ref $db_new  ? ref $db_new  : $db_new )  : "undef",
+        defined $old_db  ? ( ref $old_db  ? ref $old_db  : $old_db )  : "undef"
+    );
+
+    Another example shows how this helps displaying lists:
+
+    OLD:
+    %{ $self->{COMPONENTS} } = (
+        fname =>
+        { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+        street =>
+        { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+    );
+
+    The structure is clearer with the added indentation:
+    
+    NEW:
+    %{ $self->{COMPONENTS} } = (
+        fname =>
+          { type => 'name', adj => 'yes', font => 'Helvetica', 'index' => 0 },
+        street =>
+          { type => 'road', adj => 'yes', font => 'Helvetica', 'index' => 2 },
+    );
+
+    -The structure of nested logical expressions is now displayed better.
+    Thanks to Wolfgang Weisselberg for helpful discussions.  For example,
+    note how the status of the final 'or' is displayed in the following:
+
+    OLD:
+    return ( !null($op)
+          and null( $op->sibling )
+          and $op->ppaddr eq "pp_null"
+          and class($op) eq "UNOP"
+          and ( ( $op->first->ppaddr =~ /^pp_(and|or)$/
+            and $op->first->first->sibling->ppaddr eq "pp_lineseq" )
+            or ( $op->first->ppaddr eq "pp_lineseq"
+                and not null $op->first->first->sibling
+                and $op->first->first->sibling->ppaddr eq "pp_unstack" ) ) );
+
+    NEW:
+    return (
+        !null($op)
+          and null( $op->sibling )
+          and $op->ppaddr eq "pp_null"
+          and class($op) eq "UNOP"
+          and (
+            (
+                $op->first->ppaddr =~ /^pp_(and|or)$/
+                and $op->first->first->sibling->ppaddr eq "pp_lineseq"
+            )
+            or ( $op->first->ppaddr eq "pp_lineseq"
+                and not null $op->first->first->sibling
+                and $op->first->first->sibling->ppaddr eq "pp_unstack" )
+          )
+    );
+
+   -A break will always be put before a list item containing a comma-arrow.
+   This will improve formatting of mixed lists of this form:
+
+        OLD:
+        $c->create(
+            'text', 225, 20, -text => 'A Simple Plot',
+            -font => $font,
+            -fill => 'brown'
+        );
+
+        NEW:
+        $c->create(
+            'text', 225, 20,
+            -text => 'A Simple Plot',
+            -font => $font,
+            -fill => 'brown'
+        );
+
+  -For convenience, the command -dac (--delete-all-comments) now also
+  deletes pod.  Likewise, -tac (--tee-all-comments) now also sends pod
+  to a '.TEE' file.  Complete control over the treatment of pod and
+  comments is still possible, as described in the updated help message 
+  and man page.
+
+  -The logic which breaks open 'containers' has been rewritten to be completely
+  symmetric in the following sense: if a line break is placed after an opening
+  {, [, or (, then a break will be placed before the corresponding closing
+  token.  Thus, a container either remains closed or is completely cracked
+  open.
+
+  -Improved indentation of parenthesized lists.  For example, 
+
+            OLD:
+            $GPSCompCourse =
+              int(
+              atan2( $GPSTempCompLong - $GPSLongitude,
+              $GPSLatitude - $GPSTempCompLat ) * 180 / 3.14159265 );
+
+            NEW:
+            $GPSCompCourse = int(
+                atan2(
+                    $GPSTempCompLong - $GPSLongitude,
+                    $GPSLatitude - $GPSTempCompLat
+                  ) * 180 / 3.14159265
+            );
+
+   Further improvements will be made in future releases.
+
+  -Some improvements were made in formatting small lists.
+
+  -Correspondence between Input and Output line numbers reported in a 
+   .LOG file should now be exact.  They were sometimes off due to the size
+   of intermediate buffers.
+
+  -Corrected minor tokenization error in which a ';' in a foreach loop
+   control was tokenized as a statement termination, which forced a 
+   line break:
+
+        OLD:
+        foreach ( $i = 0;
+            $i <= 10;
+            $i += 2
+          )
+        {
+            print "$i ";
+        }
+
+        NEW:
+        foreach ( $i = 0 ; $i <= 10 ; $i += 2 ) {
+            print "$i ";
+        }
+
+  -Corrected a problem with reading config files, in which quote marks were not
+   stripped.  As a result, something like -wba="&& . || " would have the leading
+   quote attached to the && and not work correctly.  A workaround for older
+   versions is to place a space around all tokens within the quotes, like this:
+   -wba=" && . || "
+
+  -Removed any existing space between a label and its ':'
+    OLD    : { }
+    NEW: { }
+   This was necessary because the label and its colon are a single token.
+
+  -Corrected tokenization error for the following (highly non-recommended) 
+   construct:
+    $user = @vars[1] / 100;
+ 
+  -Resolved cause of a difference between perltidy under perl v5.6.1 and
+  5.005_03; the problem was different behavior of \G regex position
+  marker(!)
+
+=head2 2001 10 20
+ 
+ -Corrected a bug in which a break was not being made after a full-line
+ comment within a short eval/sort/map/grep block.  A flag was not being
+ zeroed.  The syntax error check catches this.  Here is a snippet which
+ illustrates the bug:
+
+        eval {
+            #open Socket to Dispatcher
+            $sock = &OpenSocket;
+        };
+
+ The formatter mistakenly thought that it had found the following 
+ one-line block:
+ 
+        eval {#open Socket to Dispatcher$sock = &OpenSocket; };
+
+ The patch fixes this. Many thanks to Henry Story for reporting this bug.
+
+ -Changes were made to help diagnose and resolve problems in a
+ .perltidyrc file: 
+   (1) processing of command parameters has been into two separate
+   batches so that any errors in a .perltidyrc file can be localized.  
+   (2) commands --help, --version, and as many of the --dump-xxx
+   commands are handled immediately, without any command line processing
+   at all.  
+   (3) Perltidy will ignore any commands in the .perltidyrc file which
+   cause immediate exit.  These are:  -h -v -ddf -dln -dop -dsn -dtt
+   -dwls -dwrs -ss.  Thanks to Wolfgang Weisselberg for helpful
+   suggestions regarding these updates.
+
+ -Syntax check has been reinstated as default for MSWin32 systems.  This
+ way Windows 2000 users will get syntax check by default, which seems
+ like a better idea, since the number of Win 95/98 systems will be
+ decreasing over time.  Documentation revised to warn Windows 95/98
+ users about the problem with empty '&1'.  Too bad these systems
+ all report themselves as MSWin32.
+
+=head2 2001 10 16
+
+ -Fixed tokenization error in which a method call of the form
+
+    Module::->new();
+ 
+  got a space before the '::' like this:
+
+    Module ::->new();
+
+  Thanks to David Holden for reporting this.
+ 
+ -Added -html control over pod text, using a new abbreviation 'pd'.  See
+ updated perl2web man page. The default is to use the color of a comment,
+ but italicized.  Old .css style sheets will need a new line for
+ .pd to use this.  The old color was the color of a string, and there
+ was no control.  
+ 
+ -.css lines are now printed in sorted order.
+
+ -Fixed interpolation problem where html files had '$input_file' as title
+ instead of actual input file name.  Thanks to Simon Perreault for finding
+ this and sending a patch, and also to Tobias Weber.
+
+ -Breaks will now have the ':' placed at the start of a line, 
+ one per line by default because this shows logical structure
+ more clearly. This coding has been completely redone. Some 
+ examples of new ?/: formatting:
+
+       OLD:
+            wantarray ? map( $dir::cwd->lookup($_)->path, @_ ) :
+              $dir::cwd->lookup( $_[0] )->path;
+
+       NEW:
+            wantarray 
+              ? map( $dir::cwd->lookup($_)->path, @_ )
+              : $dir::cwd->lookup( $_[0] )->path;
+
+       OLD:
+                $a = ( $b > 0 ) ? {
+                    a => 1,
+                    b => 2
+                } : { a => 6, b => 8 };
+
+       NEW:
+                $a = ( $b > 0 )
+                  ? {
+                    a => 1,
+                    b => 2
+                  }
+                  : { a => 6, b => 8 };
+
+    OLD: (-gnu):
+    $self->note($self->{skip} ? "Hunk #$self->{hunk} ignored at 1.\n" :
+                "Hunk #$self->{hunk} failed--$@");
+
+    NEW: (-gnu):
+    $self->note($self->{skip} 
+                ? "Hunk #$self->{hunk} ignored at 1.\n"
+                : "Hunk #$self->{hunk} failed--$@");
+
+    OLD:
+        $which_search =
+          $opts{"t"} ? 'title'   :
+          $opts{"s"} ? 'subject' : $opts{"a"} ? 'author' : 'title';
+
+    NEW:
+        $which_search =
+          $opts{"t"} ? 'title'
+          : $opts{"s"} ? 'subject'
+          : $opts{"a"} ? 'author'
+          : 'title';
+ 
+ You can use -wba=':' to recover the previous default which placed ':'
+ at the end of a line.  Thanks to Michael Cartmell for helpful
+ discussions and examples.  
+
+ -Tokenizer updated to do syntax checking for matched ?/: pairs.  Also,
+ the tokenizer now outputs a unique serial number for every balanced
+ pair of brace types and ?/: pairs.  This greatly simplifies the
+ formatter.
+
+ -Long lines with repeated 'and', 'or', '&&', '||'  will now have
+ one such item per line.  For example:
+
+    OLD:
+        if ( $opt_d || $opt_m || $opt_p || $opt_t || $opt_x
+            || ( -e $archive && $opt_r ) )
+        {
+            ( $pAr, $pNames ) = readAr($archive);
+        }
+
+    NEW:
+        if ( $opt_d
+            || $opt_m
+            || $opt_p
+            || $opt_t
+            || $opt_x
+            || ( -e $archive && $opt_r ) )
+        {
+            ( $pAr, $pNames ) = readAr($archive);
+        }
+
+   OLD:
+        if ( $vp->{X0} + 4 <= $x && $vp->{X0} + $vp->{W} - 4 >= $x
+            && $vp->{Y0} + 4 <= $y && $vp->{Y0} + $vp->{H} - 4 >= $y ) 
+
+   NEW:
+        if ( $vp->{X0} + 4 <= $x
+            && $vp->{X0} + $vp->{W} - 4 >= $x
+            && $vp->{Y0} + 4 <= $y
+            && $vp->{Y0} + $vp->{H} - 4 >= $y )
+
+ -Long lines with multiple concatenated tokens will have concatenated
+ terms (see below) placed one per line, except for short items.  For
+ example:
+
+   OLD:
+        $report .=
+          "Device type:" . $ib->family . "  ID:" . $ib->serial . "  CRC:"
+          . $ib->crc . ": " . $ib->model() . "\n";
+
+   NEW:
+        $report .= "Device type:"
+          . $ib->family . "  ID:"
+          . $ib->serial . "  CRC:"
+          . $ib->model()
+          . $ib->crc . ": " . "\n";
+
+ NOTE: at present 'short' means 8 characters or less.  There is a
+ tentative flag to change this (-scl), but it is undocumented and
+ is likely to be changed or removed later, so only use it for testing.  
+ In the above example, the tokens "  ID:", "  CRC:", and "\n" are below
+ this limit.  
+
+ -If a line which is short enough to fit on a single line was
+ nevertheless broken in the input file at a 'good' location (see below), 
+ perltidy will try to retain a break.  For example, the following line
+ will be formatted as:
+ 
+    open SUM, "<$file"
+      or die "Cannot open $file ($!)";
+ 
+ if it was broken in the input file, and like this if not:
+
+    open SUM, "<$file" or die "Cannot open $file ($!)";
+
+ GOOD: 'good' location means before 'and','or','if','unless','&&','||'
+
+ The reason perltidy does not just always break at these points is that if
+ there are multiple, similar statements, this would preclude alignment.  So
+ rather than check for this, perltidy just tries to follow the input style,
+ in the hopes that the author made a good choice. Here is an example where 
+ we might not want to break before each 'if':
+
+    ($Locale, @Locale) = ($English, @English) if (@English > @Locale);
+    ($Locale, @Locale) = ($German,  @German)  if (@German > @Locale);
+    ($Locale, @Locale) = ($French,  @French)  if (@French > @Locale);
+    ($Locale, @Locale) = ($Spanish, @Spanish) if (@Spanish > @Locale);
+
+ -Added wildcard file expansion for systems with shells which lack this.
+ Now 'perltidy *.pl' should work under MSDOS/Windows.  Thanks to Hugh Myers 
+ for suggesting this.  This uses builtin glob() for now; I may change that.
+
+ -Added new flag -sbl which, if specified, overrides the value of -bl
+ for opening sub braces.  This allows formatting of this type:
+
+ perltidy -sbl 
+
+ sub foo
+ {
+    if (!defined($_[0])) {
+        print("Hello, World\n");
+    }
+    else {
+        print($_[0], "\n");
+    }
+ }
+ Requested by Don Alexander.
+
+ -Fixed minor parsing error which prevented a space after a $$ variable
+ (pid) in some cases.  Thanks to Michael Cartmell for noting this.
+ For example, 
+   old: $$< 700 
+   new: $$ < 700
+
+ -Improved line break choices 'and' and 'or' to display logic better.
+ For example:
+
+    OLD:
+        exists $self->{'build_dir'} and push @e,
+          "Unwrapped into directory $self->{'build_dir'}";
+
+    NEW:
+        exists $self->{'build_dir'}
+          and push @e, "Unwrapped into directory $self->{'build_dir'}";
+
+ -Fixed error of multiple use of abbreviatioin '-dsc'.  -dsc remains 
+ abbreviation for delete-side-comments; -dsm is new abbreviation for 
+ delete-semicolons.
+
+ -Corrected and updated 'usage' help routine.  Thanks to Slaven Rezic for 
+ noting an error.
+
+ -The default for Windows is, for now, not to do a 'perl -c' syntax
+ check (but -syn will activate it).  This is because of problems with
+ command.com.  James Freeman sent me a patch which tries to get around
+ the problems, and it works in many cases, but testing revealed several
+ issues that still need to be resolved.  So for now, the default is no
+ syntax check for Windows.
+
+ -I added a -T flag when doing perl -c syntax check.
+ This is because I test it on a large number of scripts from sources
+ unknown, and who knows what might be hidden in initialization blocks?
+ Also, deactivated the syntax check if perltidy is run as root.  As a
+ benign example, running the previous version of perltidy on the
+ following file would cause it to disappear:
+
+        BEGIN{
+                print "Bye, bye baby!\n";
+                unlink $0;
+        }
+        
+ The new version will not let that happen.
+
+ -I am contemplating (but have not yet implemented) making '-lp' the
+ default indentation, because it is stable now and may be closer to how
+ perl is commonly formatted.  This could be in the next release.  The
+ reason that '-lp' was not the original default is that the coding for
+ it was complex and not ready for the initial release of perltidy.  If
+ anyone has any strong feelings about this, I'd like to hear.  The
+ current default could always be recovered with the '-nlp' flag.  
+
+=head2 2001 09 03 
+
+ -html updates:
+     - sub definition names are now specially colored, red by default.  
+       The letter 'm' is used to identify them.
+     - keyword 'sub' now has color of other keywords.
+     - restored html keyword color to __END__ and __DATA__, which was 
+       accidentally removed in the previous version.
+
+ -A new -se (--standard-error-output) flag has been implemented and
+ documented which causes all errors to be written to standard output
+ instead of a .ERR file.
+
+ -A new -w (--warning-output) flag has been implemented and documented
+  which causes perltidy to output certain non-critical messages to the
+  error output file, .ERR.  These include complaints about pod usage,
+  for example.  The default is to not include these.
+
+  NOTE: This replaces an undocumented -w=0 or --warning-level flag
+  which was tentatively introduced in the previous version to avoid some
+  unwanted messages.  The new default is the same as the old -w=0, so
+  that is no longer needed. 
+
+  -Improved syntax checking and corrected tokenization of functions such
+  as rand, srand, sqrt, ...  These can accept either an operator or a term
+  to their right.  This has been corrected.
+ 
+ -Corrected tokenization of semicolon: testing of the previous update showed 
+ that the semicolon in the following statement was being mis-tokenized.  That
+ did no harm, other than adding an extra blank space, but has been corrected.
+
+          for (sort {strcoll($a,$b);} keys %investments) {
+             ...
+          }
+
+ -New syntax check: after wasting 5 minutes trying to resolve a syntax
+  error in which I had an extra terminal ';' in a complex for (;;) statement, 
+  I spent a few more minutes adding a check for this in perltidy so it won't
+  happen again.
+
+ -The behavior of --break-before-subs (-bbs) and --break-before-blocks
+ (-bbb) has been modified.  Also, a new control parameter,
+ --long-block-line-count=n (-lbl=n) has been introduced to give more
+ control on -bbb.  This was previously a hardwired value.  The reason
+ for the change is to reduce the number of unwanted blank lines that
+ perltidy introduces, and make it less erratic.  It's annoying to remove
+ an unwanted blank line and have perltidy put it back.  The goal is to
+ be able to sprinkle a few blank lines in that dense script you
+ inherited from Bubba.  I did a lot of experimenting with different
+ schemes for introducing blank lines before and after code blocks, and
+ decided that there is no really good way to do it.  But I think the new
+ scheme is an improvement.  You can always deactivate this with -nbbb.
+ I've been meaning to work on this; thanks to Erik Thaysen for bringing
+ it to my attention.
+
+ -The .LOG file is seldom needed, and I get tired of deleting them, so
+  they will now only be automatically saved if perltidy thinks that it
+  made an error, which is almost never.  You can still force the logfile
+  to be saved with -log or -g.
+
+ -Improved method for computing number of columns in a table.  The old
+ method always tried for an even number.  The new method allows odd
+ numbers when it is obvious that a list is not a hash initialization
+ list.
+
+   old: my (
+             $name,       $xsargs, $parobjs, $optypes,
+             $hasp2child, $pmcode, $hdrcode, $inplacecode,
+             $globalnew,  $callcopy
+          )
+          = @_;
+
+   new: my (
+             $name,   $xsargs,  $parobjs,     $optypes,   $hasp2child,
+             $pmcode, $hdrcode, $inplacecode, $globalnew, $callcopy
+          )
+          = @_;
+
+ -I fiddled with the list threshold adjustment, and some small lists
+ look better now.  Here is the change for one of the lists in test file
+ 'sparse.t':
+ old:
+   %units =
+     ("in", "in", "pt", "pt", "pc", "pi", "mm", "mm", "cm", "cm", "\\hsize", "%",
+       "\\vsize", "%", "\\textwidth", "%", "\\textheight", "%");
+
+ new:
+   %units = (
+              "in",      "in", "pt",          "pt", "pc",           "pi",
+              "mm",      "mm", "cm",          "cm", "\\hsize",      "%",
+              "\\vsize", "%",  "\\textwidth", "%",  "\\textheight", "%"
+              );
+
+ -Improved -lp formatting at '=' sign.  A break was always being added after
+ the '=' sign in a statement such as this, (to be sure there was enough room
+ for the parameters):
+
+ old: my $fee =
+        CalcReserveFee(
+                        $env,          $borrnum,
+                        $biblionumber, $constraint,
+                        $bibitems
+                        );
+ 
+ The updated version doesn't do this unless the space is really needed:
+
+ new: my $fee = CalcReserveFee(
+                               $env,          $borrnum,
+                               $biblionumber, $constraint,
+                               $bibitems
+                               );
+
+ -I updated the tokenizer to allow $#+ and $#-, which seem to be new to
+ Perl 5.6.  Some experimenting with a recent version of Perl indicated
+ that it allows these non-alphanumeric '$#' array maximum index
+ varaibles: $#: $#- $#+ so I updated the parser accordingly.  Only $#:
+ seems to be valid in older versions of Perl.
+
+ -Fixed a rare formatting problem with -lp (and -gnu) which caused
+ excessive indentation.
+
+ -Many additional syntax checks have been added.
+
+ -Revised method for testing here-doc target strings; the following
+ was causing trouble with a regex test because of the '*' characters:
+  print <<"*EOF*";
+  bla bla
+  *EOF*
+ Perl seems to allow almost anything to be a here doc target, so an
+ exact string comparison is now used.
+
+ -Made update to allow underscores in binary numbers, like '0b1100_0000'.
+
+ -Corrected problem with scanning certain module names; a blank space was 
+ being inserted after 'warnings' in the following:
+    use warnings::register;
+ The problem was that warnings (and a couple of other key modules) were 
+ being tokenized as keywords.  They should have just been identifiers.
+
+ -Corrected tokenization of indirect objects after sort, system, and exec,
+ after testing produced an incorrect error message for the following
+ line of code:
+    print sort $sortsubref @list;
+
+ -Corrected minor problem where a line after a format had unwanted
+ extra continuation indentation.  
+
+ -Delete-block-comments (and -dac) now retain any leading hash-bang line
+
+ -Update for -lp (and -gnu) to not align the leading '=' of a list
+ with a previous '=', since this interferes with alignment of parameters.
+
+  old:  my $hireDay = new Date;
+        my $self    = {
+                     firstName => undef,
+                     lastName  => undef,
+                     hireDay   => $hireDay
+                     };
+    
+  new:  my $hireDay = new Date;
+        my $self = {
+                     firstName => undef,
+                     lastName  => undef,
+                     hireDay   => $hireDay
+                     };
+
+ -Modifications made to display tables more compactly when possible,
+  without adding lines. For example,
+  old:
+                '1', "I", '2', "II", '3', "III", '4', "IV",
+                '5', "V", '6', "VI", '7', "VII", '8', "VIII",
+                '9', "IX"
+  new:
+                '1', "I",   '2', "II",   '3', "III",
+                '4', "IV",  '5', "V",    '6', "VI",
+                '7', "VII", '8', "VIII", '9', "IX"
+
+ -Corrected minor bug in which -pt=2 did not keep the right paren tight
+ around a '++' or '--' token, like this:
+
+            for ($i = 0 ; $i < length $key ; $i++ )
+
+ The formatting for this should be, and now is: 
+
+            for ($i = 0 ; $i < length $key ; $i++)
+
+ Thanks to Erik Thaysen for noting this.
+
+ -Discovered a new bug involving here-docs during testing!  See BUGS.html.  
+
+ -Finally fixed parsing of subroutine attributes (A Perl 5.6 feature).
+ However, the attributes and prototypes must still be on the same line
+ as the sub name.
+
+=head2 2001 07 31 
+
+ -Corrected minor, uncommon bug found during routine testing, in which a
+ blank got inserted between a function name and its opening paren after
+ a file test operator, but only in the case that the function had not
+ been previously seen.  Perl uses the existence (or lack thereof) of 
+ the blank to guess if it is a function call.  That is,
+    if (-l pid_filename()) {
+ became
+    if (-l pid_filename ()) {
+ which is a syntax error if pid_filename has not been seen by perl.
+
+ -If the AutoLoader module is used, perltidy will continue formatting
+ code after seeing an __END__ line.  Use -nlal to deactivate this feature.  
+ Likewise, if the SelfLoader module is used, perltidy will continue 
+ formatting code after seeing a __DATA__ line.  Use -nlsl to
+ deactivate this feature.  Thanks to Slaven Rezic for this suggestion.
+
+ -pod text after __END__ and __DATA__ is now identified by perltidy
+ so that -dp works correctly.  Thanks to Slaven Rezic for this suggestion.
+
+ -The first $VERSION line which might be eval'd by MakeMaker
+ is now passed through unchanged.  Use -npvl to deactivate this feature.
+ Thanks to Manfred Winter for this suggestion.
+
+ -Improved indentation of nested parenthesized expressions.  Tests have
+ given favorable results.  Thanks to Wolfgang Weisselberg for helpful
+ examples.
+
+=head2 2001 07 23 
+ 
+ -Fixed a very rare problem in which an unwanted semicolon was inserted
+ due to misidentification of anonymous hash reference curly as a code
+ block curly.  (No instances of this have been reported; I discovered it
+ during testing).  A workaround for older versions of perltidy is to use
+ -nasc.
+
+ -Added -icb (-indent-closing-brace) parameter to indent a brace which
+ terminates a code block to the same level as the previous line.
+ Suggested by Andrew Cutler.  For example, 
+
+        if ($task) {
+            yyy();
+            }    # -icb
+        else {
+            zzz();
+            }
+
+ -Rewrote error message triggered by an unknown bareword in a print or
+ printf filehandle position, and added flag -w=0 to prevent issuing this
+ error message.  Suggested by Byron Jones.
+
+ -Added modification to align a one-line 'if' block with similar
+ following 'elsif' one-line blocks, like this:
+      if    ( $something eq "simple" )  { &handle_simple }
+      elsif ( $something eq "hard" )    { &handle_hard }
+ (Suggested by  Wolfgang Weisselberg).
+
+=head2 2001 07 02 
+
+ -Eliminated all constants with leading underscores because perl 5.005_03
+ does not support that.  For example, _SPACES changed to XX_SPACES.
+ Thanks to kromJx for this update.
+
+=head2 2001 07 01 
+
+ -the directory of test files has been moved to a separate distribution
+ file because it is getting large but is of little interest to most users.
+ For the current distribution:
+   perltidy-20010701.tgz        contains the source and docs for perltidy
+   perltidy-20010701-test.tgz   contains the test files
+
+ -fixed bug where temporary file perltidy.TMPI was not being deleted 
+ when input was from stdin.
+
+ -adjusted line break logic to not break after closing brace of an
+ eval block (suggested by Boris Zentner).
+
+ -added flag -gnu (--gnu-style) to give an approximation to the GNU
+ style as sometimes applied to perl.  The programming style in GNU
+ 'automake' was used as a guide in setting the parameters; these
+ parameters will probably be adjusted over time.
+
+ -an empty code block now has one space for emphasis:
+   if ( $cmd eq "bg_untested" ) {}    # old
+   if ( $cmd eq "bg_untested" ) { }   # new
+ If this bothers anyone, we could create a parameter.
+
+ -the -bt (--brace-tightness) parameter has been split into two
+ parameters to give more control. -bt now applies only to non-BLOCK
+ braces, while a new parameter -bbt (block-brace-tightness) applies to
+ curly braces which contain code BLOCKS. The default value is -bbt=0.
+
+ -added flag -icp (--indent-closing-paren) which leaves a statement
+ termination of the form );, };, or ]; indented with the same
+ indentation as the previous line.  For example,
+
+    @month_of_year = (          # default, or -nicp
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+        'Nov', 'Dec'
+    );
+
+    @month_of_year = (          # -icp
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
+        'Nov', 'Dec'
+        );
+
+ -Vertical alignment updated to synchronize with tokens &&, ||,
+ and, or, if, unless.  Allowable space before forcing
+ resynchronization has been increased.  (Suggested by  Wolfgang
+ Weisselberg).
+
+ -html corrected to use -nohtml-bold-xxxxxxx or -nhbx to negate bold,
+ and likewise -nohtml-italic-xxxxxxx or -nhbi to negate italic.  There
+ was no way to negate these previously.  html documentation updated and
+ corrected.  (Suggested by  Wolfgang Weisselberg).
+
+ -Some modifications have been made which improve the -lp formatting in
+ a few cases.
+
+ -Perltidy now retains or creates a blank line after an =cut to keep
+ podchecker happy (Suggested by Manfred H. Winter).  This appears to be
+ a glitch in podchecker, but it was annoying.
+
+=head2 2001 06 17  
+
+ -Added -bli flag to give continuation indentation to braces, like this
+
+        if ($bli_flag)
+          {
+            extra_indentation();
+          }
+
+ -Corrected an error with the tab (-t) option which caused the last line
+ of a multi-line quote to receive a leading tab.  This error was in
+ version 2001 06 08  but not 2001 04 06.  If you formatted a script
+ with -t with this version, please check it by running once with the
+ -chk flag and perltidy will scan for this possible error.
+
+ -Corrected an invalid pattern (\R should have been just R), changed
+ $^W =1 to BEGIN {$^W=1} to use warnings in compile phase, and corrected
+ several unnecessary 'my' declarations. Many thanks to Wolfgang Weisselberg,
+ 2001-06-12, for catching these errors.
+ 
+ -A '-bar' flag has been added to require braces to always be on the
+ right, even for multi-line if and foreach statements.  For example,
+ the default formatting of a long if statement would be:
+
+        if ($bigwasteofspace1 && $bigwasteofspace2
+          || $bigwasteofspace3 && $bigwasteofspace4)
+        {
+            bigwastoftime();
+        }
+
+ With -bar, the formatting is:
+
+        if ($bigwasteofspace1 && $bigwasteofspace2
+          || $bigwasteofspace3 && $bigwasteofspace4) {
+            bigwastoftime();
+        }
+ Suggested by Eli Fidler 2001-06-11.
+
+ -Uploaded perltidy to sourceforge cvs 2001-06-10.
+
+ -An '-lp' flag (--line-up-parentheses) has been added which causes lists
+ to be indented with extra indentation in the manner sometimes
+ associated with emacs or the GNU suggestions.  Thanks to Ian Stuart for
+ this suggestion and for extensive help in testing it. 
+
+ -Subroutine call parameter lists are now formatted as other lists.
+ This should improve formatting of tables being passed via subroutine
+ calls.  This will also cause full indentation ('-i=n, default n= 4) of
+ continued parameter list lines rather than just the number of spaces
+ given with -ci=n, default n=2.
+ 
+ -Added support for hanging side comments.  Perltidy identifies a hanging
+ side comment as a comment immediately following a line with a side
+ comment or another hanging side comment.  This should work in most
+ cases.  It can be deactivated with --no-hanging-side-comments (-nhsc).
+ The manual has been updated to discuss this.  Suggested by Brad
+ Eisenberg some time ago, and finally implemented.
+
+=head2 2001 06 08  
+
+ -fixed problem with parsing command parameters containing quoted
+ strings in .perltidyrc files. (Reported by Roger Espel Llima 2001-06-07).
+
+ -added two command line flags, --want-break-after and 
+ --want-break-before, which allow changing whether perltidy
+ breaks lines before or after any operators.  Please see the revised 
+ man pages for details.
+
+ -added system-wide configuration file capability.
+ If perltidy does not find a .perltidyrc command line file in
+ the current directory, nor in the home directory, it now looks
+ for '/usr/local/etc/perltidyrc' and then for '/etc/perltidyrc'.
+ (Suggested by Roger Espel Llima 2001-05-31).
+
+ -fixed problem in which spaces were trimmed from lines of a multi-line
+ quote. (Reported by Roger Espel Llima 2001-05-30).  This is an 
+ uncommon situation, but serious, because it could conceivably change
+ the proper function of a script.
+
+ -fixed problem in which a semicolon was incorrectly added within 
+ an anonymous hash.  (Reported by A.C. Yardley, 2001-5-23).
+ (You would know if this happened, because perl would give a syntax
+ error for the resulting script).
+
+ -fixed problem in which an incorrect error message was produced
+  after a version number on a 'use' line, like this ( Reported 
+  by Andres Kroonmaa, 2001-5-14):
+
+              use CGI 2.42 qw(fatalsToBrowser);
+
+  Other than the extraneous error message, this bug was harmless.
+
+=head2 2001 04 06 
+ 
+ -fixed serious bug in which the last line of some multi-line quotes or
+  patterns was given continuation indentation spaces.  This may make
+  a pattern incorrect unless it uses the /x modifier.  To find
+  instances of this error in scripts which have been formatted with
+  earlier versions of perltidy, run with the -chk flag, which has
+  been added for this purpose (SLH, 2001-04-05).
+
+  ** So, please check previously formatted scripts by running with -chk
+  at least once **
+
+ -continuation indentation has been reprogrammed to be hierarchical, 
+  which improves deeply nested structures.
+
+ -fixed problem with undefined value in list formatting (reported by Michael
+  Langner 2001-04-05)
+
+ -Switched to graphical display of nesting in .LOG files.  If an
+  old format string was "(1 [0 {2", the new string is "{{(".  This
+  is easier to read and also shows the order of nesting.
+
+ -added outdenting of cuddled paren structures, like  ")->pack(".
+
+ -added line break and outdenting of ')->' so that instead of
+
+	$mw->Label(
+	  -text   => "perltidy",
+	  -relief => 'ridge')->pack;
+ 
+  the current default is:
+
+	$mw->Label(
+	  -text   => "perltidy",
+	  -relief => 'ridge'
+	)->pack;
+
+  (requested by Michael Langner 2001-03-31; in the future this could 
+  be controlled by a command-line parameter).
+
+ -revised list indentation logic, so that lists following an assignment
+  operator get one full indentation level, rather than just continuation 
+  indentation.  Also corrected some minor glitches in the continuation 
+  indentation logic. 
+
+ -Fixed problem with unwanted continuation indentation after a blank line 
+ (reported by Erik Thaysen 2001-03-28):
+
+ -minor update to avoid stranding a single '(' on one line
+
+=head2 2001 03 28:
+
+ -corrected serious error tokenizing filehandles, in which a sub call 
+ after a print or printf, like this:
+    print usage() and exit;
+ became this:
+    print usage () and exit;
+ Unfortunately, this converts 'usage' to a filehandle.  To fix this, rerun
+ perltidy; it will look for this situation and issue a warning. 
+
+ -fixed another cuddled-else formatting bug (Reported by Craig Bourne)
+
+ -added several diagnostic --dump routines
+ 
+ -added token-level whitespace controls (suggested by Hans Ecke)
+
+=head2 2001 03 23:
+
+ -added support for special variables of the form ${^WANT_BITS}
+
+ -space added between scalar and left paren in 'for' and 'foreach' loops,
+  (suggestion by Michael Cartmell):
+
+    for $i( 1 .. 20 )   # old
+    for $i ( 1 .. 20 )   # new
+
+ -html now outputs cascading style sheets (thanks to suggestion from
+  Hans Ecke)
+
+ -flags -o and -st now work with -html
+
+ -added missing -html documentation for comments (noted by Alex Izvorski)
+
+ -support for VMS added (thanks to Michael Cartmell for code patches and 
+   testing)
+
+ -v-strings implemented (noted by Hans Ecke and Michael Cartmell; extensive
+   testing by Michael Cartmell)
+
+ -fixed problem where operand may be empty at line 3970 
+  (\b should be just b in lines 3970, 3973) (Thanks to Erik Thaysen, 
+  Keith Marshall for bug reports)
+
+ -fixed -ce bug (cuddled else), where lines like '} else {' were indented
+  (Thanks to Shawn Stepper and Rick Measham for reporting this)
+
+=head2 2001 03 04:
+
+ -fixed undefined value in line 153 (only worked with -I set)
+ (Thanks to Mike Stok, Phantom of the Opcodes, Ian Ehrenwald, and others)
+
+ -fixed undefined value in line 1069 (filehandle problem with perl versions <
+ 5.6) (Thanks to Yuri Leikind, Mike Stok, Michael Holve, Jeff Kolber)
+
+=head2 2001 03 03:
+
+ -Initial announcement at freshmeat.net; started Change Log
+ (Unfortunately this version was DOA, but it was fixed the next day)
diff --git a/local-docs/INSTALL.pod b/local-docs/INSTALL.pod
new file mode 100644
index 00000000..00b8999d
--- /dev/null
+++ b/local-docs/INSTALL.pod
@@ -0,0 +1,422 @@
+=head1 PERLTIDY INSTALLATION NOTES
+
+=head1 Get a distribution file
+
+=over 4
+
+=item Source Files in .tar.gz and .zip format
+
+This document tells how to install perltidy from the basic source
+distribution files in F<.tar.gz> or F<.zip> format.  These files are
+identical except for the line endings.  The F<.tar.gz> has Unix style
+line endings, and the F<.zip> file has Windows style line endings.  The
+standard perl MakeMaker method should work for these in most cases. 
+
+=item Source files in RPM and .deb format
+
+The web site also has links to RPM and Debian .deb Linux packages, which may be
+convenient for some users.
+
+=back
+
+=head1 Quick Test Drive
+
+If you want to do a quick test of perltidy without doing any installation, get
+a F<.tar.gz> or a F<.zip> source file and see the section below "Method 2: Installation
+as a single binary script".
+
+=head1 Uninstall older versions
+
+In certain circumstances, it is best to remove an older version
+of perltidy before installing the latest version.  These are:
+
+=over 4
+
+=item Uninstall a Version older than 20020225
+
+You can use perltidy -v to determine the version number.  The first
+version of perltidy to use Makefile.PL for installation was 20020225, so
+if your previous installation is older than this, it is best to remove
+it, because the installation path may now be different.  There were up
+to 3 files these older installations: the script F and
+possibly two man pages, F and F.  If you saved
+your Makefile, you can probably use C.  Otherwise, you
+can use a F or F command to find and remove these files.
+
+=item Uninstall older versions when changing installation method
+
+If you switch from one installation method to another, the paths to the
+components of perltidy may change, so it is probably best to remove the older
+version before installing the new version.  If your older installation method
+had an uninstall option (such as with RPM's and debian packages), use it.
+Otherwise, you can locate and remove the older files by hand.  There are two
+key files: F and F.  In addition, there may be one or two
+man pages, something like F and F.  You can use a
+C and/or C command to find and remove these files.  After
+installation, you can verify that the new version of perltidy is working with
+the C command.
+
+=back
+
+=head1 Two Installation Methods - Overview
+
+These are generic instructions.  Some system-specific notes and hints
+are given in later sections.  
+
+Two separate installation methods are possible.  
+
+=over 4
+
+=item Method 1: Standard Installation Method
+
+The standard method based on MakeMaker should work in a normal perl
+environment.  This is the recommended installation procedure for
+systems which support it.
+
+        perl Makefile.PL
+        make
+        make test
+        make install
+
+The C command is probably C under a Windows system.  You
+may need to become root (or administrator) before doing the C step.  
+
+=item Method 2: Installation as a single binary script
+
+If you just want to take perltidy for a quick test drive without installing it,
+or are having trouble installing modules, you can bundle it all in one
+independent executable script.  This might also be helpful on a system for
+which the Makefile.PL method does not work, or if you are temporarily a guest
+on some system, or if you want to try hacking a special version of perltidy
+without messing up your regular version.  
+
+You just need to uncompress the source distribution, cd down into it, and enter
+the command:
+
+        perl pm2pl
+
+which will combine the pieces of perltidy into a single script named
+F in the current directory.  This script should be 
+fully functional.  Try it out on a handy perl script, for example
+  
+  perl perltidy Makefile.PL
+
+This should create F.
+
+=item After Installation
+
+After installation by either method, verify that the installation worked
+and that the correct new version is being by entering:
+
+  perltidy -v
+
+If the version number disagrees with the version number embedded in the
+distribution file name, search for and remove the old version.
+For example, under a Unix system, the command C might 
+show where it is.  Also, see the above notes on uninstalling older
+versions.
+
+On a Unix system running the C shell, if you had a previous
+installation of perltidy, you may have to use 
+
+ hash -r
+
+to get the shell to find the new one.
+
+After F is installed, you can find where it will look for
+configuration files and environment variables on your system with
+the command:
+
+  perltidy -dpro
+
+=item How to Uninstall
+
+Unfortunately, the standard Perl installation method does not seem able
+to do an uninstall.
+
+But try this:
+
+  make uninstall
+
+On some systems, it will give you a list of files to remove by hand.  If
+not, you need to find the script F and its module file
+F, which will be in a subdirectory named F in the site
+library.
+
+If you installed perltidy with the alternative method, you should just
+reverse the steps that you used.
+
+=back
+
+=head2 Unix Installation Notes
+
+=over 4
+
+=item Alternative method - Unix
+
+If the alternative method is used, test the script produced by the
+C perl script:
+
+  perl ./perltidy somefile.pl
+
+where F is any convenient test file, such as F
+itself.  Then,
+
+1. If the script is not executable, use 
+
+ chmod +x perltidy
+
+2. Verify that the initial line in perltidy works for your system by
+entering:
+
+ ./perltidy -h
+
+which should produce the usage text and then exit.  This should usually
+work, but if it does not, you will need to change the first line in
+F to reflect the location of perl on your system.  On a Unix
+system, you might find the path to perl with the command 'which perl'.
+
+3. A sample F for this installation method is F.
+Edit it to have the correct paths.
+
+You will need to become root unless you change the paths to point to
+somewhere in your home directory.  Then issue the command
+
+ make -f Makefile.npm install
+
+This installs perltidy and the man page perltidy.1. 
+
+5. Test the installation using
+
+ perltidy -h
+
+You should see the usage screen.  Then, if you installed the man pages, 
+try
+
+ man perltidy
+
+which should bring up the manual page. 
+
+If you ever want to remove perltidy, you can remove perltidy and its man
+pages by hand or use
+
+ make uninstall
+
+=back
+
+=head2 Windows Installation Notes
+
+On a Windows 9x/Me system you should CLOSE ANY OPEN APPLICATIONS to
+avoid losing unsaved data in case of trouble.
+
+=over 4
+
+=item Standard Method - Windows
+
+After you unzip the distribution file, the procedure is probably this:
+
+        perl Makefile.PL
+        nmake
+        nmake test
+        nmake install
+
+You may need to download a copy of F to unzip the F<.zip> distribution
+file; you can get this at
+http://www.info-zip.org/pub/infozip/UnZip.html
+
+If you have ActiveState
+Perl, the installation method is outlined at
+http://aspn.activestate.com//ASPN/Reference/Products/ActivePerl/faq/Windows/ActivePerl-Winfaq9.html#How_can_I_use_modules_from_CPAN_
+
+You may need to download a copy of Microsoft's F program from
+ftp://ftp.microsoft.com/Softlib/MSLFILES/nmake15.exe
+
+If you are not familiar with installing modules, or have trouble doing
+so, and want to start testing perltidy quickly, you may want to use the
+alternative method instead (next section).
+
+=item Alternative Method - Windows
+
+From the main installation directory, just enter
+
+   perl pm2pl 
+
+Placing the resulting file F and the example batch file
+F, located in the F directory, in your path should
+work.  (You can determine your path by issuing the msdos command
+C).  However, the batch file probably will not support file
+redirection.  So, for example, to pipe the long help message through
+'more', you might have to invoke perltidy with perl directly, like this:
+
+ perl \somepath\perltidy -h | more
+
+The batch file will not work properly with wildcard filenames, but you may
+use wildcard filenames if you place them in quotes.  For example
+
+ perltidy '*.pl'
+
+=back
+
+=head2 VMS Installation Notes
+
+=over 4
+
+=item Links to VMS Utilities and Documentation
+
+To install perltidy you will need the following utilities Perl, of
+course, source with VMS goodies available from
+http://www.sidhe.org/vmsperl or binary available from the Compaq OpenVMS
+freeware CD.  To unpack the source either gunzip and vmstar available
+from the Compaq OpenVMS freeware CD or zip available from
+http://www.info-zip.org/
+
+To build perltidy you can use either B, Compaq's VMS equivalent of
+make, or B, an B clone available from
+http://www.madgoat.com.
+
+Information on running perl under VMS can be found at:
+http://w4.lns.cornell.edu/~pvhp/perl/VMS.html
+
+=item Unpack the source:
+
+ $ unzip -a perl-tidy-yyyymmdd.zip  ! or
+
+ $ unzip /text=auto perl-tidy-yyyymmdd.zip ! or
+
+ $ gunzip perl-tidy-yyyymmdd.tgz
+ $ vmstar perl-tidy-yyyymmdd.tar
+
+=item Build and install perltidy under VMS:
+
+ $ set default [.perl-tidy-yyymmdd]
+ $ perl perltidy.pl
+ $ mmk
+ $ mmk test
+ $ mmk install
+
+=item Using Perltidy under VMS
+
+Create a symbol. This should be put in a logon script, eg sylogin.com
+
+ $ perltidy == "perl perl_root:[utils]perltidy."
+
+Default parameters can be placed in a F file.  Perltidy
+looks for one in the following places and uses the first found if the
+logical C is a file and the file exists then that is used if the
+logical C is a directory then look for a F<.perltidyrc> file in the
+directory look for a F<.perltidyrc> file in the user's home directory
+
+To see where the search is done and which F<.perltidyrc> is used type
+
+ $ perltidy -dpro
+
+A system C logical can be defined pointing to a file with a
+minimal configuration,  and users can defined their own logical to use a
+personal F<.perltidyrc> file.
+
+ $ define /system perltidy perl_root:[utils]perltidy.rc
+
+=item The -x Parameter
+
+If you have one of the magic incantations at the start of perl scripts,
+so that they can be invoked as a .com file, then you will need to use
+the B<-x> parameter which causes perltidy to skip all lines until it
+finds a hash bang line eg C<#!perl -w>.  Since it is such a common
+option this is probably a good thing to put in a F<.perltidyrc> file.
+
+=item VMS File Extensions
+
+VMS file extensions will use an underscore character instead of a dot, 
+when necessary, to create a valid filename.  So 
+
+      perltidy myfile.pl
+
+will generate the output file F instead of
+F, and so on. 
+
+=back
+
+=head1 Troubleshooting / Other Operating Systems
+
+If there seems to be a problem locating a configuration file, you can see
+what is going on in the config file search with:
+
+ perltidy -dpro
+
+If you want to customize where perltidy looks for configuration files,
+look at the routine 'find_config_file' in module 'Tidy.pm'.  You should
+be able to at least use the '-pro=filename' method under most systems.  
+
+Remember to place quotes (either single or double) around input
+parameters which contain spaces, such as file names.  For example:
+
+ perltidy "file name with spaces"
+
+Without the quotes, perltidy would look for four files: F,
+F, F, and F.
+
+If you develop a system-dependent patch that might be of general
+interest, please let us know.
+
+=head1 CONFIGURATION FILE
+
+You do not need a configuration file, but you may eventually want to
+create one to save typing; the tutorial and man page discuss this.
+
+=head1 SYSTEM TEMPORARY FILES
+
+Perltidy needs to create a system temporary file when it invokes
+Pod::Html to format pod text under the -html option.  For Unix systems,
+this will normally be a file in /tmp, and for other systems, it will be
+a file in the current working directory named F.  This file
+will be removed when the run finishes.
+
+=head1 DOCUMENTATION
+
+Documentation is contained in B<.pod> format, either in the F directory
+or appended to the scripts.  
+
+These documents can also be found at http://perltidy.sourceforge.net
+
+Reading the brief tutorial should help you use perltidy effectively.  
+The tutorial can be read interactively with B, for
+example
+  
+  cd docs
+  perldoc tutorial.pod
+
+or else an F version can be made with B:
+
+  pod2html tutorial.pod >tutorial.html
+
+If you use the Makefile.PL installation method on a Unix system, the
+B and B man pages should automatically be installed.
+Otherwise, you can extract the man pages with the B utilities, as
+follows:
+
+  cd bin
+  pod2text perltidy >perltidy.txt
+  pod2html perltidy >perltidy.html
+  
+  cd lib/Perl
+  pod2text Tidy.pm >Tidy.txt
+  pod2html Tidy.pm >Tidy.html
+
+After installation, the installation directory of files may be deleted. 
+
+Perltidy is still being developed, so please check sourceforge occasionally
+for updates if you find that it is useful.  New releases are announced
+on freshmeat.net.
+
+=head1 CREDITS
+
+Thanks to the many programmers who have documented problems, made suggestions and sent patches.  
+
+=head1 FEEDBACK / BUG REPORTS
+
+If you see ways to improve these notes, please let us know.
+
+A list of current bugs and issues can be found at the CPAN site L
+
+To report a new bug or problem, use the link on this page .  
diff --git a/local-docs/Makefile b/local-docs/Makefile
new file mode 100644
index 00000000..95c15c73
--- /dev/null
+++ b/local-docs/Makefile
@@ -0,0 +1,34 @@
+.phony : docs
+
+CHANGES="../CHANGES.md"
+INSTALL="../INSTALL.md"
+README="../README.md"
+BUGS="../BUGS.md"
+
+DOCS=INSTALL \
+README \
+BUGS \
+CHANGES \
+perltidy.md \
+tutorial.md
+
+docs: $(DOCS)
+
+README: README.pod
+	pod2markdown README.pod >../README.md
+
+INSTALL: INSTALL.pod
+	pod2markdown INSTALL.pod >../INSTALL.md
+
+perltidy.md: ../bin/perltidy
+	pod2markdown ../bin/perltidy >perltidy.md
+
+tutorial.md: tutorial.pod
+	pod2markdown tutorial.pod >tutorial.md
+
+BUGS: BUGS.pod
+	pod2markdown BUGS.pod ../BUGS.md
+
+CHANGES: ChangeLog.pod
+	pod2markdown ChangeLog.pod >../CHANGES.md
+	
diff --git a/local-docs/README.pod b/local-docs/README.pod
new file mode 100644
index 00000000..ac006f7d
--- /dev/null
+++ b/local-docs/README.pod
@@ -0,0 +1,66 @@
+=head1 Welcome to Perltidy!
+
+Perltidy is a tool to indent and reformat perl scripts.  It can also
+write scripts in html format.
+
+Perltidy is free software released under the GNU General Public
+License -- please see the included file "COPYING" for details.
+
+=head1 PREREQUISITES
+
+C should work with most standard Perl installations.
+The following modules are not required, but perltidy may use them if
+detected:
+
+  HTML::Entities  will be used to encode HTML entities if detected
+  Pod::Html will be used to format pod text
+
+The total disk space needed after removing the installation directory will
+about 2 Mb.  
+
+=head1 DOWNLOAD
+
+There are two source distribution files:
+
+=over 4
+
+=item *
+
+A F<.tgz> "tarball", with Unix-style  line endings, and 
+
+=item *
+
+A zip file, F<.zip>, with Windows-style  line endings.
+
+=back
+
+In addition, the web site has links to debian and RPM packages.
+
+=head1 INSTALLATION
+
+For most standard installations, the standard Makefile.PL method should work:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+The INSTALL file has additional installation notes, and tells how
+to use perltidy without doing an installation.
+
+=head1 WHAT NEXT
+
+Please see the CHANGES file for notices of any recent updates.
+
+Please see the BUGS file for a list of all known open bugs.
+
+Documentation can be found in the F directory, and it can also be 
+found at http://perltidy.sourceforge.net
+
+Reading the brief tutorial should help you use perltidy effectively.  
+
+=head1 FEEDBACK / BUG REPORTS
+
+A list of current bugs and issues can be found at the CPAN site L
+
+To report a new bug or problem, use the link on this page .  
diff --git a/local-docs/Release-Checklist.md b/local-docs/Release-Checklist.md
new file mode 100644
index 00000000..98130f0a
--- /dev/null
+++ b/local-docs/Release-Checklist.md
@@ -0,0 +1,41 @@
+# Checklist of some things to when preparing a new version and/or release
+
+- review tickets at [rt.cpan.org](https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy) 
+- compare the new version with previous version on all files in test area
+- run 'author tests' on a much larger body of code than is covered by the .t
+  files.
+    - compare results of the current version with previous version
+- review tickets at sourceforge (hardly used now, but possible)
+- review/update the ChangeLog.pod file
+- Review code, especially any ## commented out sections and "FIXME's"
+- run perlver on all modules to check minimum version; should be 5.8.0
+  - The first line in Tidy.pm has the required version of Perl
+  - travis-CI is setup to test on version 5.8 so we should catch this type of error automatically
+  - use perlbrew to do local checks and debugging on earlier versions of perl
+- Run tidyall -a to be sure code is tidied
+  - note that I have tidyall set to also run perlcritic right now
+- Run perlcritic (if not done by tidyall)
+- run podchecker on all .pod files
+- run ispell on all .pod files
+- Be sure build at Travis.CI is clean for all version of perl
+- update VERSION numbers in these files (build.pl can do this):
+   - lib/Perl/Tidy.pm
+   - lib/Perl/Tidy.pod
+   - bin/perltidy
+   - local-docs/ChangeLog.pod
+- make manifest
+    - check MANIFEST over carefully
+    - sometimes it is necessary to remove MANIFEST and then do "make manifest"
+- make the .tar.gz
+   - perl Makefile.PL
+   - make 
+   - make test
+   - make dist
+- *IMPORTANT:* Now untar the file (perhaps in /tmp) and take a look at the
+  contents.  Be sure it does not have unwanted stuff
+   - If necessary, remove MANIFEST, fix MANIFEST.SKIP and run make manifest again
+- Do test installs on several systems 
+- Upload Release to CPAN
+- Update CPAN tickets
+- Upload release to sourceforge
+- Update web site
diff --git a/local-docs/perl2web.pod b/local-docs/perl2web.pod
new file mode 100644
index 00000000..fbc60c6f
--- /dev/null
+++ b/local-docs/perl2web.pod
@@ -0,0 +1,270 @@
+
+=head1 NAME
+
+perl2web - documentation for perltidy -html
+
+=head1 PERLTIDY HTML DOCUMENTATION
+
+This document explains perltidy options available for outputting perl
+scripts in html format.  For other perltidy options, see the perltidy
+man page, or go to the home page at http://perltidy.sourceforge.net.
+
+Please note that the B<-html> flag is the "master switch" which tells
+perltidy to write output in html; without it, the html formatting
+parameters described here will all be ignored.  Also please note that at
+present perltidy is either in "html mode" or "beautification mode", but
+not both, so it does not do any indentation or formatting when the
+B<-html> option is used.  The input file is decorated with HTML tags but
+otherwise left unchanged.  Therefore any indenting or reformatting must
+be done in a preliminary separate run without the B<-html> switch.
+
+This documentation has been split from the rest of the perltidy
+documentation because it is expected that the perltidy -html capability
+will eventually be spun off into a new, independent program, to allow it
+to grow more easily.
+
+=head1 SYNOPSIS
+
+    perltidy -html [ other options ] file1 file2 file3 ...
+            (output goes to file1.html, file2.html, file3.html, ...)
+    perltidy -html [ other options ] file1 -o outfile
+    perltidy -html [ options ] file1 -st >outfile
+    perltidy -html [ options ] outfile
+
+=head1 DESCRIPTION
+
+Perltidy -html reads a Perl script and writes an a copy suitable for
+viewing with a web browser.
+
+For a quick introduction, see L<"EXAMPLES">. 
+
+For a complete description of the command line parameters, see L<"OPTIONS">. 
+
+=head1 EXAMPLES
+
+  perltidy -html somefile.pl
+
+This will produce a file F containing the script with
+html markup.  The output file will contain an embedded style sheet in
+the  section which may be edited to change the appearance.
+
+  perltidy -html -css=mystyle.css somefile.pl
+
+This will produce a file F containing the script with
+html markup.  This output file will contain a link to a separate style
+sheet file F.  If the file F does not exist,
+it will be created.  If it exists, it will not be overwritten.
+
+  perltidy -html -pre somefile.pl
+
+Write an html snippet with only the PRE section to F.
+This is useful when code snippets are being formatted for inclusion in a
+larger web page.  No style sheet will be written in this case.  
+
+  perltidy -html -ss >mystyle.css
+
+Write a style sheet to F and exit.
+
+=head1 OPTIONS
+
+=over 4
+
+=item  The B<-html> master switch
+
+The flag B<-html> causes perltidy to write an html file with extension
+F<.html>.  So, for example, the following command
+
+	perltidy -html somefile.pl
+
+will produce a syntax-colored html file named F
+which may be viewed with a browser.
+
+B: In this case, perltidy does not do any formatting to the
+input file, and it does not write a formatted file with extension
+F<.tdy>.  This means that two perltidy runs are required to create a
+fully reformatted, html copy of a script.  
+
+=item	B<-oext>=ext,    B<--output-file-extension>=ext  
+
+Change the extension of the output file to be F<.ext> instead of the
+default F<.html>.
+
+=item  The B<-pre> flag for code snippets
+
+When the B<-pre> flag is given, only the pre-formatted section, within
+the 
 and 
tags, will be output. This simplifies inclusion +of the output in other files. The default is to output a complete +web page. + +=item The B<-nnn> flag for line numbering + +When the B<-nnn> flag is given, the output lines will be numbered. + +=item Style Sheets + +Style sheets make it very convenient to control and adjust the +appearance of html pages. The default behavior is to write a page of +html with an embedded style sheet. + +An alternative to an embedded style sheet is to create a page with a +link to an external style sheet. This is indicated with the +B<-css=filename>, where the external style sheet is F. The +external style sheet F will be created if and only if it does +not exist. This option is useful for controlling multiple pages from a +single style sheet. + +To cause perltidy to write a style sheet to standard output and exit, +use the B<-ss>, or B<--stylesheet>, flag. This is useful if the style +sheet could not be written for some reason, such as if the B<-pre> flag +was used. Thus, for example, + + perltidy -html -ss >mystyle.css + +will write a style sheet with the default properties to file +F. + +The use of style sheets is encouraged, but a web page without a style +sheets can be created with the flag B<-nss>. Use this option if you +must to be sure that older browsers (roughly speaking, versions prior to +4.0 of Netscape Navigator and Internet Explorer) can display the +syntax-coloring of the html files. + +=item Controlling HTML properties + +Syntax colors may be changed from their default values by flags of the either +the long form, B<-html-color-xxxxxx=n>, or more conveniently the short form, +B<-hcx=n>, where B is one of the following words, and B is the +corresponding abbreviation: + + Token Type xxxxxx x + ---------- -------- -- + comment comment c + number numeric n + identifier identifier i + bareword, function bareword w + keyword keyword k + quite, pattern quote q + here doc text here-doc-text h + here doc target here-doc-target hh + punctuation punctuation pu + parentheses paren p + structural braces structure s + semicolon semicolon sc + colon colon co + comma comma cm + label label j + sub definition name subroutine m + pod text pod-text pd + +A default set of colors has been defined, but they may be changed by providing +values to any of the following parameters, where B is either a 6 digit +hex RGB color value or an ascii name for a color, such as 'red'. + +To illustrate, the following command will produce an html +file F with "aqua" keywords: + + perltidy -html -hck=00ffff somefile.pl + +and this should be equivalent for most browsers: + + perltidy -html -hck=aqua somefile.pl + +Perltidy merely writes any non-hex names that it sees in the html file. +The following 16 color names are defined in the HTML 3.2 standard: + + black => 000000, + silver => c0c0c0, + gray => 808080, + white => ffffff, + maroon => 800000, + red => ff0000, + purple => 800080, + fuchsia => ff00ff, + green => 008000, + lime => 00ff00, + olive => 808000, + yellow => ffff00 + navy => 000080, + blue => 0000ff, + teal => 008080, + aqua => 00ffff, + +Many more names are supported in specific browsers, but it is safest +to use the hex codes for other colors. Helpful color tables can be +located with an internet search for "HTML color tables". + +Besides color, two other character attributes may be set: bold, and italics. +To set a token type to use bold, use the flag +B<-html-bold-xxxxxx> or B<-hbx>, where B or B are the long +or short names from the above table. Conversely, to set a token type to +NOT use bold, use B<-nohtml-bold-xxxxxx> or B<-nhbx>. + +Likewise, to set a token type to use an italic font, use the flag +B<-html-italic-xxxxxx> or B<-hix>, where again B or B are the +long or short names from the above table. And to set a token type to +NOT use italics, use B<-nohtml-italic-xxxxxx> or B<-nhix>. + +For example, to use bold braces and lime color, non-bold, italics keywords the +following command would be used: + + perltidy -html -hbs -hck=00FF00 -nhbk -hik somefile.pl + +The background color can be specified with B<-html-color-background=n>, +or B<-hcbg=n> for short, where n is a 6 character hex RGB value. The +default color of text is the value given to B, which is +black as a default. + +Here are some notes and hints: + +1. If you find a preferred set of these parameters, you may want +to create a F<.perltidyrc> file containing them. See the perltidy man +page for an explanation. + +2. Rather than specifying values for these parameters, it may be easier +to accept the defaults and then edit a style sheet. The style sheet +contains helpful comments which should make this easy. + +3. The syntax-colored html files can be very large, so it may be best to +split large files into smaller pieces to improve download times. + +4. The list of token types is expected to evolve over time as further +tokenizer improvements allow a refinement in the available token types, +so you should occasionally check for updates to this program if you use +it frequently. + +=back + +=head1 SEE ALSO + +perltidy(1) + +=head1 VERSION + +This man page documents perltidy version 20020214. + +=head1 AUTHOR + + Steven L. Hancock + email: perltidy at users.sourceforge.net + http://perltidy.sourceforge.net + +Bug reports and suggestions for new features are always welcome. + +=head1 COPYRIGHT + +Copyright (c) 2000-2002 by Steven L. Hancock + +=head1 LICENSE + +This package is free software; you can redistribute it and/or modify it +under the terms of the "GNU General Public License". + +Please refer to the file "COPYING" for details. + +=head1 DISCLAIMER + +This package is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +See the "GNU General Public License" for more details. diff --git a/local-docs/perltidy.1 b/local-docs/perltidy.1 new file mode 100644 index 00000000..621e3915 --- /dev/null +++ b/local-docs/perltidy.1 @@ -0,0 +1,3596 @@ +.\" Automatically generated by Pod::Man 4.10 (Pod::Simple 3.29) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +. ds C` +. ds C' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is >0, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.\" +.\" Avoid warning from groff about undefined register 'F'. +.de IX +.. +.nr rF 0 +.if \n(.g .if rF .nr rF 1 +.if (\n(rF:(\n(.g==0)) \{\ +. if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. if !\nF==2 \{\ +. nr % 0 +. nr F 2 +. \} +. \} +.\} +.rr rF +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "PERLTIDY 1" +.TH PERLTIDY 1 "2018-03-22" "perl v5.22.1" "User Contributed Perl Documentation" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SH "NAME" +perltidy \- a perl script indenter and reformatter +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +.Vb 5 +\& perltidy [ options ] file1 file2 file3 ... +\& (output goes to file1.tdy, file2.tdy, file3.tdy, ...) +\& perltidy [ options ] file1 \-o outfile +\& perltidy [ options ] file1 \-st >outfile +\& perltidy [ options ] outfile +.Ve +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +Perltidy reads a perl script and writes an indented, reformatted script. +.PP +Many users will find enough information in \*(L"\s-1EXAMPLES\*(R"\s0 to get +started. New users may benefit from the short tutorial +which can be found at +http://perltidy.sourceforge.net/tutorial.html +.PP +A convenient aid to systematically defining a set of style parameters +can be found at +http://perltidy.sourceforge.net/stylekey.html +.PP +Perltidy can produce output on either of two modes, depending on the +existence of an \fB\-html\fR flag. Without this flag, the output is passed +through a formatter. The default formatting tries to follow the +recommendations in \fBperlstyle\fR\|(1), but it can be controlled in detail with +numerous input parameters, which are described in \*(L"\s-1FORMATTING +OPTIONS\*(R"\s0. +.PP +When the \fB\-html\fR flag is given, the output is passed through an \s-1HTML\s0 +formatter which is described in \*(L"\s-1HTML OPTIONS\*(R"\s0. +.SH "EXAMPLES" +.IX Header "EXAMPLES" +.Vb 1 +\& perltidy somefile.pl +.Ve +.PP +This will produce a file \fIsomefile.pl.tdy\fR containing the script reformatted +using the default options, which approximate the style suggested in +\&\fBperlstyle\fR\|(1). The source file \fIsomefile.pl\fR is unchanged. +.PP +.Vb 1 +\& perltidy *.pl +.Ve +.PP +Execute perltidy on all \fI.pl\fR files in the current directory with the +default options. The output will be in files with an appended \fI.tdy\fR +extension. For any file with an error, there will be a file with extension +\&\fI.ERR\fR. +.PP +.Vb 1 +\& perltidy \-b file1.pl file2.pl +.Ve +.PP +Modify \fIfile1.pl\fR and \fIfile2.pl\fR in place, and backup the originals to +\&\fIfile1.pl.bak\fR and \fIfile2.pl.bak\fR. If \fIfile1.pl.bak\fR and/or \fIfile2.pl.bak\fR +already exist, they will be overwritten. +.PP +.Vb 1 +\& perltidy \-b \-bext=\*(Aq/\*(Aq file1.pl file2.pl +.Ve +.PP +Same as the previous example except that the backup files \fIfile1.pl.bak\fR and \fIfile2.pl.bak\fR will be deleted if there are no errors. +.PP +.Vb 1 +\& perltidy \-gnu somefile.pl +.Ve +.PP +Execute perltidy on file \fIsomefile.pl\fR with a style which approximates the +\&\s-1GNU\s0 Coding Standards for C programs. The output will be \fIsomefile.pl.tdy\fR. +.PP +.Vb 1 +\& perltidy \-i=3 somefile.pl +.Ve +.PP +Execute perltidy on file \fIsomefile.pl\fR, with 3 columns for each level of +indentation (\fB\-i=3\fR) instead of the default 4 columns. There will not be any +tabs in the reformatted script, except for any which already exist in comments, +pod documents, quotes, and here documents. Output will be \fIsomefile.pl.tdy\fR. +.PP +.Vb 1 +\& perltidy \-i=3 \-et=8 somefile.pl +.Ve +.PP +Same as the previous example, except that leading whitespace will +be entabbed with one tab character per 8 spaces. +.PP +.Vb 1 +\& perltidy \-ce \-l=72 somefile.pl +.Ve +.PP +Execute perltidy on file \fIsomefile.pl\fR with all defaults except use \*(L"cuddled +elses\*(R" (\fB\-ce\fR) and a maximum line length of 72 columns (\fB\-l=72\fR) instead of +the default 80 columns. +.PP +.Vb 1 +\& perltidy \-g somefile.pl +.Ve +.PP +Execute perltidy on file \fIsomefile.pl\fR and save a log file \fIsomefile.pl.LOG\fR +which shows the nesting of braces, parentheses, and square brackets at +the start of every line. +.PP +.Vb 1 +\& perltidy \-html somefile.pl +.Ve +.PP +This will produce a file \fIsomefile.pl.html\fR containing the script with +html markup. The output file will contain an embedded style sheet in +the <\s-1HEAD\s0> section which may be edited to change the appearance. +.PP +.Vb 1 +\& perltidy \-html \-css=mystyle.css somefile.pl +.Ve +.PP +This will produce a file \fIsomefile.pl.html\fR containing the script with +html markup. This output file will contain a link to a separate style +sheet file \fImystyle.css\fR. If the file \fImystyle.css\fR does not exist, +it will be created. If it exists, it will not be overwritten. +.PP +.Vb 1 +\& perltidy \-html \-pre somefile.pl +.Ve +.PP +Write an html snippet with only the \s-1PRE\s0 section to \fIsomefile.pl.html\fR. +This is useful when code snippets are being formatted for inclusion in a +larger web page. No style sheet will be written in this case. +.PP +.Vb 1 +\& perltidy \-html \-ss >mystyle.css +.Ve +.PP +Write a style sheet to \fImystyle.css\fR and exit. +.PP +.Vb 1 +\& perltidy \-html \-frm mymodule.pm +.Ve +.PP +Write html with a frame holding a table of contents and the source code. The +output files will be \fImymodule.pm.html\fR (the frame), \fImymodule.pm.toc.html\fR +(the table of contents), and \fImymodule.pm.src.html\fR (the source code). +.SH "OPTIONS \- OVERVIEW" +.IX Header "OPTIONS - OVERVIEW" +The entire command line is scanned for options, and they are processed +before any files are processed. As a result, it does not matter +whether flags are before or after any filenames. However, the relative +order of parameters is important, with later parameters overriding the +values of earlier parameters. +.PP +For each parameter, there is a long name and a short name. The short +names are convenient for keyboard input, while the long names are +self-documenting and therefore useful in scripts. It is customary to +use two leading dashes for long names, but one may be used. +.PP +Most parameters which serve as on/off flags can be negated with a +leading \*(L"n\*(R" (for the short name) or a leading \*(L"no\*(R" or \*(L"no\-\*(R" (for the +long name). For example, the flag to outdent long quotes is \fB\-olq\fR +or \fB\-\-outdent\-long\-quotes\fR. The flag to skip this is \fB\-nolq\fR +or \fB\-\-nooutdent\-long\-quotes\fR or \fB\-\-no\-outdent\-long\-quotes\fR. +.PP +Options may not be bundled together. In other words, options \fB\-q\fR and +\&\fB\-g\fR may \s-1NOT\s0 be entered as \fB\-qg\fR. +.PP +Option names may be terminated early as long as they are uniquely identified. +For example, instead of \fB\-\-dump\-token\-types\fR, it would be sufficient to enter +\&\fB\-\-dump\-tok\fR, or even \fB\-\-dump\-t\fR, to uniquely identify this command. +.SS "I/O control" +.IX Subsection "I/O control" +The following parameters concern the files which are read and written. +.IP "\fB\-h\fR, \fB\-\-help\fR" 4 +.IX Item "-h, --help" +Show summary of usage and exit. +.IP "\fB\-o\fR=filename, \fB\-\-outfile\fR=filename" 4 +.IX Item "-o=filename, --outfile=filename" +Name of the output file (only if a single input file is being +processed). If no output file is specified, and output is not +redirected to the standard output, the output will go to \fIfilename.tdy\fR. +.IP "\fB\-st\fR, \fB\-\-standard\-output\fR" 4 +.IX Item "-st, --standard-output" +Perltidy must be able to operate on an arbitrarily large number of files +in a single run, with each output being directed to a different output +file. Obviously this would conflict with outputting to the single +standard output device, so a special flag, \fB\-st\fR, is required to +request outputting to the standard output. For example, +.Sp +.Vb 1 +\& perltidy somefile.pl \-st >somefile.new.pl +.Ve +.Sp +This option may only be used if there is just a single input file. +The default is \fB\-nst\fR or \fB\-\-nostandard\-output\fR. +.IP "\fB\-se\fR, \fB\-\-standard\-error\-output\fR" 4 +.IX Item "-se, --standard-error-output" +If perltidy detects an error when processing file \fIsomefile.pl\fR, its +default behavior is to write error messages to file \fIsomefile.pl.ERR\fR. +Use \fB\-se\fR to cause all error messages to be sent to the standard error +output stream instead. This directive may be negated with \fB\-nse\fR. +Thus, you may place \fB\-se\fR in a \fI.perltidyrc\fR and override it when +desired with \fB\-nse\fR on the command line. +.IP "\fB\-oext\fR=ext, \fB\-\-output\-file\-extension\fR=ext" 4 +.IX Item "-oext=ext, --output-file-extension=ext" +Change the extension of the output file to be \fIext\fR instead of the +default \fItdy\fR (or \fIhtml\fR in case the \-\fB\-html\fR option is used). +See \*(L"Specifying File Extensions\*(R". +.IP "\fB\-opath\fR=path, \fB\-\-output\-path\fR=path" 4 +.IX Item "-opath=path, --output-path=path" +When perltidy creates a filename for an output file, by default it merely +appends an extension to the path and basename of the input file. This +parameter causes the path to be changed to \fIpath\fR instead. +.Sp +The path should end in a valid path separator character, but perltidy will try +to add one if it is missing. +.Sp +For example +.Sp +.Vb 1 +\& perltidy somefile.pl \-opath=/tmp/ +.Ve +.Sp +will produce \fI/tmp/somefile.pl.tdy\fR. Otherwise, \fIsomefile.pl.tdy\fR will +appear in whatever directory contains \fIsomefile.pl\fR. +.Sp +If the path contains spaces, it should be placed in quotes. +.Sp +This parameter will be ignored if output is being directed to standard output, +or if it is being specified explicitly with the \fB\-o=s\fR parameter. +.IP "\fB\-b\fR, \fB\-\-backup\-and\-modify\-in\-place\fR" 4 +.IX Item "-b, --backup-and-modify-in-place" +Modify the input file or files in-place and save the original with the +extension \fI.bak\fR. Any existing \fI.bak\fR file will be deleted. See next +item for changing the default backup extension, and for eliminating the +backup file altogether. +.Sp +A \fB\-b\fR flag will be ignored if input is from standard input or goes to +standard output, or if the \fB\-html\fR flag is set. +.Sp +In particular, if you want to use both the \fB\-b\fR flag and the \fB\-pbp\fR +(\-\-perl\-best\-practices) flag, then you must put a \fB\-nst\fR flag after the +\&\fB\-pbp\fR flag because it contains a \fB\-st\fR flag as one of its components, +which means that output will go to the standard output stream. +.IP "\fB\-bext\fR=ext, \fB\-\-backup\-file\-extension\fR=ext" 4 +.IX Item "-bext=ext, --backup-file-extension=ext" +This parameter serves two purposes: (1) to change the extension of the backup +file to be something other than the default \fI.bak\fR, and (2) to indicate +that no backup file should be saved. +.Sp +To change the default extension to something other than \fI.bak\fR see +\&\*(L"Specifying File Extensions\*(R". +.Sp +A backup file of the source is always written, but you can request that it +be deleted at the end of processing if there were no errors. This is risky +unless the source code is being maintained with a source code control +system. +.Sp +To indicate that the backup should be deleted include one forward slash, +\&\fB/\fR, in the extension. If any text remains after the slash is removed +it will be used to define the backup file extension (which is always +created and only deleted if there were no errors). +.Sp +Here are some examples: +.Sp +.Vb 5 +\& Parameter Extension Backup File Treatment +\& <\-bext=bak> F<.bak> Keep (same as the default behavior) +\& <\-bext=\*(Aq/\*(Aq> F<.bak> Delete if no errors +\& <\-bext=\*(Aq/backup\*(Aq> F<.backup> Delete if no errors +\& <\-bext=\*(Aqoriginal/\*(Aq> F<.original> Delete if no errors +.Ve +.IP "\fB\-w\fR, \fB\-\-warning\-output\fR" 4 +.IX Item "-w, --warning-output" +Setting \fB\-w\fR causes any non-critical warning +messages to be reported as errors. These include messages +about possible pod problems, possibly bad starting indentation level, +and cautions about indirect object usage. The default, \fB\-nw\fR or +\&\fB\-\-nowarning\-output\fR, is not to include these warnings. +.IP "\fB\-q\fR, \fB\-\-quiet\fR" 4 +.IX Item "-q, --quiet" +Deactivate error messages and syntax checking (for running under +an editor). +.Sp +For example, if you use a vi-style editor, such as vim, you may execute +perltidy as a filter from within the editor using something like +.Sp +.Vb 1 +\& :n1,n2!perltidy \-q +.Ve +.Sp +where \f(CW\*(C`n1,n2\*(C'\fR represents the selected text. Without the \fB\-q\fR flag, +any error message may mess up your screen, so be prepared to use your +\&\*(L"undo\*(R" key. +.IP "\fB\-log\fR, \fB\-\-logfile\fR" 4 +.IX Item "-log, --logfile" +Save the \fI.LOG\fR file, which has many useful diagnostics. Perltidy always +creates a \fI.LOG\fR file, but by default it is deleted unless a program bug is +suspected. Setting the \fB\-log\fR flag forces the log file to be saved. +.IP "\fB\-g=n\fR, \fB\-\-logfile\-gap=n\fR" 4 +.IX Item "-g=n, --logfile-gap=n" +Set maximum interval between input code lines in the logfile. This purpose of +this flag is to assist in debugging nesting errors. The value of \f(CW\*(C`n\*(C'\fR is +optional. If you set the flag \fB\-g\fR without the value of \f(CW\*(C`n\*(C'\fR, it will be +taken to be 1, meaning that every line will be written to the log file. This +can be helpful if you are looking for a brace, paren, or bracket nesting error. +.Sp +Setting \fB\-g\fR also causes the logfile to be saved, so it is not necessary to +also include \fB\-log\fR. +.Sp +If no \fB\-g\fR flag is given, a value of 50 will be used, meaning that at least +every 50th line will be recorded in the logfile. This helps prevent +excessively long log files. +.Sp +Setting a negative value of \f(CW\*(C`n\*(C'\fR is the same as not setting \fB\-g\fR at all. +.IP "\fB\-npro\fR \fB\-\-noprofile\fR" 4 +.IX Item "-npro --noprofile" +Ignore any \fI.perltidyrc\fR command file. Normally, perltidy looks first in +your current directory for a \fI.perltidyrc\fR file of parameters. (The format +is described below). If it finds one, it applies those options to the +initial default values, and then it applies any that have been defined +on the command line. If no \fI.perltidyrc\fR file is found, it looks for one +in your home directory. +.Sp +If you set the \fB\-npro\fR flag, perltidy will not look for this file. +.IP "\fB\-pro=filename\fR or \fB\-\-profile=filename\fR" 4 +.IX Item "-pro=filename or --profile=filename" +To simplify testing and switching .perltidyrc files, this command may be +used to specify a configuration file which will override the default +name of .perltidyrc. There must not be a space on either side of the +\&'=' sign. For example, the line +.Sp +.Vb 1 +\& perltidy \-pro=testcfg +.Ve +.Sp +would cause file \fItestcfg\fR to be used instead of the +default \fI.perltidyrc\fR. +.Sp +A pathname begins with three dots, e.g. \*(L".../.perltidyrc\*(R", indicates that +the file should be searched for starting in the current directory and +working upwards. This makes it easier to have multiple projects each with +their own .perltidyrc in their root directories. +.IP "\fB\-opt\fR, \fB\-\-show\-options\fR" 4 +.IX Item "-opt, --show-options" +Write a list of all options used to the \fI.LOG\fR file. +Please see \fB\-\-dump\-options\fR for a simpler way to do this. +.IP "\fB\-f\fR, \fB\-\-force\-read\-binary\fR" 4 +.IX Item "-f, --force-read-binary" +Force perltidy to process binary files. To avoid producing excessive +error messages, perltidy skips files identified by the system as non-text. +However, valid perl scripts containing binary data may sometimes be identified +as non-text, and this flag forces perltidy to process them. +.SH "FORMATTING OPTIONS" +.IX Header "FORMATTING OPTIONS" +.SS "Basic Options" +.IX Subsection "Basic Options" +.IP "\fB\-\-notidy\fR" 4 +.IX Item "--notidy" +This flag disables all formatting and causes the input to be copied unchanged +to the output except for possible changes in line ending characters and any +pre\- and post-filters. This can be useful in conjunction with a hierarchical +set of \fI.perltidyrc\fR files to avoid unwanted code tidying. See also +\&\*(L"Skipping Selected Sections of Code\*(R" for a way to avoid tidying specific +sections of code. +.IP "\fB\-i=n\fR, \fB\-\-indent\-columns=n\fR" 4 +.IX Item "-i=n, --indent-columns=n" +Use n columns per indentation level (default n=4). +.IP "\fB\-l=n\fR, \fB\-\-maximum\-line\-length=n\fR" 4 +.IX Item "-l=n, --maximum-line-length=n" +The default maximum line length is n=80 characters. Perltidy will try +to find line break points to keep lines below this length. However, long +quotes and side comments may cause lines to exceed this length. +Setting \fB\-l=0\fR is equivalent to setting \fB\-l=(a large number)\fR. +.IP "\fB\-vmll\fR, \fB\-\-variable\-maximum\-line\-length\fR" 4 +.IX Item "-vmll, --variable-maximum-line-length" +A problem arises using a fixed maximum line length with very deeply nested code +and data structures because eventually the amount of leading whitespace used +for indicating indentation takes up most or all of the available line width, +leaving little or no space for the actual code or data. One solution is to use +a vary long line length. Another solution is to use the \fB\-vmll\fR flag, which +basically tells perltidy to ignore leading whitespace when measuring the line +length. +.Sp +To be precise, when the \fB\-vmll\fR parameter is set, the maximum line length of a +line of code will be M+L*I, where +.Sp +.Vb 3 +\& M is the value of \-\-maximum\-line\-length=M (\-l=M), default 80, +\& I is the value of \-\-indent\-columns=I (\-i=I), default 4, +\& L is the indentation level of the line of code +.Ve +.Sp +When this flag is set, the choice of breakpoints for a block of code should be +essentially independent of its nesting depth. However, the absolute line +lengths, including leading whitespace, can still be arbitrarily large. This +problem can be avoided by including the next parameter. +.Sp +The default is not to do this (\fB\-nvmll\fR). +.IP "\fB\-wc=n\fR, \fB\-\-whitespace\-cycle=n\fR" 4 +.IX Item "-wc=n, --whitespace-cycle=n" +This flag also addresses problems with very deeply nested code and data +structures. When the nesting depth exceeds the value \fBn\fR the leading +whitespace will be reduced and start at a depth of 1 again. The result is that +blocks of code will shift back to the left rather than moving arbitrarily far +to the right. This occurs cyclically to any depth. +.Sp +For example if one level of indentation equals 4 spaces (\fB\-i=4\fR, the default), +and one uses \fB\-wc=15\fR, then if the leading whitespace on a line exceeds about +4*15=60 spaces it will be reduced back to 4*1=4 spaces and continue increasing +from there. If the whitespace never exceeds this limit the formatting remains +unchanged. +.Sp +The combination of \fB\-vmll\fR and \fB\-wc=n\fR provides a solution to the problem of +displaying arbitrarily deep data structures and code in a finite window, +although \fB\-wc=n\fR may of course be used without \fB\-vmll\fR. +.Sp +The default is not to use this, which can also be indicated using \fB\-wc=0\fR. +.IP "tabs" 4 +.IX Item "tabs" +Using tab characters will almost certainly lead to future portability +and maintenance problems, so the default and recommendation is not to +use them. For those who prefer tabs, however, there are two different +options. +.Sp +Except for possibly introducing tab indentation characters, as outlined +below, perltidy does not introduce any tab characters into your file, +and it removes any tabs from the code (unless requested not to do so +with \fB\-fws\fR). If you have any tabs in your comments, quotes, or +here-documents, they will remain. +.RS 4 +.IP "\fB\-et=n\fR, \fB\-\-entab\-leading\-whitespace\fR" 4 +.IX Item "-et=n, --entab-leading-whitespace" +This flag causes each \fBn\fR initial space characters to be replaced by +one tab character. Note that the integer \fBn\fR is completely independent +of the integer specified for indentation parameter, \fB\-i=n\fR. +.IP "\fB\-t\fR, \fB\-\-tabs\fR" 4 +.IX Item "-t, --tabs" +This flag causes one leading tab character to be inserted for each level +of indentation. Certain other features are incompatible with this +option, and if these options are also given, then a warning message will +be issued and this flag will be unset. One example is the \fB\-lp\fR +option. +.IP "\fB\-dt=n\fR, \fB\-\-default\-tabsize=n\fR" 4 +.IX Item "-dt=n, --default-tabsize=n" +If the first line of code passed to perltidy contains leading tabs but no +tab scheme is specified for the output stream then perltidy must guess how many +spaces correspond to each leading tab. This number of spaces \fBn\fR +corresponding to each leading tab of the input stream may be specified with +\&\fB\-dt=n\fR. The default is \fBn=8\fR. +.Sp +This flag has no effect if a tab scheme is specified for the output stream, +because then the input stream is assumed to use the same tab scheme and +indentation spaces as for the output stream (any other assumption would lead to +unstable editing). +.RE +.RS 4 +.RE +.IP "\fB\-syn\fR, \fB\-\-check\-syntax\fR" 4 +.IX Item "-syn, --check-syntax" +This flag is now ignored for safety, but the following documentation +has been retained for reference. +.Sp +This flag causes perltidy to run \f(CW\*(C`perl \-c \-T\*(C'\fR to check syntax of input +and output. (To change the flags passed to perl, see the next +item, \fB\-pscf\fR). The results are written to the \fI.LOG\fR file, which +will be saved if an error is detected in the output script. The output +script is not checked if the input script has a syntax error. Perltidy +does its own checking, but this option employs perl to get a \*(L"second +opinion\*(R". +.Sp +If perl reports errors in the input file, they will not be reported in +the error output unless the \fB\-\-warning\-output\fR flag is given. +.Sp +The default is \fB\s-1NOT\s0\fR to do this type of syntax checking (although +perltidy will still do as much self-checking as possible). The reason +is that it causes all code in \s-1BEGIN\s0 blocks to be executed, for all +modules being used, and this opens the door to security issues and +infinite loops when running perltidy. +.IP "\fB\-pscf=s\fR, \fB\-perl\-syntax\-check\-flags=s\fR" 4 +.IX Item "-pscf=s, -perl-syntax-check-flags=s" +When perl is invoked to check syntax, the normal flags are \f(CW\*(C`\-c \-T\*(C'\fR. In +addition, if the \fB\-x\fR flag is given to perltidy, then perl will also be +passed a \fB\-x\fR flag. It should not normally be necessary to change +these flags, but it can be done with the \fB\-pscf=s\fR flag. For example, +if the taint flag, \f(CW\*(C`\-T\*(C'\fR, is not wanted, the flag could be set to be just +\&\fB\-pscf=\-c\fR. +.Sp +Perltidy will pass your string to perl with the exception that it will +add a \fB\-c\fR and \fB\-x\fR if appropriate. The \fI.LOG\fR file will show +exactly what flags were passed to perl. +.IP "\fB\-xs\fR, \fB\-\-extended\-syntax\fR" 4 +.IX Item "-xs, --extended-syntax" +A problem with formatting Perl code is that some modules can introduce new +syntax. This flag allows perltidy to handle certain common extensions +to the standard syntax without complaint. +.Sp +For example, without this flag a structure such as the following would generate +a syntax error and the braces would not be balanced: +.Sp +.Vb 3 +\& method deposit( Num $amount) { +\& $self\->balance( $self\->balance + $amount ); +\& } +.Ve +.Sp +This flag is enabled by default but it can be deactivated with \fB\-nxs\fR. +Probably the only reason to deactivate this flag is to generate more diagnostic +messages when debugging a script. +.IP "\fB\-io\fR, \fB\-\-indent\-only\fR" 4 +.IX Item "-io, --indent-only" +This flag is used to deactivate all whitespace and line break changes +within non-blank lines of code. +When it is in effect, the only change to the script will be +to the indentation and to the number of blank lines. +And any flags controlling whitespace and newlines will be ignored. You +might want to use this if you are perfectly happy with your whitespace +and line breaks, and merely want perltidy to handle the indentation. +(This also speeds up perltidy by well over a factor of two, so it might be +useful when perltidy is merely being used to help find a brace error in +a large script). +.Sp +Setting this flag is equivalent to setting \fB\-\-freeze\-newlines\fR and +\&\fB\-\-freeze\-whitespace\fR. +.Sp +If you also want to keep your existing blank lines exactly +as they are, you can add \fB\-\-freeze\-blank\-lines\fR. +.Sp +With this option perltidy is still free to modify the indenting (and +outdenting) of code and comments as it normally would. If you also want to +prevent long comment lines from being outdented, you can add either \fB\-noll\fR or +\&\fB\-l=0\fR. +.Sp +Setting this flag will prevent perltidy from doing any special operations on +closing side comments. You may still delete all side comments however when +this flag is in effect. +.IP "\fB\-enc=s\fR, \fB\-\-character\-encoding=s\fR" 4 +.IX Item "-enc=s, --character-encoding=s" +where \fBs\fR=\fBnone\fR or \fButf8\fR. This flag tells perltidy the character encoding +of both the input and output character streams. The value \fButf8\fR causes the +stream to be read and written as \s-1UTF\-8.\s0 The value \fBnone\fR causes the stream to +be processed without special encoding assumptions. At present there is no +automatic detection of character encoding (even if there is a \f(CW\*(Aquse utf8\*(Aq\fR +statement in your code) so this flag must be set for streams encoded in \s-1UTF\-8.\s0 +Incorrectly setting this parameter can cause data corruption, so please +carefully check the output. +.Sp +The default is \fBnone\fR. +.Sp +The abbreviations \fB\-utf8\fR or \fB\-UTF8\fR are equivalent to \fB\-enc=utf8\fR. +So to process a file named \fBfile.pl\fR which is encoded in \s-1UTF\-8\s0 you can use: +.Sp +.Vb 1 +\& perltidy \-utf8 file.pl +.Ve +.IP "\fB\-ole=s\fR, \fB\-\-output\-line\-ending=s\fR" 4 +.IX Item "-ole=s, --output-line-ending=s" +where s=\f(CW\*(C`win\*(C'\fR, \f(CW\*(C`dos\*(C'\fR, \f(CW\*(C`unix\*(C'\fR, or \f(CW\*(C`mac\*(C'\fR. This flag tells perltidy +to output line endings for a specific system. Normally, +perltidy writes files with the line separator character of the host +system. The \f(CW\*(C`win\*(C'\fR and \f(CW\*(C`dos\*(C'\fR flags have an identical result. +.IP "\fB\-ple\fR, \fB\-\-preserve\-line\-endings\fR" 4 +.IX Item "-ple, --preserve-line-endings" +This flag tells perltidy to write its output files with the same line +endings as the input file, if possible. It should work for +\&\fBdos\fR, \fBunix\fR, and \fBmac\fR line endings. It will only work if perltidy +input comes from a filename (rather than stdin, for example). If +perltidy has trouble determining the input file line ending, it will +revert to the default behavior of using the line ending of the host system. +.IP "\fB\-it=n\fR, \fB\-\-iterations=n\fR" 4 +.IX Item "-it=n, --iterations=n" +This flag causes perltidy to do \fBn\fR complete iterations. The reason for this +flag is that code beautification is an iterative process and in some +cases the output from perltidy can be different if it is applied a second time. +For most purposes the default of \fBn=1\fR should be satisfactory. However \fBn=2\fR +can be useful when a major style change is being made, or when code is being +beautified on check-in to a source code control system. It has been found to +be extremely rare for the output to change after 2 iterations. If a value +\&\fBn\fR is greater than 2 is input then a convergence test will be used to stop +the iterations as soon as possible, almost always after 2 iterations. See +the next item for a simplified iteration control. +.Sp +This flag has no effect when perltidy is used to generate html. +.IP "\fB\-conv\fR, \fB\-\-converge\fR" 4 +.IX Item "-conv, --converge" +This flag is equivalent to \fB\-it=4\fR and is included to simplify iteration +control. For all practical purposes one either does or does not want to be +sure that the output is converged, and there is no penalty to using a large +iteration limit since perltidy will check for convergence and stop iterating as +soon as possible. The default is \fB\-nconv\fR (no convergence check). Using +\&\fB\-conv\fR will approximately double run time since normally one extra iteration +is required to verify convergence. +.SS "Code Indentation Control" +.IX Subsection "Code Indentation Control" +.IP "\fB\-ci=n\fR, \fB\-\-continuation\-indentation=n\fR" 4 +.IX Item "-ci=n, --continuation-indentation=n" +Continuation indentation is extra indentation spaces applied when +a long line is broken. The default is n=2, illustrated here: +.Sp +.Vb 2 +\& my $level = # \-ci=2 +\& ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; +.Ve +.Sp +The same example, with n=0, is a little harder to read: +.Sp +.Vb 2 +\& my $level = # \-ci=0 +\& ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; +.Ve +.Sp +The value given to \fB\-ci\fR is also used by some commands when a small +space is required. Examples are commands for outdenting labels, +\&\fB\-ola\fR, and control keywords, \fB\-okw\fR. +.Sp +When default values are not used, it is suggested that the value \fBn\fR +given with \fB\-ci=n\fR be no more than about one-half of the number of +spaces assigned to a full indentation level on the \fB\-i=n\fR command. +.IP "\fB\-sil=n\fR \fB\-\-starting\-indentation\-level=n\fR" 4 +.IX Item "-sil=n --starting-indentation-level=n" +By default, perltidy examines the input file and tries to determine the +starting indentation level. While it is often zero, it may not be +zero for a code snippet being sent from an editing session. +.Sp +To guess the starting indentation level perltidy simply assumes that +indentation scheme used to create the code snippet is the same as is being used +for the current perltidy process. This is the only sensible guess that can be +made. It should be correct if this is true, but otherwise it probably won't. +For example, if the input script was written with \-i=2 and the current peltidy +flags have \-i=4, the wrong initial indentation will be guessed for a code +snippet which has non-zero initial indentation. Likewise, if an entabbing +scheme is used in the input script and not in the current process then the +guessed indentation will be wrong. +.Sp +If the default method does not work correctly, or you want to change the +starting level, use \fB\-sil=n\fR, to force the starting level to be n. +.IP "List indentation using \fB\-lp\fR, \fB\-\-line\-up\-parentheses\fR" 4 +.IX Item "List indentation using -lp, --line-up-parentheses" +By default, perltidy indents lists with 4 spaces, or whatever value +is specified with \fB\-i=n\fR. Here is a small list formatted in this way: +.Sp +.Vb 5 +\& # perltidy (default) +\& @month_of_year = ( +\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, +\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq +\& ); +.Ve +.Sp +Use the \fB\-lp\fR flag to add extra indentation to cause the data to begin +past the opening parentheses of a sub call or list, or opening square +bracket of an anonymous array, or opening curly brace of an anonymous +hash. With this option, the above list would become: +.Sp +.Vb 5 +\& # perltidy \-lp +\& @month_of_year = ( +\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, +\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq +\& ); +.Ve +.Sp +If the available line length (see \fB\-l=n\fR ) does not permit this much +space, perltidy will use less. For alternate placement of the +closing paren, see the next section. +.Sp +This option has no effect on code \s-1BLOCKS,\s0 such as if/then/else blocks, +which always use whatever is specified with \fB\-i=n\fR. Also, the +existence of line breaks and/or block comments between the opening and +closing parens may cause perltidy to temporarily revert to its default +method. +.Sp +Note: The \fB\-lp\fR option may not be used together with the \fB\-t\fR tabs option. +It may, however, be used with the \fB\-et=n\fR tab method. +.Sp +In addition, any parameter which significantly restricts the ability of +perltidy to choose newlines will conflict with \fB\-lp\fR and will cause +\&\fB\-lp\fR to be deactivated. These include \fB\-io\fR, \fB\-fnl\fR, \fB\-nanl\fR, and +\&\fB\-ndnl\fR. The reason is that the \fB\-lp\fR indentation style can require +the careful coordination of an arbitrary number of break points in +hierarchical lists, and these flags may prevent that. +.IP "\fB\-cti=n\fR, \fB\-\-closing\-token\-indentation\fR" 4 +.IX Item "-cti=n, --closing-token-indentation" +The \fB\-cti=n\fR flag controls the indentation of a line beginning with +a \f(CW\*(C`)\*(C'\fR, \f(CW\*(C`]\*(C'\fR, or a non-block \f(CW\*(C`}\*(C'\fR. Such a line receives: +.Sp +.Vb 6 +\& \-cti = 0 no extra indentation (default) +\& \-cti = 1 extra indentation such that the closing token +\& aligns with its opening token. +\& \-cti = 2 one extra indentation level if the line looks like: +\& ); or ]; or }; +\& \-cti = 3 one extra indentation level always +.Ve +.Sp +The flags \fB\-cti=1\fR and \fB\-cti=2\fR work well with the \fB\-lp\fR flag (previous +section). +.Sp +.Vb 5 +\& # perltidy \-lp \-cti=1 +\& @month_of_year = ( +\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, +\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq +\& ); +\& +\& # perltidy \-lp \-cti=2 +\& @month_of_year = ( +\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, +\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, \*(AqNov\*(Aq, \*(AqDec\*(Aq +\& ); +.Ve +.Sp +These flags are merely hints to the formatter and they may not always be +followed. In particular, if \-lp is not being used, the indentation for +\&\fBcti=1\fR is constrained to be no more than one indentation level. +.Sp +If desired, this control can be applied independently to each of the +closing container token types. In fact, \fB\-cti=n\fR is merely an +abbreviation for \fB\-cpi=n \-csbi=n \-cbi=n\fR, where: +\&\fB\-cpi\fR or \fB\-\-closing\-paren\-indentation\fR controls \fB)\fR's, +\&\fB\-csbi\fR or \fB\-\-closing\-square\-bracket\-indentation\fR controls \fB]\fR's, +\&\fB\-cbi\fR or \fB\-\-closing\-brace\-indentation\fR controls non-block \fB}\fR's. +.IP "\fB\-icp\fR, \fB\-\-indent\-closing\-paren\fR" 4 +.IX Item "-icp, --indent-closing-paren" +The \fB\-icp\fR flag is equivalent to +\&\fB\-cti=2\fR, described in the previous section. The \fB\-nicp\fR flag is +equivalent \fB\-cti=0\fR. They are included for backwards compatibility. +.IP "\fB\-icb\fR, \fB\-\-indent\-closing\-brace\fR" 4 +.IX Item "-icb, --indent-closing-brace" +The \fB\-icb\fR option gives one extra level of indentation to a brace which +terminates a code block . For example, +.Sp +.Vb 6 +\& if ($task) { +\& yyy(); +\& } # \-icb +\& else { +\& zzz(); +\& } +.Ve +.Sp +The default is not to do this, indicated by \fB\-nicb\fR. +.IP "\fB\-olq\fR, \fB\-\-outdent\-long\-quotes\fR" 4 +.IX Item "-olq, --outdent-long-quotes" +When \fB\-olq\fR is set, lines which is a quoted string longer than the +value \fBmaximum-line-length\fR will have their indentation removed to make +them more readable. This is the default. To prevent such out-denting, +use \fB\-nolq\fR or \fB\-\-nooutdent\-long\-lines\fR. +.IP "\fB\-oll\fR, \fB\-\-outdent\-long\-lines\fR" 4 +.IX Item "-oll, --outdent-long-lines" +This command is equivalent to \fB\-\-outdent\-long\-quotes\fR and +\&\fB\-\-outdent\-long\-comments\fR, and it is included for compatibility with previous +versions of perltidy. The negation of this also works, \fB\-noll\fR or +\&\fB\-\-nooutdent\-long\-lines\fR, and is equivalent to setting \fB\-nolq\fR and \fB\-nolc\fR. +.IP "Outdenting Labels: \fB\-ola\fR, \fB\-\-outdent\-labels\fR" 4 +.IX Item "Outdenting Labels: -ola, --outdent-labels" +This command will cause labels to be outdented by 2 spaces (or whatever \fB\-ci\fR +has been set to), if possible. This is the default. For example: +.Sp +.Vb 6 +\& my $i; +\& LOOP: while ( $i = ) { +\& chomp($i); +\& next unless $i; +\& fixit($i); +\& } +.Ve +.Sp +Use \fB\-nola\fR to not outdent labels. +.IP "Outdenting Keywords" 4 +.IX Item "Outdenting Keywords" +.RS 4 +.PD 0 +.IP "\fB\-okw\fR, \fB\-\-outdent\-keywords\fR" 4 +.IX Item "-okw, --outdent-keywords" +.PD +The command \fB\-okw\fR will cause certain leading control keywords to +be outdented by 2 spaces (or whatever \fB\-ci\fR has been set to), if +possible. By default, these keywords are \f(CW\*(C`redo\*(C'\fR, \f(CW\*(C`next\*(C'\fR, \f(CW\*(C`last\*(C'\fR, +\&\f(CW\*(C`goto\*(C'\fR, and \f(CW\*(C`return\*(C'\fR. The intention is to make these control keywords +easier to see. To change this list of keywords being outdented, see +the next section. +.Sp +For example, using \f(CW\*(C`perltidy \-okw\*(C'\fR on the previous example gives: +.Sp +.Vb 6 +\& my $i; +\& LOOP: while ( $i = ) { +\& chomp($i); +\& next unless $i; +\& fixit($i); +\& } +.Ve +.Sp +The default is not to do this. +.IP "Specifying Outdented Keywords: \fB\-okwl=string\fR, \fB\-\-outdent\-keyword\-list=string\fR" 4 +.IX Item "Specifying Outdented Keywords: -okwl=string, --outdent-keyword-list=string" +This command can be used to change the keywords which are outdented with +the \fB\-okw\fR command. The parameter \fBstring\fR is a required list of perl +keywords, which should be placed in quotes if there are more than one. +By itself, it does not cause any outdenting to occur, so the \fB\-okw\fR +command is still required. +.Sp +For example, the commands \f(CW\*(C`\-okwl="next last redo goto" \-okw\*(C'\fR will cause +those four keywords to be outdented. It is probably simplest to place +any \fB\-okwl\fR command in a \fI.perltidyrc\fR file. +.RE +.RS 4 +.RE +.SS "Whitespace Control" +.IX Subsection "Whitespace Control" +Whitespace refers to the blank space between variables, operators, +and other code tokens. +.IP "\fB\-fws\fR, \fB\-\-freeze\-whitespace\fR" 4 +.IX Item "-fws, --freeze-whitespace" +This flag causes your original whitespace to remain unchanged, and +causes the rest of the whitespace commands in this section, the +Code Indentation section, and +the Comment Control section to be ignored. +.IP "Tightness of curly braces, parentheses, and square brackets." 4 +.IX Item "Tightness of curly braces, parentheses, and square brackets." +Here the term \*(L"tightness\*(R" will mean the closeness with which +pairs of enclosing tokens, such as parentheses, contain the quantities +within. A numerical value of 0, 1, or 2 defines the tightness, with +0 being least tight and 2 being most tight. Spaces within containers +are always symmetric, so if there is a space after a \f(CW\*(C`(\*(C'\fR then there +will be a space before the corresponding \f(CW\*(C`)\*(C'\fR. +.Sp +The \fB\-pt=n\fR or \fB\-\-paren\-tightness=n\fR parameter controls the space within +parens. The example below shows the effect of the three possible +values, 0, 1, and 2: +.Sp +.Vb 3 +\& if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # \-pt=0 +\& if ( ( my $len_tab = length($tabstr) ) > 0 ) { # \-pt=1 (default) +\& if ((my $len_tab = length($tabstr)) > 0) { # \-pt=2 +.Ve +.Sp +When n is 0, there is always a space to the right of a '(' and to the left +of a ')'. For n=2 there is never a space. For n=1, the default, there +is a space unless the quantity within the parens is a single token, such +as an identifier or quoted string. +.Sp +Likewise, the parameter \fB\-sbt=n\fR or \fB\-\-square\-bracket\-tightness=n\fR +controls the space within square brackets, as illustrated below. +.Sp +.Vb 3 +\& $width = $col[ $j + $k ] \- $col[ $j ]; # \-sbt=0 +\& $width = $col[ $j + $k ] \- $col[$j]; # \-sbt=1 (default) +\& $width = $col[$j + $k] \- $col[$j]; # \-sbt=2 +.Ve +.Sp +Curly braces which do not contain code blocks are controlled by +the parameter \fB\-bt=n\fR or \fB\-\-brace\-tightness=n\fR. +.Sp +.Vb 3 +\& $obj\->{ $parsed_sql\->{ \*(Aqtable\*(Aq }[0] }; # \-bt=0 +\& $obj\->{ $parsed_sql\->{\*(Aqtable\*(Aq}[0] }; # \-bt=1 (default) +\& $obj\->{$parsed_sql\->{\*(Aqtable\*(Aq}[0]}; # \-bt=2 +.Ve +.Sp +And finally, curly braces which contain blocks of code are controlled by the +parameter \fB\-bbt=n\fR or \fB\-\-block\-brace\-tightness=n\fR as illustrated in the +example below. +.Sp +.Vb 3 +\& %bf = map { $_ => \-M $_ } grep { /\e.deb$/ } dirents \*(Aq.\*(Aq; # \-bbt=0 (default) +\& %bf = map { $_ => \-M $_ } grep {/\e.deb$/} dirents \*(Aq.\*(Aq; # \-bbt=1 +\& %bf = map {$_ => \-M $_} grep {/\e.deb$/} dirents \*(Aq.\*(Aq; # \-bbt=2 +.Ve +.Sp +To simplify input in the case that all of the tightness flags have the same +value , the parameter <\-act=n> or \fB\-\-all\-containers\-tightness=n\fR is an +abbreviation for the combination <\-pt=n \-sbt=n \-bt=n \-bbt=n>. +.IP "\fB\-tso\fR, \fB\-\-tight\-secret\-operators\fR" 4 +.IX Item "-tso, --tight-secret-operators" +The flag \fB\-tso\fR causes certain perl token sequences (secret operators) +which might be considered to be a single operator to be formatted \*(L"tightly\*(R" +(without spaces). The operators currently modified by this flag are: +.Sp +.Vb 1 +\& 0+ +0 ()x!! ~~<> ,=> =( )= +.Ve +.Sp +For example the sequence \fB0 +\fR, which converts a string to a number, +would be formatted without a space: \fB0+\fR when the \fB\-tso\fR flag is set. This +flag is off by default. +.IP "\fB\-sts\fR, \fB\-\-space\-terminal\-semicolon\fR" 4 +.IX Item "-sts, --space-terminal-semicolon" +Some programmers prefer a space before all terminal semicolons. The +default is for no such space, and is indicated with \fB\-nsts\fR or +\&\fB\-\-nospace\-terminal\-semicolon\fR. +.Sp +.Vb 2 +\& $i = 1 ; # \-sts +\& $i = 1; # \-nsts (default) +.Ve +.IP "\fB\-sfs\fR, \fB\-\-space\-for\-semicolon\fR" 4 +.IX Item "-sfs, --space-for-semicolon" +Semicolons within \fBfor\fR loops may sometimes be hard to see, +particularly when commas are also present. This option places spaces on +both sides of these special semicolons, and is the default. Use +\&\fB\-nsfs\fR or \fB\-\-nospace\-for\-semicolon\fR to deactivate it. +.Sp +.Vb 2 +\& for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) { # \-sfs (default) +\& for ( @a = @$ap, $u = shift @a; @a; $u = $v ) { # \-nsfs +.Ve +.IP "\fB\-asc\fR, \fB\-\-add\-semicolons\fR" 4 +.IX Item "-asc, --add-semicolons" +Setting \fB\-asc\fR allows perltidy to add any missing optional semicolon at the end +of a line which is followed by a closing curly brace on the next line. This +is the default, and may be deactivated with \fB\-nasc\fR or \fB\-\-noadd\-semicolons\fR. +.IP "\fB\-dsm\fR, \fB\-\-delete\-semicolons\fR" 4 +.IX Item "-dsm, --delete-semicolons" +Setting \fB\-dsm\fR allows perltidy to delete extra semicolons which are +simply empty statements. This is the default, and may be deactivated +with \fB\-ndsm\fR or \fB\-\-nodelete\-semicolons\fR. (Such semicolons are not +deleted, however, if they would promote a side comment to a block +comment). +.IP "\fB\-aws\fR, \fB\-\-add\-whitespace\fR" 4 +.IX Item "-aws, --add-whitespace" +Setting this option allows perltidy to add certain whitespace improve +code readability. This is the default. If you do not want any +whitespace added, but are willing to have some whitespace deleted, use +\&\fB\-naws\fR. (Use \fB\-fws\fR to leave whitespace completely unchanged). +.IP "\fB\-dws\fR, \fB\-\-delete\-old\-whitespace\fR" 4 +.IX Item "-dws, --delete-old-whitespace" +Setting this option allows perltidy to remove some old whitespace +between characters, if necessary. This is the default. If you +do not want any old whitespace removed, use \fB\-ndws\fR or +\&\fB\-\-nodelete\-old\-whitespace\fR. +.IP "Detailed whitespace controls around tokens" 4 +.IX Item "Detailed whitespace controls around tokens" +For those who want more detailed control over the whitespace around +tokens, there are four parameters which can directly modify the default +whitespace rules built into perltidy for any token. They are: +.Sp +\&\fB\-wls=s\fR or \fB\-\-want\-left\-space=s\fR, +.Sp +\&\fB\-nwls=s\fR or \fB\-\-nowant\-left\-space=s\fR, +.Sp +\&\fB\-wrs=s\fR or \fB\-\-want\-right\-space=s\fR, +.Sp +\&\fB\-nwrs=s\fR or \fB\-\-nowant\-right\-space=s\fR. +.Sp +These parameters are each followed by a quoted string, \fBs\fR, containing a +list of token types. No more than one of each of these parameters +should be specified, because repeating a command-line parameter +always overwrites the previous one before perltidy ever sees it. +.Sp +To illustrate how these are used, suppose it is desired that there be no +space on either side of the token types \fB= + \- / *\fR. The following two +parameters would specify this desire: +.Sp +.Vb 1 +\& \-nwls="= + \- / *" \-nwrs="= + \- / *" +.Ve +.Sp +(Note that the token types are in quotes, and that they are separated by +spaces). With these modified whitespace rules, the following line of math: +.Sp +.Vb 1 +\& $root = \-$b + sqrt( $b * $b \- 4. * $a * $c ) / ( 2. * $a ); +.Ve +.Sp +becomes this: +.Sp +.Vb 1 +\& $root=\-$b+sqrt( $b*$b\-4.*$a*$c )/( 2.*$a ); +.Ve +.Sp +These parameters should be considered to be hints to perltidy rather +than fixed rules, because perltidy must try to resolve conflicts that +arise between them and all of the other rules that it uses. One +conflict that can arise is if, between two tokens, the left token wants +a space and the right one doesn't. In this case, the token not wanting +a space takes priority. +.Sp +It is necessary to have a list of all token types in order to create +this type of input. Such a list can be obtained by the command +\&\fB\-\-dump\-token\-types\fR. Also try the \fB\-D\fR flag on a short snippet of code +and look at the .DEBUG file to see the tokenization. +.Sp +\&\fB\s-1WARNING\s0\fR Be sure to put these tokens in quotes to avoid having them +misinterpreted by your command shell. +.IP "Space between specific keywords and opening paren" 4 +.IX Item "Space between specific keywords and opening paren" +When an opening paren follows a Perl keyword, no space is introduced after the +keyword, unless it is (by default) one of these: +.Sp +.Vb 2 +\& my local our and or eq ne if else elsif until unless +\& while for foreach return switch case given when +.Ve +.Sp +These defaults can be modified with two commands: +.Sp +\&\fB\-sak=s\fR or \fB\-\-space\-after\-keyword=s\fR adds keywords. +.Sp +\&\fB\-nsak=s\fR or \fB\-\-nospace\-after\-keyword=s\fR removes keywords. +.Sp +where \fBs\fR is a list of keywords (in quotes if necessary). For example, +.Sp +.Vb 2 +\& my ( $a, $b, $c ) = @_; # default +\& my( $a, $b, $c ) = @_; # \-nsak="my local our" +.Ve +.Sp +The abbreviation \fB\-nsak='*'\fR is equivalent to including all of the +keywords in the above list. +.Sp +When both \fB\-nsak=s\fR and \fB\-sak=s\fR commands are included, the \fB\-nsak=s\fR +command is executed first. For example, to have space after only the +keywords (my, local, our) you could use \fB\-nsak=\*(L"*\*(R" \-sak=\*(L"my local our\*(R"\fR. +.Sp +To put a space after all keywords, see the next item. +.IP "Space between all keywords and opening parens" 4 +.IX Item "Space between all keywords and opening parens" +When an opening paren follows a function or keyword, no space is introduced +after the keyword except for the keywords noted in the previous item. To +always put a space between a function or keyword and its opening paren, +use the command: +.Sp +\&\fB\-skp\fR or \fB\-\-space\-keyword\-paren\fR +.Sp +You will probably also want to use the flag \fB\-sfp\fR (next item) too. +.IP "Space between all function names and opening parens" 4 +.IX Item "Space between all function names and opening parens" +When an opening paren follows a function the default is not to introduce +a space. To cause a space to be introduced use: +.Sp +\&\fB\-sfp\fR or \fB\-\-space\-function\-paren\fR +.Sp +.Vb 2 +\& myfunc( $a, $b, $c ); # default +\& myfunc ( $a, $b, $c ); # \-sfp +.Ve +.Sp +You will probably also want to use the flag \fB\-skp\fR (previous item) too. +.ie n .IP "Trimming whitespace around ""qw"" quotes" 4 +.el .IP "Trimming whitespace around \f(CWqw\fR quotes" 4 +.IX Item "Trimming whitespace around qw quotes" +\&\fB\-tqw\fR or \fB\-\-trim\-qw\fR provide the default behavior of trimming +spaces around multi-line \f(CW\*(C`qw\*(C'\fR quotes and indenting them appropriately. +.Sp +\&\fB\-ntqw\fR or \fB\-\-notrim\-qw\fR cause leading and trailing whitespace around +multi-line \f(CW\*(C`qw\*(C'\fR quotes to be left unchanged. This option will not +normally be necessary, but was added for testing purposes, because in +some versions of perl, trimming \f(CW\*(C`qw\*(C'\fR quotes changes the syntax tree. +.IP "\fB\-sbq=n\fR or \fB\-\-space\-backslash\-quote=n\fR" 4 +.IX Item "-sbq=n or --space-backslash-quote=n" +Lines like +.Sp +.Vb 2 +\& $str1=\e"string1"; +\& $str2=\e\*(Aqstring2\*(Aq; +.Ve +.Sp +can confuse syntax highlighters unless a space is included between the backslash and the single or double quotation mark. +.Sp +This can be controlled with the value of \fBn\fR as follows: +.Sp +.Vb 3 +\& \-sbq=0 means no space between the backslash and quote +\& \-sbq=1 means follow the example of the source code +\& \-sbq=2 means always put a space between the backslash and quote +.Ve +.Sp +The default is \fB\-sbq=1\fR, meaning that a space will be used 0if there is one in the source code. +.IP "Trimming trailing whitespace from lines of \s-1POD\s0" 4 +.IX Item "Trimming trailing whitespace from lines of POD" +\&\fB\-trp\fR or \fB\-\-trim\-pod\fR will remove trailing whitespace from lines of \s-1POD.\s0 +The default is not to do this. +.SS "Comment Controls" +.IX Subsection "Comment Controls" +Perltidy has a number of ways to control the appearance of both block comments +and side comments. The term \fBblock comment\fR here refers to a full-line +comment, whereas \fBside comment\fR will refer to a comment which appears on a +line to the right of some code. +.IP "\fB\-ibc\fR, \fB\-\-indent\-block\-comments\fR" 4 +.IX Item "-ibc, --indent-block-comments" +Block comments normally look best when they are indented to the same +level as the code which follows them. This is the default behavior, but +you may use \fB\-nibc\fR to keep block comments left-justified. Here is an +example: +.Sp +.Vb 2 +\& # this comment is indented (\-ibc, default) +\& if ($task) { yyy(); } +.Ve +.Sp +The alternative is \fB\-nibc\fR: +.Sp +.Vb 2 +\& # this comment is not indented (\-nibc) +\& if ($task) { yyy(); } +.Ve +.Sp +See also the next item, \fB\-isbc\fR, as well as \fB\-sbc\fR, for other ways to +have some indented and some outdented block comments. +.IP "\fB\-isbc\fR, \fB\-\-indent\-spaced\-block\-comments\fR" 4 +.IX Item "-isbc, --indent-spaced-block-comments" +If there is no leading space on the line, then the comment will not be +indented, and otherwise it may be. +.Sp +If both \fB\-ibc\fR and \fB\-isbc\fR are set, then \fB\-isbc\fR takes priority. +.IP "\fB\-olc\fR, \fB\-\-outdent\-long\-comments\fR" 4 +.IX Item "-olc, --outdent-long-comments" +When \fB\-olc\fR is set, lines which are full-line (block) comments longer +than the value \fBmaximum-line-length\fR will have their indentation +removed. This is the default; use \fB\-nolc\fR to prevent outdenting. +.IP "\fB\-msc=n\fR, \fB\-\-minimum\-space\-to\-comment=n\fR" 4 +.IX Item "-msc=n, --minimum-space-to-comment=n" +Side comments look best when lined up several spaces to the right of +code. Perltidy will try to keep comments at least n spaces to the +right. The default is n=4 spaces. +.IP "\fB\-fpsc=n\fR, \fB\-\-fixed\-position\-side\-comment=n\fR" 4 +.IX Item "-fpsc=n, --fixed-position-side-comment=n" +This parameter tells perltidy to line up side comments in column number \fBn\fR +whenever possible. The default, n=0, will not do this. +.IP "\fB\-iscl\fR, \fB\-\-ignore\-side\-comment\-lengths\fR" 4 +.IX Item "-iscl, --ignore-side-comment-lengths" +This parameter causes perltidy to ignore the length of side comments when +setting line breaks. The default, \fB\-niscl\fR, is to include the length of +side comments when breaking lines to stay within the length prescribed +by the \fB\-l=n\fR maximum line length parameter. For example, the following +long single line would remain intact with \-l=80 and \-iscl: +.Sp +.Vb 2 +\& perltidy \-l=80 \-iscl +\& $vmsfile =~ s/;[\ed\e\-]*$//; # Clip off version number; we can use a newer version as well +.Ve +.Sp +whereas without the \-iscl flag the line will be broken: +.Sp +.Vb 3 +\& perltidy \-l=80 +\& $vmsfile =~ s/;[\ed\e\-]*$// +\& ; # Clip off version number; we can use a newer version as well +.Ve +.IP "\fB\-hsc\fR, \fB\-\-hanging\-side\-comments\fR" 4 +.IX Item "-hsc, --hanging-side-comments" +By default, perltidy tries to identify and align \*(L"hanging side +comments\*(R", which are something like this: +.Sp +.Vb 3 +\& my $IGNORE = 0; # This is a side comment +\& # This is a hanging side comment +\& # And so is this +.Ve +.Sp +A comment is considered to be a hanging side comment if (1) it immediately +follows a line with a side comment, or another hanging side comment, and +(2) there is some leading whitespace on the line. +To deactivate this feature, use \fB\-nhsc\fR or \fB\-\-nohanging\-side\-comments\fR. +If block comments are preceded by a blank line, or have no leading +whitespace, they will not be mistaken as hanging side comments. +.IP "Closing Side Comments" 4 +.IX Item "Closing Side Comments" +A closing side comment is a special comment which perltidy can +automatically create and place after the closing brace of a code block. +They can be useful for code maintenance and debugging. The command +\&\fB\-csc\fR (or \fB\-\-closing\-side\-comments\fR) adds or updates closing side +comments. For example, here is a small code snippet +.Sp +.Vb 8 +\& sub message { +\& if ( !defined( $_[0] ) ) { +\& print("Hello, World\en"); +\& } +\& else { +\& print( $_[0], "\en" ); +\& } +\& } +.Ve +.Sp +And here is the result of processing with \f(CW\*(C`perltidy \-csc\*(C'\fR: +.Sp +.Vb 8 +\& sub message { +\& if ( !defined( $_[0] ) ) { +\& print("Hello, World\en"); +\& } +\& else { +\& print( $_[0], "\en" ); +\& } +\& } ## end sub message +.Ve +.Sp +A closing side comment was added for \f(CW\*(C`sub message\*(C'\fR in this case, but not +for the \f(CW\*(C`if\*(C'\fR and \f(CW\*(C`else\*(C'\fR blocks, because they were below the 6 line +cutoff limit for adding closing side comments. This limit may be +changed with the \fB\-csci\fR command, described below. +.Sp +The command \fB\-dcsc\fR (or \fB\-\-delete\-closing\-side\-comments\fR) reverses this +process and removes these comments. +.Sp +Several commands are available to modify the behavior of these two basic +commands, \fB\-csc\fR and \fB\-dcsc\fR: +.RS 4 +.IP "\fB\-csci=n\fR, or \fB\-\-closing\-side\-comment\-interval=n\fR" 4 +.IX Item "-csci=n, or --closing-side-comment-interval=n" +where \f(CW\*(C`n\*(C'\fR is the minimum number of lines that a block must have in +order for a closing side comment to be added. The default value is +\&\f(CW\*(C`n=6\*(C'\fR. To illustrate: +.Sp +.Vb 9 +\& # perltidy \-csci=2 \-csc +\& sub message { +\& if ( !defined( $_[0] ) ) { +\& print("Hello, World\en"); +\& } ## end if ( !defined( $_[0] )) +\& else { +\& print( $_[0], "\en" ); +\& } ## end else [ if ( !defined( $_[0] )) +\& } ## end sub message +.Ve +.Sp +Now the \f(CW\*(C`if\*(C'\fR and \f(CW\*(C`else\*(C'\fR blocks are commented. However, now this has +become very cluttered. +.IP "\fB\-cscp=string\fR, or \fB\-\-closing\-side\-comment\-prefix=string\fR" 4 +.IX Item "-cscp=string, or --closing-side-comment-prefix=string" +where string is the prefix used before the name of the block type. The +default prefix, shown above, is \f(CW\*(C`## end\*(C'\fR. This string will be added to +closing side comments, and it will also be used to recognize them in +order to update, delete, and format them. Any comment identified as a +closing side comment will be placed just a single space to the right of +its closing brace. +.IP "\fB\-cscl=string\fR, or \fB\-\-closing\-side\-comment\-list\fR" 4 +.IX Item "-cscl=string, or --closing-side-comment-list" +where \f(CW\*(C`string\*(C'\fR is a list of block types to be tagged with closing side +comments. By default, all code block types preceded by a keyword or +label (such as \f(CW\*(C`if\*(C'\fR, \f(CW\*(C`sub\*(C'\fR, and so on) will be tagged. The \fB\-cscl\fR +command changes the default list to be any selected block types; see +\&\*(L"Specifying Block Types\*(R". +For example, the following command +requests that only \f(CW\*(C`sub\*(C'\fR's, labels, \f(CW\*(C`BEGIN\*(C'\fR, and \f(CW\*(C`END\*(C'\fR blocks be +affected by any \fB\-csc\fR or \fB\-dcsc\fR operation: +.Sp +.Vb 1 +\& \-cscl="sub : BEGIN END" +.Ve +.IP "\fB\-csct=n\fR, or \fB\-\-closing\-side\-comment\-maximum\-text=n\fR" 4 +.IX Item "-csct=n, or --closing-side-comment-maximum-text=n" +The text appended to certain block types, such as an \f(CW\*(C`if\*(C'\fR block, is +whatever lies between the keyword introducing the block, such as \f(CW\*(C`if\*(C'\fR, +and the opening brace. Since this might be too much text for a side +comment, there needs to be a limit, and that is the purpose of this +parameter. The default value is \f(CW\*(C`n=20\*(C'\fR, meaning that no additional +tokens will be appended to this text after its length reaches 20 +characters. Omitted text is indicated with \f(CW\*(C`...\*(C'\fR. (Tokens, including +sub names, are never truncated, however, so actual lengths may exceed +this). To illustrate, in the above example, the appended text of the +first block is \f(CW\*(C` ( !defined( $_[0] )...\*(C'\fR. The existing limit of +\&\f(CW\*(C`n=20\*(C'\fR caused this text to be truncated, as indicated by the \f(CW\*(C`...\*(C'\fR. See +the next flag for additional control of the abbreviated text. +.IP "\fB\-cscb\fR, or \fB\-\-closing\-side\-comments\-balanced\fR" 4 +.IX Item "-cscb, or --closing-side-comments-balanced" +As discussed in the previous item, when the +closing-side-comment-maximum-text limit is exceeded the comment text must +be truncated. Older versions of perltidy terminated with three dots, and this +can still be achieved with \-ncscb: +.Sp +.Vb 2 +\& perltidy \-csc \-ncscb +\& } ## end foreach my $foo (sort { $b cmp $a ... +.Ve +.Sp +However this causes a problem with editors which cannot recognize +comments or are not configured to do so because they cannot \*(L"bounce\*(R" around in +the text correctly. The \fB\-cscb\fR flag has been added to +help them by appending appropriate balancing structure: +.Sp +.Vb 2 +\& perltidy \-csc \-cscb +\& } ## end foreach my $foo (sort { $b cmp $a ... }) +.Ve +.Sp +The default is \fB\-cscb\fR. +.IP "\fB\-csce=n\fR, or \fB\-\-closing\-side\-comment\-else\-flag=n\fR" 4 +.IX Item "-csce=n, or --closing-side-comment-else-flag=n" +The default, \fBn=0\fR, places the text of the opening \f(CW\*(C`if\*(C'\fR statement after any +terminal \f(CW\*(C`else\*(C'\fR. +.Sp +If \fBn=2\fR is used, then each \f(CW\*(C`elsif\*(C'\fR is also given the text of the opening +\&\f(CW\*(C`if\*(C'\fR statement. Also, an \f(CW\*(C`else\*(C'\fR will include the text of a preceding +\&\f(CW\*(C`elsif\*(C'\fR statement. Note that this may result some long closing +side comments. +.Sp +If \fBn=1\fR is used, the results will be the same as \fBn=2\fR whenever the +resulting line length is less than the maximum allowed. +.IP "\fB\-cscb\fR, or \fB\-\-closing\-side\-comments\-balanced\fR" 4 +.IX Item "-cscb, or --closing-side-comments-balanced" +When using closing-side-comments, and the closing-side-comment-maximum-text +limit is exceeded, then the comment text must be abbreviated. +It is terminated with three dots if the \fB\-cscb\fR flag is negated: +.Sp +.Vb 2 +\& perltidy \-csc \-ncscb +\& } ## end foreach my $foo (sort { $b cmp $a ... +.Ve +.Sp +This causes a problem with older editors which do not recognize comments +because they cannot \*(L"bounce\*(R" around in the text correctly. The \fB\-cscb\fR +flag tries to help them by appending appropriate terminal balancing structures: +.Sp +.Vb 2 +\& perltidy \-csc \-cscb +\& } ## end foreach my $foo (sort { $b cmp $a ... }) +.Ve +.Sp +The default is \fB\-cscb\fR. +.IP "\fB\-cscw\fR, or \fB\-\-closing\-side\-comment\-warnings\fR" 4 +.IX Item "-cscw, or --closing-side-comment-warnings" +This parameter is intended to help make the initial transition to the use of +closing side comments. +It causes two +things to happen if a closing side comment replaces an existing, different +closing side comment: first, an error message will be issued, and second, the +original side comment will be placed alone on a new specially marked comment +line for later attention. +.Sp +The intent is to avoid clobbering existing hand-written side comments +which happen to match the pattern of closing side comments. This flag +should only be needed on the first run with \fB\-csc\fR. +.RE +.RS 4 +.Sp +\&\fBImportant Notes on Closing Side Comments:\fR +.IP "\(bu" 4 +Closing side comments are only placed on lines terminated with a closing +brace. Certain closing styles, such as the use of cuddled elses +(\fB\-ce\fR), preclude the generation of some closing side comments. +.IP "\(bu" 4 +Please note that adding or deleting of closing side comments takes +place only through the commands \fB\-csc\fR or \fB\-dcsc\fR. The other commands, +if used, merely modify the behavior of these two commands. +.IP "\(bu" 4 +It is recommended that the \fB\-cscw\fR flag be used along with \fB\-csc\fR on +the first use of perltidy on a given file. This will prevent loss of +any existing side comment data which happens to have the csc prefix. +.IP "\(bu" 4 +Once you use \fB\-csc\fR, you should continue to use it so that any +closing side comments remain correct as code changes. Otherwise, these +comments will become incorrect as the code is updated. +.IP "\(bu" 4 +If you edit the closing side comments generated by perltidy, you must also +change the prefix to be different from the closing side comment prefix. +Otherwise, your edits will be lost when you rerun perltidy with \fB\-csc\fR. For +example, you could simply change \f(CW\*(C`## end\*(C'\fR to be \f(CW\*(C`## End\*(C'\fR, since the test is +case sensitive. You may also want to use the \fB\-ssc\fR flag to keep these +modified closing side comments spaced the same as actual closing side comments. +.IP "\(bu" 4 +Temporarily generating closing side comments is a useful technique for +exploring and/or debugging a perl script, especially one written by someone +else. You can always remove them with \fB\-dcsc\fR. +.RE +.RS 4 +.RE +.IP "Static Block Comments" 4 +.IX Item "Static Block Comments" +Static block comments are block comments with a special leading pattern, +\&\f(CW\*(C`##\*(C'\fR by default, which will be treated slightly differently from other +block comments. They effectively behave as if they had glue along their +left and top edges, because they stick to the left edge and previous line +when there is no blank spaces in those places. This option is +particularly useful for controlling how commented code is displayed. +.RS 4 +.IP "\fB\-sbc\fR, \fB\-\-static\-block\-comments\fR" 4 +.IX Item "-sbc, --static-block-comments" +When \fB\-sbc\fR is used, a block comment with a special leading pattern, \f(CW\*(C`##\*(C'\fR by +default, will be treated specially. +.Sp +Comments so identified are treated as follows: +.RS 4 +.IP "\(bu" 4 +If there is no leading space on the line, then the comment will not +be indented, and otherwise it may be, +.IP "\(bu" 4 +no new blank line will be +inserted before such a comment, and +.IP "\(bu" 4 +such a comment will never become +a hanging side comment. +.RE +.RS 4 +.Sp +For example, assuming \f(CW@month_of_year\fR is +left-adjusted: +.Sp +.Vb 4 +\& @month_of_year = ( # \-sbc (default) +\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, +\& ## \*(AqDec\*(Aq, \*(AqNov\*(Aq +\& \*(AqNov\*(Aq, \*(AqDec\*(Aq); +.Ve +.Sp +Without this convention, the above code would become +.Sp +.Vb 2 +\& @month_of_year = ( # \-nsbc +\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, \*(AqMar\*(Aq, \*(AqApr\*(Aq, \*(AqMay\*(Aq, \*(AqJun\*(Aq, \*(AqJul\*(Aq, \*(AqAug\*(Aq, \*(AqSep\*(Aq, \*(AqOct\*(Aq, +\& +\& ## \*(AqDec\*(Aq, \*(AqNov\*(Aq +\& \*(AqNov\*(Aq, \*(AqDec\*(Aq +\& ); +.Ve +.Sp +which is not as clear. +The default is to use \fB\-sbc\fR. This may be deactivated with \fB\-nsbc\fR. +.RE +.IP "\fB\-sbcp=string\fR, \fB\-\-static\-block\-comment\-prefix=string\fR" 4 +.IX Item "-sbcp=string, --static-block-comment-prefix=string" +This parameter defines the prefix used to identify static block comments +when the \fB\-sbc\fR parameter is set. The default prefix is \f(CW\*(C`##\*(C'\fR, +corresponding to \f(CW\*(C`\-sbcp=##\*(C'\fR. The prefix is actually part of a perl +pattern used to match lines and it must either begin with \f(CW\*(C`#\*(C'\fR or \f(CW\*(C`^#\*(C'\fR. +In the first case a prefix ^\es* will be added to match any leading +whitespace, while in the second case the pattern will match only +comments with no leading whitespace. For example, to +identify all comments as static block comments, one would use \f(CW\*(C`\-sbcp=#\*(C'\fR. +To identify all left-adjusted comments as static block comments, use \f(CW\*(C`\-sbcp=\*(Aq^#\*(Aq\*(C'\fR. +.Sp +Please note that \fB\-sbcp\fR merely defines the pattern used to identify static +block comments; it will not be used unless the switch \fB\-sbc\fR is set. Also, +please be aware that since this string is used in a perl regular expression +which identifies these comments, it must enable a valid regular expression to +be formed. +.Sp +A pattern which can be useful is: +.Sp +.Vb 1 +\& \-sbcp=^#{2,}[^\es#] +.Ve +.Sp +This pattern requires a static block comment to have at least one character +which is neither a # nor a space. It allows a line containing only '#' +characters to be rejected as a static block comment. Such lines are often used +at the start and end of header information in subroutines and should not be +separated from the intervening comments, which typically begin with just a +single '#'. +.IP "\fB\-osbc\fR, \fB\-\-outdent\-static\-block\-comments\fR" 4 +.IX Item "-osbc, --outdent-static-block-comments" +The command \fB\-osbc\fR will cause static block comments to be outdented by 2 +spaces (or whatever \fB\-ci=n\fR has been set to), if possible. +.RE +.RS 4 +.RE +.IP "Static Side Comments" 4 +.IX Item "Static Side Comments" +Static side comments are side comments with a special leading pattern. +This option can be useful for controlling how commented code is displayed +when it is a side comment. +.RS 4 +.IP "\fB\-ssc\fR, \fB\-\-static\-side\-comments\fR" 4 +.IX Item "-ssc, --static-side-comments" +When \fB\-ssc\fR is used, a side comment with a static leading pattern, which is +\&\f(CW\*(C`##\*(C'\fR by default, will be spaced only a single space from previous +character, and it will not be vertically aligned with other side comments. +.Sp +The default is \fB\-nssc\fR. +.IP "\fB\-sscp=string\fR, \fB\-\-static\-side\-comment\-prefix=string\fR" 4 +.IX Item "-sscp=string, --static-side-comment-prefix=string" +This parameter defines the prefix used to identify static side comments +when the \fB\-ssc\fR parameter is set. The default prefix is \f(CW\*(C`##\*(C'\fR, +corresponding to \f(CW\*(C`\-sscp=##\*(C'\fR. +.Sp +Please note that \fB\-sscp\fR merely defines the pattern used to identify +static side comments; it will not be used unless the switch \fB\-ssc\fR is +set. Also, note that this string is used in a perl regular expression +which identifies these comments, so it must enable a valid regular +expression to be formed. +.RE +.RS 4 +.RE +.SS "Skipping Selected Sections of Code" +.IX Subsection "Skipping Selected Sections of Code" +Selected lines of code may be passed verbatim to the output without any +formatting. This feature is enabled by default but can be disabled with +the \fB\-\-noformat\-skipping\fR or \fB\-nfs\fR flag. It should be used sparingly to +avoid littering code with markers, but it might be helpful for working +around occasional problems. For example it might be useful for keeping +the indentation of old commented code unchanged, keeping indentation of +long blocks of aligned comments unchanged, keeping certain list +formatting unchanged, or working around a glitch in perltidy. +.IP "\fB\-fs\fR, \fB\-\-format\-skipping\fR" 4 +.IX Item "-fs, --format-skipping" +This flag, which is enabled by default, causes any code between +special beginning and ending comment markers to be passed to the +output without formatting. The default beginning marker is #<<< +and the default ending marker is #>>> but they +may be changed (see next items below). Additional text may appear on +these special comment lines provided that it is separated from the +marker by at least one space. For example +.Sp +.Vb 7 +\& #<<< do not let perltidy touch this +\& my @list = (1, +\& 1, 1, +\& 1, 2, 1, +\& 1, 3, 3, 1, +\& 1, 4, 6, 4, 1,); +\& #>>> +.Ve +.Sp +The comment markers may be placed at any location that a block comment may +appear. If they do not appear to be working, use the \-log flag and examine the +\&\fI.LOG\fR file. Use \fB\-nfs\fR to disable this feature. +.IP "\fB\-fsb=string\fR, \fB\-\-format\-skipping\-begin=string\fR" 4 +.IX Item "-fsb=string, --format-skipping-begin=string" +The \fB\-fsb=string\fR parameter may be used to change the beginning marker for +format skipping. The default is equivalent to \-fsb='#<<<'. The string that +you enter must begin with a # and should be in quotes as necessary to get past +the command shell of your system. It is actually the leading text of a pattern +that is constructed by appending a '\es', so you must also include backslashes +for characters to be taken literally rather than as patterns. +.Sp +Some examples show how example strings become patterns: +.Sp +.Vb 3 +\& \-fsb=\*(Aq#\e{\e{\e{\*(Aq becomes /^#\e{\e{\e{\es/ which matches #{{{ but not #{{{{ +\& \-fsb=\*(Aq#\e*\e*\*(Aq becomes /^#\e*\e*\es/ which matches #** but not #*** +\& \-fsb=\*(Aq#\e*{2,}\*(Aq becomes /^#\e*{2,}\es/ which matches #** and #***** +.Ve +.IP "\fB\-fse=string\fR, \fB\-\-format\-skipping\-end=string\fR" 4 +.IX Item "-fse=string, --format-skipping-end=string" +The \fB\-fsb=string\fR is the corresponding parameter used to change the +ending marker for format skipping. The default is equivalent to +\&\-fse='#<<<'. +.SS "Line Break Control" +.IX Subsection "Line Break Control" +The parameters in this section control breaks after +non-blank lines of code. Blank lines are controlled +separately by parameters in the section \*(L"Blank Line +Control\*(R". +.IP "\fB\-fnl\fR, \fB\-\-freeze\-newlines\fR" 4 +.IX Item "-fnl, --freeze-newlines" +If you do not want any changes to the line breaks within +lines of code in your script, set +\&\fB\-fnl\fR, and they will remain fixed, and the rest of the commands in +this section and sections +\&\*(L"Controlling List Formatting\*(R", +\&\*(L"Retaining or Ignoring Existing Line Breaks\*(R". +You may want to use \fB\-noll\fR with this. +.Sp +Note: If you also want to keep your blank lines exactly +as they are, you can use the \fB\-fbl\fR flag which is described +in the section \*(L"Blank Line Control\*(R". +.IP "\fB\-ce\fR, \fB\-\-cuddled\-else\fR" 4 +.IX Item "-ce, --cuddled-else" +Enable the \*(L"cuddled else\*(R" style, in which \f(CW\*(C`else\*(C'\fR and \f(CW\*(C`elsif\*(C'\fR are +follow immediately after the curly brace closing the previous block. +The default is not to use cuddled elses, and is indicated with the flag +\&\fB\-nce\fR or \fB\-\-nocuddled\-else\fR. Here is a comparison of the +alternatives: +.Sp +.Vb 6 +\& # \-ce +\& if ($task) { +\& yyy(); +\& } else { +\& zzz(); +\& } +\& +\& # \-nce (default) +\& if ($task) { +\& yyy(); +\& } +\& else { +\& zzz(); +\& } +.Ve +.Sp +In this example the keyword \fBelse\fR is placed on the same line which begins with +the preceding closing block brace and is followed by its own opening block brace +on the same line. Other keywords and function names which are formatted with +this \*(L"cuddled\*(R" style are \fBelsif\fR, \fBcontinue\fR, \fBcatch\fR, \fBfinally\fR. +.Sp +Other block types can be formatted by specifying their names on a +separate parameter \fB\-cbl\fR, described in a later section. +.Sp +Cuddling between a pair of code blocks requires that the closing brace of the +first block start a new line. If this block is entirely on one line in the +input file, it is necessary to decide if it should be broken to allow cuddling. +This decision is controlled by the flag \fB\-cbo=n\fR discussed below. The default +and recommended value of \fB\-cbo=1\fR bases this decision on the first block in +the chain. If it spans multiple lines then cuddling is made and continues +along the chain, regardless of the sizes of subsequent blocks. Otherwise, short +lines remain intact. +.Sp +So for example, the \fB\-ce\fR flag would not have any effect if the above snippet +is rewritten as +.Sp +.Vb 2 +\& if ($task) { yyy() } +\& else { zzz() } +.Ve +.Sp +If the first block spans multiple lines, then cuddling can be done and will +continue for the subsequent blocks in the chain, as illustrated in the previous +snippet. +.Sp +If there are blank lines between cuddled blocks they will be eliminated. If +there are comments after the closing brace where cuddling would occur then +cuddling will be prevented. If this occurs, cuddling will restart later in the +chain if possible. +.IP "\fB\-cb\fR, \fB\-\-cuddled\-blocks\fR" 4 +.IX Item "-cb, --cuddled-blocks" +This flag is equivalent to \fB\-ce\fR. +.IP "\fB\-cbl\fR, \fB\-\-cuddled\-block\-list\fR" 4 +.IX Item "-cbl, --cuddled-block-list" +The built-in default cuddled block types are \fBelse, elsif, continue, catch, finally\fR. +.Sp +Additional block types to which the \fB\-cuddled\-blocks\fR style applies can be defined by +this parameter. This parameter is a character string, giving a list of +block types separated by commas or spaces. For example, to cuddle code blocks +of type sort, map and grep, in addition to the default types, the string could +be set to +.Sp +.Vb 1 +\& \-cbl="sort map grep" +.Ve +.Sp +or equivalently +.Sp +.Vb 1 +\& \-cbl=sort,map,grep +.Ve +.Sp +Note however that these particular block types are typically short so there might not be much +opportunity for the cuddled format style. +.Sp +Using commas avoids the need to protect spaces with quotes. +.Sp +As a diagnostic check, the flag \fB\-\-dump\-cuddled\-block\-list\fR or \fB\-dcbl\fR can be +used to view the hash of values that are generated by this flag. +.Sp +Finally, note that the \fB\-cbl\fR flag by itself merely specifies which blocks are formatted +with the cuddled format. It has no effect unless this formatting style is activated with +\&\fB\-ce\fR. +.IP "\fB\-cblx\fR, \fB\-\-cuddled\-block\-list\-exclusive\fR" 4 +.IX Item "-cblx, --cuddled-block-list-exclusive" +When cuddled else formatting is selected with \fB\-ce\fR, setting this flag causes +perltidy to ignore its built-in defaults and rely exclusively on the block types +specified on the \fB\-cbl\fR flag described in the previous section. For example, +to avoid using cuddled \fBcatch\fR and \fBfinally\fR, which among in the defaults, the +following set of parameters could be used: +.Sp +.Vb 1 +\& perltidy \-ce \-cbl=\*(Aqelse elsif continue\*(Aq \-cblx +.Ve +.IP "\fB\-cbo=n\fR, \fB\-\-cuddled\-break\-option=n\fR" 4 +.IX Item "-cbo=n, --cuddled-break-option=n" +Cuddled formatting is only possible between a pair of code blocks if the +closing brace of the first block starts a new line. If a block is encountered +which is entirely on a single line, and cuddled formatting is selected, it is +necessary to make a decision as to whether or not to \*(L"break\*(R" the block, meaning +to cause it to span multiple lines. This parameter controls that decision. The +options are: +.Sp +.Vb 4 +\& cbo=0 Never force a short block to break. +\& cbo=1 If the first of a pair of blocks is broken in the input file, +\& then break the second [DEFAULT]. +\& cbo=2 Break open all blocks for maximal cuddled formatting. +.Ve +.Sp +The default and recommended value is \fBcbo=1\fR. With this value, if the starting +block of a chain spans multiple lines, then a cascade of breaks will occur for +remaining blocks causing the entire chain to be cuddled. +.Sp +The option \fBcbo=0\fR can produce erratic cuddling if there are numerous one-line +blocks. +.Sp +The option \fBcbo=2\fR produces maximal cuddling but will not allow any short blocks. +.IP "\fB\-bl\fR, \fB\-\-opening\-brace\-on\-new\-line\fR" 4 +.IX Item "-bl, --opening-brace-on-new-line" +Use the flag \fB\-bl\fR to place the opening brace on a new line: +.Sp +.Vb 4 +\& if ( $input_file eq \*(Aq\-\*(Aq ) # \-bl +\& { +\& important_function(); +\& } +.Ve +.Sp +This flag applies to all structural blocks, including named sub's (unless +the \fB\-sbl\fR flag is set \*(-- see next item). +.Sp +The default style, \fB\-nbl\fR, places an opening brace on the same line as +the keyword introducing it. For example, +.Sp +.Vb 1 +\& if ( $input_file eq \*(Aq\-\*(Aq ) { # \-nbl (default) +.Ve +.IP "\fB\-sbl\fR, \fB\-\-opening\-sub\-brace\-on\-new\-line\fR" 4 +.IX Item "-sbl, --opening-sub-brace-on-new-line" +The flag \fB\-sbl\fR can be used to override the value of \fB\-bl\fR for +the opening braces of named sub's. For example, +.Sp +.Vb 1 +\& perltidy \-sbl +.Ve +.Sp +produces this result: +.Sp +.Vb 9 +\& sub message +\& { +\& if (!defined($_[0])) { +\& print("Hello, World\en"); +\& } +\& else { +\& print($_[0], "\en"); +\& } +\& } +.Ve +.Sp +This flag is negated with \fB\-nsbl\fR. If \fB\-sbl\fR is not specified, +the value of \fB\-bl\fR is used. +.IP "\fB\-asbl\fR, \fB\-\-opening\-anonymous\-sub\-brace\-on\-new\-line\fR" 4 +.IX Item "-asbl, --opening-anonymous-sub-brace-on-new-line" +The flag \fB\-asbl\fR is like the \fB\-sbl\fR flag except that it applies +to anonymous sub's instead of named subs. For example +.Sp +.Vb 1 +\& perltidy \-asbl +.Ve +.Sp +produces this result: +.Sp +.Vb 9 +\& $a = sub +\& { +\& if ( !defined( $_[0] ) ) { +\& print("Hello, World\en"); +\& } +\& else { +\& print( $_[0], "\en" ); +\& } +\& }; +.Ve +.Sp +This flag is negated with \fB\-nasbl\fR, and the default is \fB\-nasbl\fR. +.IP "\fB\-bli\fR, \fB\-\-brace\-left\-and\-indent\fR" 4 +.IX Item "-bli, --brace-left-and-indent" +The flag \fB\-bli\fR is the same as \fB\-bl\fR but in addition it causes one +unit of continuation indentation ( see \fB\-ci\fR ) to be placed before +an opening and closing block braces. +.Sp +For example, +.Sp +.Vb 4 +\& if ( $input_file eq \*(Aq\-\*(Aq ) # \-bli +\& { +\& important_function(); +\& } +.Ve +.Sp +By default, this extra indentation occurs for blocks of type: +\&\fBif\fR, \fBelsif\fR, \fBelse\fR, \fBunless\fR, \fBfor\fR, \fBforeach\fR, \fBsub\fR, +\&\fBwhile\fR, \fBuntil\fR, and also with a preceding label. The next item +shows how to change this. +.IP "\fB\-blil=s\fR, \fB\-\-brace\-left\-and\-indent\-list=s\fR" 4 +.IX Item "-blil=s, --brace-left-and-indent-list=s" +Use this parameter to change the types of block braces for which the +\&\fB\-bli\fR flag applies; see \*(L"Specifying Block Types\*(R". For example, +\&\fB\-blil='if elsif else'\fR would apply it to only \f(CW\*(C`if/elsif/else\*(C'\fR blocks. +.IP "\fB\-bar\fR, \fB\-\-opening\-brace\-always\-on\-right\fR" 4 +.IX Item "-bar, --opening-brace-always-on-right" +The default style, \fB\-nbl\fR places the opening code block brace on a new +line if it does not fit on the same line as the opening keyword, like +this: +.Sp +.Vb 5 +\& if ( $bigwasteofspace1 && $bigwasteofspace2 +\& || $bigwasteofspace3 && $bigwasteofspace4 ) +\& { +\& big_waste_of_time(); +\& } +.Ve +.Sp +To force the opening brace to always be on the right, use the \fB\-bar\fR +flag. In this case, the above example becomes +.Sp +.Vb 4 +\& if ( $bigwasteofspace1 && $bigwasteofspace2 +\& || $bigwasteofspace3 && $bigwasteofspace4 ) { +\& big_waste_of_time(); +\& } +.Ve +.Sp +A conflict occurs if both \fB\-bl\fR and \fB\-bar\fR are specified. +.IP "\fB\-otr\fR, \fB\-\-opening\-token\-right\fR and related flags" 4 +.IX Item "-otr, --opening-token-right and related flags" +The \fB\-otr\fR flag is a hint that perltidy should not place a break between a +comma and an opening token. For example: +.Sp +.Vb 6 +\& # default formatting +\& push @{ $self\->{$module}{$key} }, +\& { +\& accno => $ref\->{accno}, +\& description => $ref\->{description} +\& }; +\& +\& # perltidy \-otr +\& push @{ $self\->{$module}{$key} }, { +\& accno => $ref\->{accno}, +\& description => $ref\->{description} +\& }; +.Ve +.Sp +The flag \fB\-otr\fR is actually an abbreviation for three other flags +which can be used to control parens, hash braces, and square brackets +separately if desired: +.Sp +.Vb 3 +\& \-opr or \-\-opening\-paren\-right +\& \-ohbr or \-\-opening\-hash\-brace\-right +\& \-osbr or \-\-opening\-square\-bracket\-right +.Ve +.IP "\fB\-wn\fR, \fB\-\-weld\-nested\-containers\fR" 4 +.IX Item "-wn, --weld-nested-containers" +The \fB\-wn\fR flag causes closely nested pairs of opening and closing container +symbols (curly braces, brackets, or parens) to be \*(L"welded\*(R" together, meaning +that they are treated as if combined into a single unit, with the indentation +of the innermost code reduced to be as if there were just a single container +symbol. +.Sp +For example: +.Sp +.Vb 6 +\& # default formatting +\& do { +\& { +\& next if $x == $y; +\& } +\& } until $x++ > $z; +\& +\& # perltidy \-wn +\& do { { +\& next if $x == $y; +\& } } until $x++ > $z; +.Ve +.Sp +When this flag is set perltidy makes a preliminary pass through the file and +identifies all nested pairs of containers. To qualify as a nested pair, the +closing container symbols must be immediately adjacent. The opening symbols +must either be adjacent, or, if the outer opening symbol is an opening +paren, they may be separated by any single non-container symbol or something +that looks like a function evaluation. +.Sp +Any container symbol may serve as both the inner container of one pair and as +the outer container of an adjacent pair. Consequently, any number of adjacent +opening or closing symbols may join together in weld. For example, here are +three levels of wrapped function calls: +.Sp +.Vb 9 +\& # default formatting +\& my (@date_time) = Localtime( +\& Date_to_Time( +\& Add_Delta_DHMS( +\& $year, $month, $day, $hour, $minute, $second, +\& \*(Aq0\*(Aq, $offset, \*(Aq0\*(Aq, \*(Aq0\*(Aq +\& ) +\& ) +\& ); +\& +\& # perltidy \-wn +\& my (@date_time) = Localtime( Date_to_Time( Add_Delta_DHMS( +\& $year, $month, $day, $hour, $minute, $second, +\& \*(Aq0\*(Aq, $offset, \*(Aq0\*(Aq, \*(Aq0\*(Aq +\& ) ) ); +.Ve +.Sp +Notice how the indentation of the inner lines are reduced by two levels in this +case. This example also shows the typical result of this formatting, namely it +is a sandwich consisting of an initial opening layer, a central section of any +complexity forming the \*(L"meat\*(R" of the sandwich, and a final closing layer. This +predictable structure helps keep the compacted structure readable. +.Sp +The inner sandwich layer is required to be at least one line thick. If this +cannot be achieved, welding does not occur. This constraint can cause +formatting to take a couple of iterations to stabilize when it is first applied +to a script. The \fB\-conv\fR flag can be used to insure that the final format is +achieved in a single run. +.Sp +Here is an example illustrating a welded container within a welded containers: +.Sp +.Vb 11 +\& # default formatting +\& $x\->badd( +\& bmul( +\& $class\->new( +\& abs( +\& $sx * int( $xr\->numify() ) & $sy * int( $yr\->numify() ) +\& ) +\& ), +\& $m +\& ) +\& ); +\& +\& # perltidy \-wn +\& $x\->badd( bmul( +\& $class\->new( abs( +\& $sx * int( $xr\->numify() ) & $sy * int( $yr\->numify() ) +\& ) ), +\& $m +\& ) ); +.Ve +.Sp +This format option is quite general but there are some limitations. +.Sp +One limitiation is that any line length limit still applies and can cause long +welded sections to be broken into multiple lines. +.Sp +Another limitation is that an opening symbol which delimits quoted text cannot +be included in a welded pair. This is because quote delimiters are treated +specially in perltidy. +.Sp +Finally, the stacking of containers defined by this flag have priority over +any other container stacking flags. This is because any welding is done first. +.IP "\fBVertical tightness\fR of non-block curly braces, parentheses, and square brackets." 4 +.IX Item "Vertical tightness of non-block curly braces, parentheses, and square brackets." +These parameters control what shall be called vertical tightness. Here are the +main points: +.RS 4 +.IP "\(bu" 4 +Opening tokens (except for block braces) are controlled by \fB\-vt=n\fR, or +\&\fB\-\-vertical\-tightness=n\fR, where +.Sp +.Vb 4 +\& \-vt=0 always break a line after opening token (default). +\& \-vt=1 do not break unless this would produce more than one +\& step in indentation in a line. +\& \-vt=2 never break a line after opening token +.Ve +.IP "\(bu" 4 +You must also use the \fB\-lp\fR flag when you use the \fB\-vt\fR flag; the +reason is explained below. +.IP "\(bu" 4 +Closing tokens (except for block braces) are controlled by \fB\-vtc=n\fR, or +\&\fB\-\-vertical\-tightness\-closing=n\fR, where +.Sp +.Vb 5 +\& \-vtc=0 always break a line before a closing token (default), +\& \-vtc=1 do not break before a closing token which is followed +\& by a semicolon or another closing token, and is not in +\& a list environment. +\& \-vtc=2 never break before a closing token. +.Ve +.Sp +The rules for \fB\-vtc=1\fR are designed to maintain a reasonable balance +between tightness and readability in complex lists. +.IP "\(bu" 4 +Different controls may be applied to different token types, +and it is also possible to control block braces; see below. +.IP "\(bu" 4 +Finally, please note that these vertical tightness flags are merely +hints to the formatter, and it cannot always follow them. Things which +make it difficult or impossible include comments, blank lines, blocks of +code within a list, and possibly the lack of the \fB\-lp\fR parameter. +Also, these flags may be ignored for very small lists (2 or 3 lines in +length). +.RE +.RS 4 +.Sp +Here are some examples: +.Sp +.Vb 7 +\& # perltidy \-lp \-vt=0 \-vtc=0 +\& %romanNumerals = ( +\& one => \*(AqI\*(Aq, +\& two => \*(AqII\*(Aq, +\& three => \*(AqIII\*(Aq, +\& four => \*(AqIV\*(Aq, +\& ); +\& +\& # perltidy \-lp \-vt=1 \-vtc=0 +\& %romanNumerals = ( one => \*(AqI\*(Aq, +\& two => \*(AqII\*(Aq, +\& three => \*(AqIII\*(Aq, +\& four => \*(AqIV\*(Aq, +\& ); +\& +\& # perltidy \-lp \-vt=1 \-vtc=1 +\& %romanNumerals = ( one => \*(AqI\*(Aq, +\& two => \*(AqII\*(Aq, +\& three => \*(AqIII\*(Aq, +\& four => \*(AqIV\*(Aq, ); +.Ve +.Sp +The difference between \fB\-vt=1\fR and \fB\-vt=2\fR is shown here: +.Sp +.Vb 6 +\& # perltidy \-lp \-vt=1 +\& $init\->add( +\& mysprintf( "(void)find_threadsv(%s);", +\& cstring( $threadsv_names[ $op\->targ ] ) +\& ) +\& ); +\& +\& # perltidy \-lp \-vt=2 +\& $init\->add( mysprintf( "(void)find_threadsv(%s);", +\& cstring( $threadsv_names[ $op\->targ ] ) +\& ) +\& ); +.Ve +.Sp +With \fB\-vt=1\fR, the line ending in \f(CW\*(C`add(\*(C'\fR does not combine with the next +line because the next line is not balanced. This can help with +readability, but \fB\-vt=2\fR can be used to ignore this rule. +.Sp +The tightest, and least readable, code is produced with both \f(CW\*(C`\-vt=2\*(C'\fR and +\&\f(CW\*(C`\-vtc=2\*(C'\fR: +.Sp +.Vb 3 +\& # perltidy \-lp \-vt=2 \-vtc=2 +\& $init\->add( mysprintf( "(void)find_threadsv(%s);", +\& cstring( $threadsv_names[ $op\->targ ] ) ) ); +.Ve +.Sp +Notice how the code in all of these examples collapses vertically as +\&\fB\-vt\fR increases, but the indentation remains unchanged. This is +because perltidy implements the \fB\-vt\fR parameter by first formatting as +if \fB\-vt=0\fR, and then simply overwriting one output line on top of the +next, if possible, to achieve the desired vertical tightness. The +\&\fB\-lp\fR indentation style has been designed to allow this vertical +collapse to occur, which is why it is required for the \fB\-vt\fR parameter. +.Sp +The \fB\-vt=n\fR and \fB\-vtc=n\fR parameters apply to each type of container +token. If desired, vertical tightness controls can be applied +independently to each of the closing container token types. +.Sp +The parameters for controlling parentheses are \fB\-pvt=n\fR or +\&\fB\-\-paren\-vertical\-tightness=n\fR, and \fB\-pcvt=n\fR or +\&\fB\-\-paren\-vertical\-tightness\-closing=n\fR. +.Sp +Likewise, the parameters for square brackets are \fB\-sbvt=n\fR or +\&\fB\-\-square\-bracket\-vertical\-tightness=n\fR, and \fB\-sbcvt=n\fR or +\&\fB\-\-square\-bracket\-vertical\-tightness\-closing=n\fR. +.Sp +Finally, the parameters for controlling non-code block braces are +\&\fB\-bvt=n\fR or \fB\-\-brace\-vertical\-tightness=n\fR, and \fB\-bcvt=n\fR or +\&\fB\-\-brace\-vertical\-tightness\-closing=n\fR. +.Sp +In fact, the parameter \fB\-vt=n\fR is actually just an abbreviation for +\&\fB\-pvt=n \-bvt=n sbvt=n\fR, and likewise \fB\-vtc=n\fR is an abbreviation +for \fB\-pvtc=n \-bvtc=n sbvtc=n\fR. +.RE +.IP "\fB\-bbvt=n\fR or \fB\-\-block\-brace\-vertical\-tightness=n\fR" 4 +.IX Item "-bbvt=n or --block-brace-vertical-tightness=n" +The \fB\-bbvt=n\fR flag is just like the \fB\-vt=n\fR flag but applies +to opening code block braces. +.Sp +.Vb 4 +\& \-bbvt=0 break after opening block brace (default). +\& \-bbvt=1 do not break unless this would produce more than one +\& step in indentation in a line. +\& \-bbvt=2 do not break after opening block brace. +.Ve +.Sp +It is necessary to also use either \fB\-bl\fR or \fB\-bli\fR for this to work, +because, as with other vertical tightness controls, it is implemented by +simply overwriting a line ending with an opening block brace with the +subsequent line. For example: +.Sp +.Vb 10 +\& # perltidy \-bli \-bbvt=0 +\& if ( open( FILE, "< $File" ) ) +\& { +\& while ( $File = ) +\& { +\& $In .= $File; +\& $count++; +\& } +\& close(FILE); +\& } +\& +\& # perltidy \-bli \-bbvt=1 +\& if ( open( FILE, "< $File" ) ) +\& { while ( $File = ) +\& { $In .= $File; +\& $count++; +\& } +\& close(FILE); +\& } +.Ve +.Sp +By default this applies to blocks associated with keywords \fBif\fR, +\&\fBelsif\fR, \fBelse\fR, \fBunless\fR, \fBfor\fR, \fBforeach\fR, \fBsub\fR, \fBwhile\fR, +\&\fBuntil\fR, and also with a preceding label. This can be changed with +the parameter \fB\-bbvtl=string\fR, or +\&\fB\-\-block\-brace\-vertical\-tightness\-list=string\fR, where \fBstring\fR is a +space-separated list of block types. For more information on the +possible values of this string, see \*(L"Specifying Block Types\*(R" +.Sp +For example, if we want to just apply this style to \f(CW\*(C`if\*(C'\fR, +\&\f(CW\*(C`elsif\*(C'\fR, and \f(CW\*(C`else\*(C'\fR blocks, we could use +\&\f(CW\*(C`perltidy \-bli \-bbvt=1 \-bbvtl=\*(Aqif elsif else\*(Aq\*(C'\fR. +.Sp +There is no vertical tightness control for closing block braces; with +one exception they will be placed on separate lines. +The exception is that a cascade of closing block braces may +be stacked on a single line. See \fB\-scbb\fR. +.IP "\fB\-sot\fR, \fB\-\-stack\-opening\-tokens\fR and related flags" 4 +.IX Item "-sot, --stack-opening-tokens and related flags" +The \fB\-sot\fR flag tells perltidy to \*(L"stack\*(R" opening tokens +when possible to avoid lines with isolated opening tokens. +.Sp +For example: +.Sp +.Vb 8 +\& # default +\& $opt_c = Text::CSV_XS\->new( +\& { +\& binary => 1, +\& sep_char => $opt_c, +\& always_quote => 1, +\& } +\& ); +\& +\& # \-sot +\& $opt_c = Text::CSV_XS\->new( { +\& binary => 1, +\& sep_char => $opt_c, +\& always_quote => 1, +\& } +\& ); +.Ve +.Sp +For detailed control of individual closing tokens the following +controls can be used: +.Sp +.Vb 4 +\& \-sop or \-\-stack\-opening\-paren +\& \-sohb or \-\-stack\-opening\-hash\-brace +\& \-sosb or \-\-stack\-opening\-square\-bracket +\& \-sobb or \-\-stack\-opening\-block\-brace +.Ve +.Sp +The flag \fB\-sot\fR is an abbreviation for \fB\-sop \-sohb \-sosb\fR. +.Sp +The flag \fB\-sobb\fR is a abbreviation for \fB\-bbvt=2 \-bbvtl='*'\fR. This +will case a cascade of opening block braces to appear on a single line, +although this an uncommon occurrence except in test scripts. +.IP "\fB\-sct\fR, \fB\-\-stack\-closing\-tokens\fR and related flags" 4 +.IX Item "-sct, --stack-closing-tokens and related flags" +The \fB\-sct\fR flag tells perltidy to \*(L"stack\*(R" closing tokens +when possible to avoid lines with isolated closing tokens. +.Sp +For example: +.Sp +.Vb 8 +\& # default +\& $opt_c = Text::CSV_XS\->new( +\& { +\& binary => 1, +\& sep_char => $opt_c, +\& always_quote => 1, +\& } +\& ); +\& +\& # \-sct +\& $opt_c = Text::CSV_XS\->new( +\& { +\& binary => 1, +\& sep_char => $opt_c, +\& always_quote => 1, +\& } ); +.Ve +.Sp +The \fB\-sct\fR flag is somewhat similar to the \fB\-vtc\fR flags, and in some +cases it can give a similar result. The difference is that the \fB\-vtc\fR +flags try to avoid lines with leading opening tokens by \*(L"hiding\*(R" them at +the end of a previous line, whereas the \fB\-sct\fR flag merely tries to +reduce the number of lines with isolated closing tokens by stacking them +but does not try to hide them. For example: +.Sp +.Vb 6 +\& # \-vtc=2 +\& $opt_c = Text::CSV_XS\->new( +\& { +\& binary => 1, +\& sep_char => $opt_c, +\& always_quote => 1, } ); +.Ve +.Sp +For detailed control of the stacking of individual closing tokens the +following controls can be used: +.Sp +.Vb 4 +\& \-scp or \-\-stack\-closing\-paren +\& \-schb or \-\-stack\-closing\-hash\-brace +\& \-scsb or \-\-stack\-closing\-square\-bracket +\& \-scbb or \-\-stack\-closing\-block\-brace +.Ve +.Sp +The flag \fB\-sct\fR is an abbreviation for stacking the non-block closing +tokens, \fB\-scp \-schb \-scsb\fR. +.Sp +Stacking of closing block braces, \fB\-scbb\fR, causes a cascade of isolated +closing block braces to be combined into a single line as in the following +example: +.Sp +.Vb 7 +\& # \-scbb: +\& for $w1 (@w1) { +\& for $w2 (@w2) { +\& for $w3 (@w3) { +\& for $w4 (@w4) { +\& push( @lines, "$w1 $w2 $w3 $w4\en" ); +\& } } } } +.Ve +.Sp +To simplify input even further for the case in which both opening and closing +non-block containers are stacked, the flag \fB\-sac\fR or \fB\-\-stack\-all\-containers\fR +is an abbreviation for \fB\-sot \-sot\fR. +.IP "\fB\-dnl\fR, \fB\-\-delete\-old\-newlines\fR" 4 +.IX Item "-dnl, --delete-old-newlines" +By default, perltidy first deletes all old line break locations, and then it +looks for good break points to match the desired line length. Use \fB\-ndnl\fR +or \fB\-\-nodelete\-old\-newlines\fR to force perltidy to retain all old line break +points. +.IP "\fB\-anl\fR, \fB\-\-add\-newlines\fR" 4 +.IX Item "-anl, --add-newlines" +By default, perltidy will add line breaks when necessary to create +continuations of long lines and to improve the script appearance. Use +\&\fB\-nanl\fR or \fB\-\-noadd\-newlines\fR to prevent any new line breaks. +.Sp +This flag does not prevent perltidy from eliminating existing line +breaks; see \fB\-\-freeze\-newlines\fR to completely prevent changes to line +break points. +.IP "Controlling whether perltidy breaks before or after operators" 4 +.IX Item "Controlling whether perltidy breaks before or after operators" +Four command line parameters provide some control over whether +a line break should be before or after specific token types. +Two parameters give detailed control: +.Sp +\&\fB\-wba=s\fR or \fB\-\-want\-break\-after=s\fR, and +.Sp +\&\fB\-wbb=s\fR or \fB\-\-want\-break\-before=s\fR. +.Sp +These parameters are each followed by a quoted string, \fBs\fR, containing +a list of token types (separated only by spaces). No more than one of each +of these parameters should be specified, because repeating a +command-line parameter always overwrites the previous one before +perltidy ever sees it. +.Sp +By default, perltidy breaks \fBafter\fR these token types: + % + \- * / x != == >= <= =~ !~ < > | & + = **= += *= &= <<= &&= \-= /= |= >>= ||= //= .= %= ^= x= +.Sp +And perltidy breaks \fBbefore\fR these token types by default: + . << >> \-> && || // +.Sp +To illustrate, to cause a break after a concatenation operator, \f(CW\*(Aq.\*(Aq\fR, +rather than before it, the command line would be +.Sp +.Vb 1 +\& \-wba="." +.Ve +.Sp +As another example, the following command would cause a break before +math operators \f(CW\*(Aq+\*(Aq\fR, \f(CW\*(Aq\-\*(Aq\fR, \f(CW\*(Aq/\*(Aq\fR, and \f(CW\*(Aq*\*(Aq\fR: +.Sp +.Vb 1 +\& \-wbb="+ \- / *" +.Ve +.Sp +These commands should work well for most of the token types that perltidy uses +(use \fB\-\-dump\-token\-types\fR for a list). Also try the \fB\-D\fR flag on a short +snippet of code and look at the .DEBUG file to see the tokenization. However, +for a few token types there may be conflicts with hardwired logic which cause +unexpected results. One example is curly braces, which should be controlled +with the parameter \fBbl\fR provided for that purpose. +.Sp +\&\fB\s-1WARNING\s0\fR Be sure to put these tokens in quotes to avoid having them +misinterpreted by your command shell. +.Sp +Two additional parameters are available which, though they provide no further +capability, can simplify input are: +.Sp +\&\fB\-baao\fR or \fB\-\-break\-after\-all\-operators\fR, +.Sp +\&\fB\-bbao\fR or \fB\-\-break\-before\-all\-operators\fR. +.Sp +The \-baao sets the default to be to break after all of the following operators: +.Sp +.Vb 3 +\& % + \- * / x != == >= <= =~ !~ < > | & +\& = **= += *= &= <<= &&= \-= /= |= >>= ||= //= .= %= ^= x= +\& . : ? && || and or err xor +.Ve +.Sp +and the \fB\-bbao\fR flag sets the default to break before all of these operators. +These can be used to define an initial break preference which can be fine-tuned +with the \fB\-wba\fR and \fB\-wbb\fR flags. For example, to break before all operators +except an \fB=\fR one could use \-\-bbao \-wba='=' rather than listing every +single perl operator except \fB=\fR on a \-wbb flag. +.SS "Controlling List Formatting" +.IX Subsection "Controlling List Formatting" +Perltidy attempts to place comma-separated arrays of values in tables +which look good. Its default algorithms usually work well, and they +have been improving with each release, but several parameters are +available to control list formatting. +.IP "\fB\-boc\fR, \fB\-\-break\-at\-old\-comma\-breakpoints\fR" 4 +.IX Item "-boc, --break-at-old-comma-breakpoints" +This flag tells perltidy to try to break at all old commas. This is not +the default. Normally, perltidy makes a best guess at list formatting, +and seldom uses old comma breakpoints. Usually this works well, +but consider: +.Sp +.Vb 5 +\& my @list = (1, +\& 1, 1, +\& 1, 2, 1, +\& 1, 3, 3, 1, +\& 1, 4, 6, 4, 1,); +.Ve +.Sp +The default formatting will flatten this down to one line: +.Sp +.Vb 2 +\& # perltidy (default) +\& my @list = ( 1, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 4, 6, 4, 1, ); +.Ve +.Sp +which hides the structure. Using \fB\-boc\fR, plus additional flags +to retain the original style, yields +.Sp +.Vb 6 +\& # perltidy \-boc \-lp \-pt=2 \-vt=1 \-vtc=1 +\& my @list = (1, +\& 1, 1, +\& 1, 2, 1, +\& 1, 3, 3, 1, +\& 1, 4, 6, 4, 1,); +.Ve +.Sp +A disadvantage of this flag is that all tables in the file +must already be nicely formatted. For another possibility see +the \-fs flag in \*(L"Skipping Selected Sections of Code\*(R". +.IP "\fB\-mft=n\fR, \fB\-\-maximum\-fields\-per\-table=n\fR" 4 +.IX Item "-mft=n, --maximum-fields-per-table=n" +If the computed number of fields for any table exceeds \fBn\fR, then it +will be reduced to \fBn\fR. The default value for \fBn\fR is a large number, +40. While this value should probably be left unchanged as a general +rule, it might be used on a small section of code to force a list to +have a particular number of fields per line, and then either the \fB\-boc\fR +flag could be used to retain this formatting, or a single comment could +be introduced somewhere to freeze the formatting in future applications +of perltidy. +.Sp +.Vb 9 +\& # perltidy \-mft=2 +\& @month_of_year = ( +\& \*(AqJan\*(Aq, \*(AqFeb\*(Aq, +\& \*(AqMar\*(Aq, \*(AqApr\*(Aq, +\& \*(AqMay\*(Aq, \*(AqJun\*(Aq, +\& \*(AqJul\*(Aq, \*(AqAug\*(Aq, +\& \*(AqSep\*(Aq, \*(AqOct\*(Aq, +\& \*(AqNov\*(Aq, \*(AqDec\*(Aq +\& ); +.Ve +.IP "\fB\-cab=n\fR, \fB\-\-comma\-arrow\-breakpoints=n\fR" 4 +.IX Item "-cab=n, --comma-arrow-breakpoints=n" +A comma which follows a comma arrow, '=>', is given special +consideration. In a long list, it is common to break at all such +commas. This parameter can be used to control how perltidy breaks at +these commas. (However, it will have no effect if old comma breaks are +being forced because \fB\-boc\fR is used). The possible values of \fBn\fR are: +.Sp +.Vb 10 +\& n=0 break at all commas after => +\& n=1 stable: break at all commas after => if container is open, +\& EXCEPT FOR one\-line containers +\& n=2 break at all commas after =>, BUT try to form the maximum +\& maximum one\-line container lengths +\& n=3 do not treat commas after => specially at all +\& n=4 break everything: like n=0 but ALSO break a short container with +\& a => not followed by a comma when \-vt=0 is used +\& n=5 stable: like n=1 but ALSO break at open one\-line containers when +\& \-vt=0 is used (default) +.Ve +.Sp +For example, given the following single line, perltidy by default will +not add any line breaks because it would break the existing one-line +container: +.Sp +.Vb 1 +\& bless { B => $B, Root => $Root } => $package; +.Ve +.Sp +Using \fB\-cab=0\fR will force a break after each comma-arrow item: +.Sp +.Vb 5 +\& # perltidy \-cab=0: +\& bless { +\& B => $B, +\& Root => $Root +\& } => $package; +.Ve +.Sp +If perltidy is subsequently run with this container broken, then by +default it will break after each '=>' because the container is now +broken. To reform a one-line container, the parameter \fB\-cab=2\fR could +be used. +.Sp +The flag \fB\-cab=3\fR can be used to prevent these commas from being +treated specially. In this case, an item such as \*(L"01\*(R" => 31 is +treated as a single item in a table. The number of fields in this table +will be determined by the same rules that are used for any other table. +Here is an example. +.Sp +.Vb 6 +\& # perltidy \-cab=3 +\& my %last_day = ( +\& "01" => 31, "02" => 29, "03" => 31, "04" => 30, +\& "05" => 31, "06" => 30, "07" => 31, "08" => 31, +\& "09" => 30, "10" => 31, "11" => 30, "12" => 31 +\& ); +.Ve +.SS "Retaining or Ignoring Existing Line Breaks" +.IX Subsection "Retaining or Ignoring Existing Line Breaks" +Several additional parameters are available for controlling the extent +to which line breaks in the input script influence the output script. +In most cases, the default parameter values are set so that, if a choice +is possible, the output style follows the input style. For example, if +a short logical container is broken in the input script, then the +default behavior is for it to remain broken in the output script. +.PP +Most of the parameters in this section would only be required for a +one-time conversion of a script from short container lengths to longer +container lengths. The opposite effect, of converting long container +lengths to shorter lengths, can be obtained by temporarily using a short +maximum line length. +.IP "\fB\-bol\fR, \fB\-\-break\-at\-old\-logical\-breakpoints\fR" 4 +.IX Item "-bol, --break-at-old-logical-breakpoints" +By default, if a logical expression is broken at a \f(CW\*(C`&&\*(C'\fR, \f(CW\*(C`||\*(C'\fR, \f(CW\*(C`and\*(C'\fR, +or \f(CW\*(C`or\*(C'\fR, then the container will remain broken. Also, breaks +at internal keywords \f(CW\*(C`if\*(C'\fR and \f(CW\*(C`unless\*(C'\fR will normally be retained. +To prevent this, and thus form longer lines, use \fB\-nbol\fR. +.IP "\fB\-bok\fR, \fB\-\-break\-at\-old\-keyword\-breakpoints\fR" 4 +.IX Item "-bok, --break-at-old-keyword-breakpoints" +By default, perltidy will retain a breakpoint before keywords which may +return lists, such as \f(CW\*(C`sort\*(C'\fR and . This allows chains of these +operators to be displayed one per line. Use \fB\-nbok\fR to prevent +retaining these breakpoints. +.IP "\fB\-bot\fR, \fB\-\-break\-at\-old\-ternary\-breakpoints\fR" 4 +.IX Item "-bot, --break-at-old-ternary-breakpoints" +By default, if a conditional (ternary) operator is broken at a \f(CW\*(C`:\*(C'\fR, +then it will remain broken. To prevent this, and thereby +form longer lines, use \fB\-nbot\fR. +.IP "\fB\-boa\fR, \fB\-\-break\-at\-old\-attribute\-breakpoints\fR" 4 +.IX Item "-boa, --break-at-old-attribute-breakpoints" +By default, if an attribute list is broken at a \f(CW\*(C`:\*(C'\fR in the source file, then +it will remain broken. For example, given the following code, the line breaks +at the ':'s will be retained: +.Sp +.Vb 4 +\& my @field +\& : field +\& : Default(1) +\& : Get(\*(AqName\*(Aq => \*(Aqfoo\*(Aq) : Set(\*(AqName\*(Aq); +.Ve +.Sp +If the attributes are on a single line in the source code then they will remain +on a single line if possible. +.Sp +To prevent this, and thereby always form longer lines, use \fB\-nboa\fR. +.IP "\fB\-iob\fR, \fB\-\-ignore\-old\-breakpoints\fR" 4 +.IX Item "-iob, --ignore-old-breakpoints" +Use this flag to tell perltidy to ignore existing line breaks to the +maximum extent possible. This will tend to produce the longest possible +containers, regardless of type, which do not exceed the line length +limit. +.IP "\fB\-kis\fR, \fB\-\-keep\-interior\-semicolons\fR" 4 +.IX Item "-kis, --keep-interior-semicolons" +Use the \fB\-kis\fR flag to prevent breaking at a semicolon if +there was no break there in the input file. Normally +perltidy places a newline after each semicolon which +terminates a statement unless several statements are +contained within a one-line brace block. To illustrate, +consider the following input lines: +.Sp +.Vb 2 +\& dbmclose(%verb_delim); undef %verb_delim; +\& dbmclose(%expanded); undef %expanded; +.Ve +.Sp +The default is to break after each statement, giving +.Sp +.Vb 4 +\& dbmclose(%verb_delim); +\& undef %verb_delim; +\& dbmclose(%expanded); +\& undef %expanded; +.Ve +.Sp +With \fBperltidy \-kis\fR the multiple statements are retained: +.Sp +.Vb 2 +\& dbmclose(%verb_delim); undef %verb_delim; +\& dbmclose(%expanded); undef %expanded; +.Ve +.Sp +The statements are still subject to the specified value +of \fBmaximum-line-length\fR and will be broken if this +maximum is exceeded. +.SS "Blank Line Control" +.IX Subsection "Blank Line Control" +Blank lines can improve the readability of a script if they are carefully +placed. Perltidy has several commands for controlling the insertion, +retention, and removal of blank lines. +.IP "\fB\-fbl\fR, \fB\-\-freeze\-blank\-lines\fR" 4 +.IX Item "-fbl, --freeze-blank-lines" +Set \fB\-fbl\fR if you want to the blank lines in your script to +remain exactly as they are. The rest of the parameters in +this section may then be ignored. (Note: setting the \fB\-fbl\fR flag +is equivalent to setting \fB\-mbl=0\fR and \fB\-kbl=2\fR). +.IP "\fB\-bbc\fR, \fB\-\-blanks\-before\-comments\fR" 4 +.IX Item "-bbc, --blanks-before-comments" +A blank line will be introduced before a full-line comment. This is the +default. Use \fB\-nbbc\fR or \fB\-\-noblanks\-before\-comments\fR to prevent +such blank lines from being introduced. +.IP "\fB\-blbs=n\fR, \fB\-\-blank\-lines\-before\-subs=n\fR" 4 +.IX Item "-blbs=n, --blank-lines-before-subs=n" +The parameter \fB\-blbs=n\fR requests that least \fBn\fR blank lines precede a sub +definition which does not follow a comment and which is more than one-line +long. The default is <\-blbs=1>. \fB\s-1BEGIN\s0\fR and \fB\s-1END\s0\fR blocks are included. +.Sp +The requested number of blanks statement will be inserted regardless of the +value of \fB\-\-maximum\-consecutive\-blank\-lines=n\fR (\fB\-mbl=n\fR) with the exception +that if \fB\-mbl=0\fR then no blanks will be output. +.Sp +This parameter interacts with the value \fBk\fR of the parameter \fB\-\-maximum\-consecutive\-blank\-lines=k\fR (\fB\-mbl=k\fR) as follows: +.Sp +1. If \fB\-mbl=0\fR then no blanks will be output. This allows all blanks to be suppressed with a single parameter. Otherwise, +.Sp +2. If the number of old blank lines in the script is less than \fBn\fR then +additional blanks will be inserted to make the total \fBn\fR regardless of the +value of \fB\-mbl=k\fR. +.Sp +3. If the number of old blank lines in the script equals or exceeds \fBn\fR then +this parameter has no effect, however the total will not exceed +value specified on the \fB\-mbl=k\fR flag. +.IP "\fB\-blbp=n\fR, \fB\-\-blank\-lines\-before\-packages=n\fR" 4 +.IX Item "-blbp=n, --blank-lines-before-packages=n" +The parameter \fB\-blbp=n\fR requests that least \fBn\fR blank lines precede a package +which does not follow a comment. The default is \fB\-blbp=1\fR. +.Sp +This parameter interacts with the value \fBk\fR of the parameter +\&\fB\-\-maximum\-consecutive\-blank\-lines=k\fR (\fB\-mbl=k\fR) in the same way as described +for the previous item \fB\-blbs=n\fR. +.IP "\fB\-bbs\fR, \fB\-\-blanks\-before\-subs\fR" 4 +.IX Item "-bbs, --blanks-before-subs" +For compatibility with previous versions, \fB\-bbs\fR or \fB\-\-blanks\-before\-subs\fR +is equivalent to \fI\-blbp=1\fR and \fI\-blbs=1\fR. +.Sp +Likewise, \fB\-nbbs\fR or \fB\-\-noblanks\-before\-subs\fR +is equivalent to \fI\-blbp=0\fR and \fI\-blbs=0\fR. +.IP "\fB\-bbb\fR, \fB\-\-blanks\-before\-blocks\fR" 4 +.IX Item "-bbb, --blanks-before-blocks" +A blank line will be introduced before blocks of coding delimited by +\&\fBfor\fR, \fBforeach\fR, \fBwhile\fR, \fBuntil\fR, and \fBif\fR, \fBunless\fR, in the following +circumstances: +.RS 4 +.IP "\(bu" 4 +The block is not preceded by a comment. +.IP "\(bu" 4 +The block is not a one-line block. +.IP "\(bu" 4 +The number of consecutive non-blank lines at the current indentation depth is at least \fB\-lbl\fR +(see next section). +.RE +.RS 4 +.Sp +This is the default. The intention of this option is to introduce +some space within dense coding. +This is negated with \fB\-nbbb\fR or \fB\-\-noblanks\-before\-blocks\fR. +.RE +.IP "\fB\-lbl=n\fR \fB\-\-long\-block\-line\-count=n\fR" 4 +.IX Item "-lbl=n --long-block-line-count=n" +This controls how often perltidy is allowed to add blank lines before +certain block types (see previous section). The default is 8. Entering +a value of \fB0\fR is equivalent to entering a very large number. +.IP "\fB\-blao=i\fR or \fB\-\-blank\-lines\-after\-opening\-block=i\fR" 4 +.IX Item "-blao=i or --blank-lines-after-opening-block=i" +This control places a minimum of \fBi\fR blank lines \fBafter\fR a line which \fBends\fR +with an opening block brace of a specified type. By default, this only applies +to the block of a named \fBsub\fR, but this can be changed (see \fB\-blaol\fR below). +The default is not to do this (\fBi=0\fR). +.Sp +Please see the note below on using the \fB\-blao\fR and \fB\-blbc\fR options. +.IP "\fB\-blbc=i\fR or \fB\-\-blank\-lines\-before\-closing\-block=i\fR" 4 +.IX Item "-blbc=i or --blank-lines-before-closing-block=i" +This control places a minimum of \fBi\fR blank lines \fBbefore\fR a line which +\&\fBbegins\fR with a closing block brace of a specified type. By default, this +only applies to the block of a named \fBsub\fR, but this can be changed (see +\&\fB\-blbcl\fR below). The default is not to do this (\fBi=0\fR). +.IP "\fB\-blaol=s\fR or \fB\-\-blank\-lines\-after\-opening\-block\-list=s\fR" 4 +.IX Item "-blaol=s or --blank-lines-after-opening-block-list=s" +The parameter \fBs\fR is a list of block type keywords to which the flag \fB\-blao\fR +should apply. The section \*(L"Specifying Block Types\*(R" explains how to list +block types. +.IP "\fB\-blbcl=s\fR or \fB\-\-blank\-lines\-before\-closing\-block\-list=s\fR" 4 +.IX Item "-blbcl=s or --blank-lines-before-closing-block-list=s" +This parameter is a list of block type keywords to which the flag \fB\-blbc\fR +should apply. The section \*(L"Specifying Block Types\*(R" explains how to list +block types. +.IP "Note on using the \fB\-blao\fR and \fB\-blbc\fR options." 4 +.IX Item "Note on using the -blao and -blbc options." +These blank line controls introduce a certain minimum number of blank lines in +the text, but the final number of blank lines may be greater, depending on +values of the other blank line controls and the number of old blank lines. A +consequence is that introducing blank lines with these and other controls +cannot be exactly undone, so some experimentation with these controls is +recommended before using them. +.Sp +For example, suppose that for some reason we decide to introduce one blank +space at the beginning and ending of all blocks. We could do +this using +.Sp +.Vb 1 +\& perltidy \-blao=2 \-blbc=2 \-blaol=\*(Aq*\*(Aq \-blbcl=\*(Aq*\*(Aq filename +.Ve +.Sp +Now suppose the script continues to be developed, but at some later date we +decide we don't want these spaces after all. we might expect that running with +the flags \fB\-blao=0\fR and \fB\-blbc=0\fR will undo them. However, by default +perltidy retains single blank lines, so the blank lines remain. +.Sp +We can easily fix this by telling perltidy to ignore old blank lines by +including the added parameter \fB\-kbl=0\fR and rerunning. Then the unwanted blank +lines will be gone. However, this will cause all old blank lines to be +ignored, perhaps even some that were added by hand to improve formatting. So +please be cautious when using these parameters. +.IP "\fB\-mbl=n\fR \fB\-\-maximum\-consecutive\-blank\-lines=n\fR" 4 +.IX Item "-mbl=n --maximum-consecutive-blank-lines=n" +This parameter specifies the maximum number of consecutive blank lines which +will be output within code sections of a script. The default is n=1. If the +input file has more than n consecutive blank lines, the number will be reduced +to n except as noted above for the \fB\-blbp\fR and \fB\-blbs\fR parameters. If \fBn=0\fR +then no blank lines will be output (unless all old blank lines are retained +with the \fB\-kbl=2\fR flag of the next section). +.Sp +This flag obviously does not apply to pod sections, +here-documents, and quotes. +.IP "\fB\-kbl=n\fR, \fB\-\-keep\-old\-blank\-lines=n\fR" 4 +.IX Item "-kbl=n, --keep-old-blank-lines=n" +The \fB\-kbl=n\fR flag gives you control over how your existing blank lines are +treated. +.Sp +The possible values of \fBn\fR are: +.Sp +.Vb 3 +\& n=0 ignore all old blank lines +\& n=1 stable: keep old blanks, but limited by the value of the B<\-mbl=n> flag +\& n=2 keep all old blank lines, regardless of the value of the B<\-mbl=n> flag +.Ve +.Sp +The default is \fBn=1\fR. +.IP "\fB\-sob\fR, \fB\-\-swallow\-optional\-blank\-lines\fR" 4 +.IX Item "-sob, --swallow-optional-blank-lines" +This is equivalent to \fBkbl=0\fR and is included for compatibility with +previous versions. +.IP "\fB\-nsob\fR, \fB\-\-noswallow\-optional\-blank\-lines\fR" 4 +.IX Item "-nsob, --noswallow-optional-blank-lines" +This is equivalent to \fBkbl=1\fR and is included for compatibility with +previous versions. +.SS "Styles" +.IX Subsection "Styles" +A style refers to a convenient collection of existing parameters. +.IP "\fB\-gnu\fR, \fB\-\-gnu\-style\fR" 4 +.IX Item "-gnu, --gnu-style" +\&\fB\-gnu\fR gives an approximation to the \s-1GNU\s0 Coding Standards (which do +not apply to perl) as they are sometimes implemented. At present, this +style overrides the default style with the following parameters: +.Sp +.Vb 1 +\& \-lp \-bl \-noll \-pt=2 \-bt=2 \-sbt=2 \-icp +.Ve +.IP "\fB\-pbp\fR, \fB\-\-perl\-best\-practices\fR" 4 +.IX Item "-pbp, --perl-best-practices" +\&\fB\-pbp\fR is an abbreviation for the parameters in the book \fBPerl Best Practices\fR +by Damian Conway: +.Sp +.Vb 3 +\& \-l=78 \-i=4 \-ci=4 \-st \-se \-vt=2 \-cti=0 \-pt=1 \-bt=1 \-sbt=1 \-bbt=1 \-nsfs \-nolq +\& \-wbb="% + \- * / x != == >= <= =~ !~ < > | & = +\& **= += *= &= <<= &&= \-= /= |= >>= ||= //= .= %= ^= x=" +.Ve +.Sp +Please note that this parameter set includes \-st and \-se flags, which make +perltidy act as a filter on one file only. These can be overridden by placing +\&\fB\-nst\fR and/or \fB\-nse\fR after the \-pbp parameter. +.Sp +Also note that the value of continuation indentation, \-ci=4, is equal to the +value of the full indentation, \-i=4. In some complex statements perltidy will +produce nicer results with \-ci=2. This can be implemented by including \-ci=2 +after the \-pbp parameter. For example, +.Sp +.Vb 11 +\& # perltidy \-pbp +\& $self\->{_text} = ( +\& !$section ? \*(Aq\*(Aq +\& : $type eq \*(Aqitem\*(Aq ? "the $section entry" +\& : "the section on $section" +\& ) +\& . ( +\& $page +\& ? ( $section ? \*(Aq in \*(Aq : \*(Aq\*(Aq ) . "the $page$page_ext manpage" +\& : \*(Aq elsewhere in this document\*(Aq +\& ); +\& +\& # perltidy \-pbp \-ci=2 +\& $self\->{_text} = ( +\& !$section ? \*(Aq\*(Aq +\& : $type eq \*(Aqitem\*(Aq ? "the $section entry" +\& : "the section on $section" +\& ) +\& . ( +\& $page +\& ? ( $section ? \*(Aq in \*(Aq : \*(Aq\*(Aq ) . "the $page$page_ext manpage" +\& : \*(Aq elsewhere in this document\*(Aq +\& ); +.Ve +.SS "Controlling Vertical Alignment" +.IX Subsection "Controlling Vertical Alignment" +Vertical alignment refers to lining up certain symbols in list of consecutive +similar lines to improve readability. For example, the \*(L"fat commas\*(R" are +aligned in the following statement: +.PP +.Vb 5 +\& $data = $pkg\->new( +\& PeerAddr => join( ".", @port[ 0 .. 3 ] ), +\& PeerPort => $port[4] * 256 + $port[5], +\& Proto => \*(Aqtcp\*(Aq +\& ); +.Ve +.PP +The only explicit control on vertical alignment is to turn it off using +\&\fB\-novalign\fR, a flag mainly intended for debugging. However, vertical +alignment can be forced to stop and restart by selectively introducing blank +lines. For example, a blank has been inserted in the following code +to keep somewhat similar things aligned. +.PP +.Vb 4 +\& %option_range = ( +\& \*(Aqformat\*(Aq => [ \*(Aqtidy\*(Aq, \*(Aqhtml\*(Aq, \*(Aquser\*(Aq ], +\& \*(Aqoutput\-line\-ending\*(Aq => [ \*(Aqdos\*(Aq, \*(Aqwin\*(Aq, \*(Aqmac\*(Aq, \*(Aqunix\*(Aq ], +\& \*(Aqcharacter\-encoding\*(Aq => [ \*(Aqnone\*(Aq, \*(Aqutf8\*(Aq ], +\& +\& \*(Aqblock\-brace\-tightness\*(Aq => [ 0, 2 ], +\& \*(Aqbrace\-tightness\*(Aq => [ 0, 2 ], +\& \*(Aqparen\-tightness\*(Aq => [ 0, 2 ], +\& \*(Aqsquare\-bracket\-tightness\*(Aq => [ 0, 2 ], +\& ); +.Ve +.SS "Other Controls" +.IX Subsection "Other Controls" +.IP "Deleting selected text" 4 +.IX Item "Deleting selected text" +Perltidy can selectively delete comments and/or pod documentation. The +command \fB\-dac\fR or \fB\-\-delete\-all\-comments\fR will delete all comments +\&\fBand\fR all pod documentation, leaving just code and any leading system +control lines. +.Sp +The command \fB\-dp\fR or \fB\-\-delete\-pod\fR will remove all pod documentation +(but not comments). +.Sp +Two commands which remove comments (but not pod) are: \fB\-dbc\fR or +\&\fB\-\-delete\-block\-comments\fR and \fB\-dsc\fR or \fB\-\-delete\-side\-comments\fR. +(Hanging side comments will be deleted with block comments here.) +.Sp +The negatives of these commands also work, and are the defaults. When +block comments are deleted, any leading 'hash\-bang' will be retained. +Also, if the \fB\-x\fR flag is used, any system commands before a leading +hash-bang will be retained (even if they are in the form of comments). +.IP "Writing selected text to a file" 4 +.IX Item "Writing selected text to a file" +When perltidy writes a formatted text file, it has the ability to also +send selected text to a file with a \fI.TEE\fR extension. This text can +include comments and pod documentation. +.Sp +The command \fB\-tac\fR or \fB\-\-tee\-all\-comments\fR will write all comments +\&\fBand\fR all pod documentation. +.Sp +The command \fB\-tp\fR or \fB\-\-tee\-pod\fR will write all pod documentation (but +not comments). +.Sp +The commands which write comments (but not pod) are: \fB\-tbc\fR or +\&\fB\-\-tee\-block\-comments\fR and \fB\-tsc\fR or \fB\-\-tee\-side\-comments\fR. +(Hanging side comments will be written with block comments here.) +.Sp +The negatives of these commands also work, and are the defaults. +.IP "Using a \fI.perltidyrc\fR command file" 4 +.IX Item "Using a .perltidyrc command file" +If you use perltidy frequently, you probably won't be happy until you +create a \fI.perltidyrc\fR file to avoid typing commonly-used parameters. +Perltidy will first look in your current directory for a command file +named \fI.perltidyrc\fR. If it does not find one, it will continue looking +for one in other standard locations. +.Sp +These other locations are system-dependent, and may be displayed with +the command \f(CW\*(C`perltidy \-dpro\*(C'\fR. Under Unix systems, it will first look +for an environment variable \fB\s-1PERLTIDY\s0\fR. Then it will look for a +\&\fI.perltidyrc\fR file in the home directory, and then for a system-wide +file \fI/usr/local/etc/perltidyrc\fR, and then it will look for +\&\fI/etc/perltidyrc\fR. Note that these last two system-wide files do not +have a leading dot. Further system-dependent information will be found +in the \s-1INSTALL\s0 file distributed with perltidy. +.Sp +Under Windows, perltidy will also search for a configuration file named perltidy.ini since Windows does not allow files with a leading period (.). +Use \f(CW\*(C`perltidy \-dpro\*(C'\fR to see the possible locations for your system. +An example might be \fIC:\eDocuments and Settings\eAll Users\eperltidy.ini\fR. +.Sp +Another option is the use of the \s-1PERLTIDY\s0 environment variable. +The method for setting environment variables depends upon the version of +Windows that you are using. Instructions for Windows 95 and later versions can +be found here: +.Sp +http://www.netmanage.com/000/20021101_005_tcm21\-6336.pdf +.Sp +Under Windows \s-1NT / 2000 / XP\s0 the \s-1PERLTIDY\s0 environment variable can be placed in +either the user section or the system section. The later makes the +configuration file common to all users on the machine. Be sure to enter the +full path of the configuration file in the value of the environment variable. +Ex. PERLTIDY=C:\eDocuments and Settings\eperltidy.ini +.Sp +The configuration file is free format, and simply a list of parameters, just as +they would be entered on a command line. Any number of lines may be used, with +any number of parameters per line, although it may be easiest to read with one +parameter per line. Comment text begins with a #, and there must +also be a space before the # for side comments. It is a good idea to +put complex parameters in either single or double quotes. +.Sp +Here is an example of a \fI.perltidyrc\fR file: +.Sp +.Vb 8 +\& # This is a simple of a .perltidyrc configuration file +\& # This implements a highly spaced style +\& \-se # errors to standard error output +\& \-w # show all warnings +\& \-bl # braces on new lines +\& \-pt=0 # parens not tight at all +\& \-bt=0 # braces not tight +\& \-sbt=0 # square brackets not tight +.Ve +.Sp +The parameters in the \fI.perltidyrc\fR file are installed first, so any +parameters given on the command line will have priority over them. +.Sp +To avoid confusion, perltidy ignores any command in the .perltidyrc +file which would cause some kind of dump and an exit. These are: +.Sp +.Vb 1 +\& \-h \-v \-ddf \-dln \-dop \-dsn \-dtt \-dwls \-dwrs \-ss +.Ve +.Sp +There are several options may be helpful in debugging a \fI.perltidyrc\fR +file: +.RS 4 +.IP "\(bu" 4 +A very helpful command is \fB\-\-dump\-profile\fR or \fB\-dpro\fR. It writes a +list of all configuration filenames tested to standard output, and +if a file is found, it dumps the content to standard output before +exiting. So, to find out where perltidy looks for its configuration +files, and which one if any it selects, just enter +.Sp +.Vb 1 +\& perltidy \-dpro +.Ve +.IP "\(bu" 4 +It may be simplest to develop and test configuration files with +alternative names, and invoke them with \fB\-pro=filename\fR on the command +line. Then rename the desired file to \fI.perltidyrc\fR when finished. +.IP "\(bu" 4 +The parameters in the \fI.perltidyrc\fR file can be switched off with +the \fB\-npro\fR option. +.IP "\(bu" 4 +The commands \fB\-\-dump\-options\fR, \fB\-\-dump\-defaults\fR, \fB\-\-dump\-long\-names\fR, +and \fB\-\-dump\-short\-names\fR, all described below, may all be helpful. +.RE +.RS 4 +.RE +.IP "Creating a new abbreviation" 4 +.IX Item "Creating a new abbreviation" +A special notation is available for use in a \fI.perltidyrc\fR file +for creating an abbreviation for a group +of options. This can be used to create a +shorthand for one or more styles which are frequently, but not always, +used. The notation is to group the options within curly braces which +are preceded by the name of the alias (without leading dashes), like this: +.Sp +.Vb 4 +\& newword { +\& \-opt1 +\& \-opt2 +\& } +.Ve +.Sp +where \fBnewword\fR is the abbreviation, and \fBopt1\fR, etc, are existing parameters +\&\fIor other abbreviations\fR. The main syntax requirement is that the new +abbreviation along with its opening curly brace must begin on a new line. +Space before and after the curly braces is optional. +For a +specific example, the following line +.Sp +.Vb 1 +\& airy {\-bl \-pt=0 \-bt=0 \-sbt=0} +.Ve +.Sp +could be placed in a \fI.perltidyrc\fR file, and then invoked at will with +.Sp +.Vb 1 +\& perltidy \-airy somefile.pl +.Ve +.Sp +(Either \f(CW\*(C`\-airy\*(C'\fR or \f(CW\*(C`\-\-airy\*(C'\fR may be used). +.IP "Skipping leading non-perl commands with \fB\-x\fR or \fB\-\-look\-for\-hash\-bang\fR" 4 +.IX Item "Skipping leading non-perl commands with -x or --look-for-hash-bang" +If your script has leading lines of system commands or other text which +are not valid perl code, and which are separated from the start of the +perl code by a \*(L"hash-bang\*(R" line, ( a line of the form \f(CW\*(C`#!...perl\*(C'\fR ), +you must use the \fB\-x\fR flag to tell perltidy not to parse and format any +lines before the \*(L"hash-bang\*(R" line. This option also invokes perl with a +\&\-x flag when checking the syntax. This option was originally added to +allow perltidy to parse interactive \s-1VMS\s0 scripts, but it should be used +for any script which is normally invoked with \f(CW\*(C`perl \-x\*(C'\fR. +.IP "Making a file unreadable" 4 +.IX Item "Making a file unreadable" +The goal of perltidy is to improve the readability of files, but there +are two commands which have the opposite effect, \fB\-\-mangle\fR and +\&\fB\-\-extrude\fR. They are actually +merely aliases for combinations of other parameters. Both of these +strip all possible whitespace, but leave comments and pod documents, +so that they are essentially reversible. The +difference between these is that \fB\-\-mangle\fR puts the fewest possible +line breaks in a script while \fB\-\-extrude\fR puts the maximum possible. +Note that these options do not provided any meaningful obfuscation, because +perltidy can be used to reformat the files. They were originally +developed to help test the tokenization logic of perltidy, but they +have other uses. +One use for \fB\-\-mangle\fR is the following: +.Sp +.Vb 1 +\& perltidy \-\-mangle myfile.pl \-st | perltidy \-o myfile.pl.new +.Ve +.Sp +This will form the maximum possible number of one-line blocks (see next +section), and can sometimes help clean up a badly formatted script. +.Sp +A similar technique can be used with \fB\-\-extrude\fR instead of \fB\-\-mangle\fR +to make the minimum number of one-line blocks. +.Sp +Another use for \fB\-\-mangle\fR is to combine it with \fB\-dac\fR to reduce +the file size of a perl script. +.IP "One-line blocks" 4 +.IX Item "One-line blocks" +There are a few points to note regarding one-line blocks. A one-line +block is something like this, +.Sp +.Vb 1 +\& if ($x > 0) { $y = 1 / $x } +.Ve +.Sp +where the contents within the curly braces is short enough to fit +on a single line. +.Sp +With few exceptions, perltidy retains existing one-line blocks, if it +is possible within the line-length constraint, but it does not attempt +to form new ones. In other words, perltidy will try to follow the +one-line block style of the input file. +.Sp +If an existing one-line block is longer than the maximum line length, +however, it will be broken into multiple lines. When this happens, perltidy +checks for and adds any optional terminating semicolon (unless the \fB\-nasc\fR +option is used) if the block is a code block. +.Sp +The main exception is that perltidy will attempt to form new one-line +blocks following the keywords \f(CW\*(C`map\*(C'\fR, \f(CW\*(C`eval\*(C'\fR, and \f(CW\*(C`sort\*(C'\fR, because +these code blocks are often small and most clearly displayed in a single +line. +.Sp +One-line block rules can conflict with the cuddled-else option. When +the cuddled-else option is used, perltidy retains existing one-line +blocks, even if they do not obey cuddled-else formatting. +.Sp +Occasionally, when one-line blocks get broken because they exceed the +available line length, the formatting will violate the requested brace style. +If this happens, reformatting the script a second time should correct +the problem. +.IP "Debugging" 4 +.IX Item "Debugging" +The following flags are available for debugging: +.Sp +\&\fB\-\-dump\-cuddled\-block\-list\fR or \fB\-dcbl\fR will dump to standard output the +internal hash of cuddled block types created by a \fB\-cuddled\-block\-list\fR input +string. +.Sp +\&\fB\-\-dump\-defaults\fR or \fB\-ddf\fR will write the default option set to standard output and quit +.Sp +\&\fB\-\-dump\-profile\fR or \fB\-dpro\fR will write the name of the current +configuration file and its contents to standard output and quit. +.Sp +\&\fB\-\-dump\-options\fR or \fB\-dop\fR will write current option set to standard +output and quit. +.Sp +\&\fB\-\-dump\-long\-names\fR or \fB\-dln\fR will write all command line long names (passed +to Get_options) to standard output and quit. +.Sp +\&\fB\-\-dump\-short\-names\fR or \fB\-dsn\fR will write all command line short names +to standard output and quit. +.Sp +\&\fB\-\-dump\-token\-types\fR or \fB\-dtt\fR will write a list of all token types +to standard output and quit. +.Sp +\&\fB\-\-dump\-want\-left\-space\fR or \fB\-dwls\fR will write the hash \f(CW%want_left_space\fR +to standard output and quit. See the section on controlling whitespace +around tokens. +.Sp +\&\fB\-\-dump\-want\-right\-space\fR or \fB\-dwrs\fR will write the hash \f(CW%want_right_space\fR +to standard output and quit. See the section on controlling whitespace +around tokens. +.Sp +\&\fB\-\-no\-memoize\fR or \fB\-nmem\fR will turn of memoizing. +Memoization can reduce run time when running perltidy repeatedly in a +single process. It is on by default but can be deactivated for +testing with \fB\-nmem\fR. +.Sp +\&\fB\-\-no\-timestamp\fR or \fB\-nts\fR will eliminate any time stamps in output files to prevent +differences in dates from causing test installation scripts to fail. There are just +a couple of places where timestamps normally occur. One is in the headers of +html files, and another is when the \fB\-cscw\fR option is selected. The default is +to allow timestamps (\fB\-\-timestamp\fR or \fB\-ts\fR). +.Sp +\&\fB\-\-file\-size\-order\fR or \fB\-fso\fR will cause files to be processed in order of +increasing size, when multiple files are being processed. This is useful +during program development, when large numbers of files with varying sizes are +processed, because it can reduce virtual memory usage. +.Sp +\&\fB\-DEBUG\fR will write a file with extension \fI.DEBUG\fR for each input file +showing the tokenization of all lines of code. +.IP "Working with MakeMaker, AutoLoader and SelfLoader" 4 +.IX Item "Working with MakeMaker, AutoLoader and SelfLoader" +The first \f(CW$VERSION\fR line of a file which might be eval'd by MakeMaker +is passed through unchanged except for indentation. +Use \fB\-\-nopass\-version\-line\fR, or \fB\-npvl\fR, to deactivate this feature. +.Sp +If the AutoLoader module is used, perltidy will continue formatting +code after seeing an _\|_END_\|_ line. +Use \fB\-\-nolook\-for\-autoloader\fR, or \fB\-nlal\fR, to deactivate this feature. +.Sp +Likewise, if the SelfLoader module is used, perltidy will continue formatting +code after seeing a _\|_DATA_\|_ line. +Use \fB\-\-nolook\-for\-selfloader\fR, or \fB\-nlsl\fR, to deactivate this feature. +.IP "Working around problems with older version of Perl" 4 +.IX Item "Working around problems with older version of Perl" +Perltidy contains a number of rules which help avoid known subtleties +and problems with older versions of perl, and these rules always +take priority over whatever formatting flags have been set. For example, +perltidy will usually avoid starting a new line with a bareword, because +this might cause problems if \f(CW\*(C`use strict\*(C'\fR is active. +.Sp +There is no way to override these rules. +.SH "HTML OPTIONS" +.IX Header "HTML OPTIONS" +.IP "The \fB\-html\fR master switch" 4 +.IX Item "The -html master switch" +The flag \fB\-html\fR causes perltidy to write an html file with extension +\&\fI.html\fR. So, for example, the following command +.Sp +.Vb 1 +\& perltidy \-html somefile.pl +.Ve +.Sp +will produce a syntax-colored html file named \fIsomefile.pl.html\fR +which may be viewed with a browser. +.Sp +\&\fBPlease Note\fR: In this case, perltidy does not do any formatting to the +input file, and it does not write a formatted file with extension +\&\fI.tdy\fR. This means that two perltidy runs are required to create a +fully reformatted, html copy of a script. +.IP "The \fB\-pre\fR flag for code snippets" 4 +.IX Item "The -pre flag for code snippets" +When the \fB\-pre\fR flag is given, only the pre-formatted section, within +the <\s-1PRE\s0> and
tags, will be output. This simplifies inclusion +of the output in other files. The default is to output a complete +web page. +.IP "The \fB\-nnn\fR flag for line numbering" 4 +.IX Item "The -nnn flag for line numbering" +When the \fB\-nnn\fR flag is given, the output lines will be numbered. +.IP "The \fB\-toc\fR, or \fB\-\-html\-table\-of\-contents\fR flag" 4 +.IX Item "The -toc, or --html-table-of-contents flag" +By default, a table of contents to packages and subroutines will be +written at the start of html output. Use \fB\-ntoc\fR to prevent this. +This might be useful, for example, for a pod document which contains a +number of unrelated code snippets. This flag only influences the code +table of contents; it has no effect on any table of contents produced by +pod2html (see next item). +.IP "The \fB\-pod\fR, or \fB\-\-pod2html\fR flag" 4 +.IX Item "The -pod, or --pod2html flag" +There are two options for formatting pod documentation. The default is +to pass the pod through the Pod::Html module (which forms the basis of +the pod2html utility). Any code sections are formatted by perltidy, and +the results then merged. Note: perltidy creates a temporary file when +Pod::Html is used; see \*(L"\s-1FILES\*(R"\s0. Also, Pod::Html creates temporary +files for its cache. +.Sp +\&\s-1NOTE:\s0 Perltidy counts the number of \f(CW\*(C`=cut\*(C'\fR lines, and either moves the +pod text to the top of the html file if there is one \f(CW\*(C`=cut\*(C'\fR, or leaves +the pod text in its original order (interleaved with code) otherwise. +.Sp +Most of the flags accepted by pod2html may be included in the perltidy +command line, and they will be passed to pod2html. In some cases, +the flags have a prefix \f(CW\*(C`pod\*(C'\fR to emphasize that they are for the +pod2html, and this prefix will be removed before they are passed to +pod2html. The flags which have the additional \f(CW\*(C`pod\*(C'\fR prefix are: +.Sp +.Vb 2 +\& \-\-[no]podheader \-\-[no]podindex \-\-[no]podrecurse \-\-[no]podquiet +\& \-\-[no]podverbose \-\-podflush +.Ve +.Sp +The flags which are unchanged from their use in pod2html are: +.Sp +.Vb 2 +\& \-\-backlink=s \-\-cachedir=s \-\-htmlroot=s \-\-libpods=s \-\-title=s +\& \-\-podpath=s \-\-podroot=s +.Ve +.Sp +where 's' is an appropriate character string. Not all of these flags are +available in older versions of Pod::Html. See your Pod::Html documentation for +more information. +.Sp +The alternative, indicated with \fB\-npod\fR, is not to use Pod::Html, but +rather to format pod text in italics (or whatever the stylesheet +indicates), without special html markup. This is useful, for example, +if pod is being used as an alternative way to write comments. +.IP "The \fB\-frm\fR, or \fB\-\-frames\fR flag" 4 +.IX Item "The -frm, or --frames flag" +By default, a single html output file is produced. This can be changed +with the \fB\-frm\fR option, which creates a frame holding a table of +contents in the left panel and the source code in the right side. This +simplifies code browsing. Assume, for example, that the input file is +\&\fIMyModule.pm\fR. Then, for default file extension choices, these three +files will be created: +.Sp +.Vb 3 +\& MyModule.pm.html \- the frame +\& MyModule.pm.toc.html \- the table of contents +\& MyModule.pm.src.html \- the formatted source code +.Ve +.Sp +Obviously this file naming scheme requires that output be directed to a real +file (as opposed to, say, standard output). If this is not the +case, or if the file extension is unknown, the \fB\-frm\fR option will be +ignored. +.IP "The \fB\-text=s\fR, or \fB\-\-html\-toc\-extension\fR flag" 4 +.IX Item "The -text=s, or --html-toc-extension flag" +Use this flag to specify the extra file extension of the table of contents file +when html frames are used. The default is \*(L"toc\*(R". +See \*(L"Specifying File Extensions\*(R". +.IP "The \fB\-sext=s\fR, or \fB\-\-html\-src\-extension\fR flag" 4 +.IX Item "The -sext=s, or --html-src-extension flag" +Use this flag to specify the extra file extension of the content file when html +frames are used. The default is \*(L"src\*(R". +See \*(L"Specifying File Extensions\*(R". +.IP "The \fB\-hent\fR, or \fB\-\-html\-entities\fR flag" 4 +.IX Item "The -hent, or --html-entities flag" +This flag controls the use of Html::Entities for html formatting. By +default, the module Html::Entities is used to encode special symbols. +This may not be the right thing for some browser/language +combinations. Use \-\-nohtml\-entities or \-nhent to prevent this. +.IP "Style Sheets" 4 +.IX Item "Style Sheets" +Style sheets make it very convenient to control and adjust the +appearance of html pages. The default behavior is to write a page of +html with an embedded style sheet. +.Sp +An alternative to an embedded style sheet is to create a page with a +link to an external style sheet. This is indicated with the +\&\fB\-css=filename\fR, where the external style sheet is \fIfilename\fR. The +external style sheet \fIfilename\fR will be created if and only if it does +not exist. This option is useful for controlling multiple pages from a +single style sheet. +.Sp +To cause perltidy to write a style sheet to standard output and exit, +use the \fB\-ss\fR, or \fB\-\-stylesheet\fR, flag. This is useful if the style +sheet could not be written for some reason, such as if the \fB\-pre\fR flag +was used. Thus, for example, +.Sp +.Vb 1 +\& perltidy \-html \-ss >mystyle.css +.Ve +.Sp +will write a style sheet with the default properties to file +\&\fImystyle.css\fR. +.Sp +The use of style sheets is encouraged, but a web page without a style +sheets can be created with the flag \fB\-nss\fR. Use this option if you +must to be sure that older browsers (roughly speaking, versions prior to +4.0 of Netscape Navigator and Internet Explorer) can display the +syntax-coloring of the html files. +.IP "Controlling \s-1HTML\s0 properties" 4 +.IX Item "Controlling HTML properties" +Note: It is usually more convenient to accept the default properties +and then edit the stylesheet which is produced. However, this section +shows how to control the properties with flags to perltidy. +.Sp +Syntax colors may be changed from their default values by flags of the either +the long form, \fB\-html\-color\-xxxxxx=n\fR, or more conveniently the short form, +\&\fB\-hcx=n\fR, where \fBxxxxxx\fR is one of the following words, and \fBx\fR is the +corresponding abbreviation: +.Sp +.Vb 10 +\& Token Type xxxxxx x +\& \-\-\-\-\-\-\-\-\-\- \-\-\-\-\-\-\-\- \-\- +\& comment comment c +\& number numeric n +\& identifier identifier i +\& bareword, function bareword w +\& keyword keyword k +\& quite, pattern quote q +\& here doc text here\-doc\-text h +\& here doc target here\-doc\-target hh +\& punctuation punctuation pu +\& parentheses paren p +\& structural braces structure s +\& semicolon semicolon sc +\& colon colon co +\& comma comma cm +\& label label j +\& sub definition name subroutine m +\& pod text pod\-text pd +.Ve +.Sp +A default set of colors has been defined, but they may be changed by providing +values to any of the following parameters, where \fBn\fR is either a 6 digit +hex \s-1RGB\s0 color value or an ascii name for a color, such as 'red'. +.Sp +To illustrate, the following command will produce an html +file \fIsomefile.pl.html\fR with \*(L"aqua\*(R" keywords: +.Sp +.Vb 1 +\& perltidy \-html \-hck=00ffff somefile.pl +.Ve +.Sp +and this should be equivalent for most browsers: +.Sp +.Vb 1 +\& perltidy \-html \-hck=aqua somefile.pl +.Ve +.Sp +Perltidy merely writes any non-hex names that it sees in the html file. +The following 16 color names are defined in the \s-1HTML 3.2\s0 standard: +.Sp +.Vb 10 +\& black => 000000, +\& silver => c0c0c0, +\& gray => 808080, +\& white => ffffff, +\& maroon => 800000, +\& red => ff0000, +\& purple => 800080, +\& fuchsia => ff00ff, +\& green => 008000, +\& lime => 00ff00, +\& olive => 808000, +\& yellow => ffff00 +\& navy => 000080, +\& blue => 0000ff, +\& teal => 008080, +\& aqua => 00ffff, +.Ve +.Sp +Many more names are supported in specific browsers, but it is safest +to use the hex codes for other colors. Helpful color tables can be +located with an internet search for \*(L"\s-1HTML\s0 color tables\*(R". +.Sp +Besides color, two other character attributes may be set: bold, and italics. +To set a token type to use bold, use the flag +\&\fB\-\-html\-bold\-xxxxxx\fR or \fB\-hbx\fR, where \fBxxxxxx\fR or \fBx\fR are the long +or short names from the above table. Conversely, to set a token type to +\&\s-1NOT\s0 use bold, use \fB\-\-nohtml\-bold\-xxxxxx\fR or \fB\-nhbx\fR. +.Sp +Likewise, to set a token type to use an italic font, use the flag +\&\fB\-\-html\-italic\-xxxxxx\fR or \fB\-hix\fR, where again \fBxxxxxx\fR or \fBx\fR are the +long or short names from the above table. And to set a token type to +\&\s-1NOT\s0 use italics, use \fB\-\-nohtml\-italic\-xxxxxx\fR or \fB\-nhix\fR. +.Sp +For example, to use bold braces and lime color, non-bold, italics keywords the +following command would be used: +.Sp +.Vb 1 +\& perltidy \-html \-hbs \-hck=00FF00 \-nhbk \-hik somefile.pl +.Ve +.Sp +The background color can be specified with \fB\-\-html\-color\-background=n\fR, +or \fB\-hcbg=n\fR for short, where n is a 6 character hex \s-1RGB\s0 value. The +default color of text is the value given to \fBpunctuation\fR, which is +black as a default. +.Sp +Here are some notes and hints: +.Sp +1. If you find a preferred set of these parameters, you may want +to create a \fI.perltidyrc\fR file containing them. See the perltidy man +page for an explanation. +.Sp +2. Rather than specifying values for these parameters, it is probably +easier to accept the defaults and then edit a style sheet. The style +sheet contains comments which should make this easy. +.Sp +3. The syntax-colored html files can be very large, so it may be best to +split large files into smaller pieces to improve download times. +.SH "SOME COMMON INPUT CONVENTIONS" +.IX Header "SOME COMMON INPUT CONVENTIONS" +.SS "Specifying Block Types" +.IX Subsection "Specifying Block Types" +Several parameters which refer to code block types may be customized by also +specifying an associated list of block types. The type of a block is the name +of the keyword which introduces that block, such as \fBif\fR, \fBelse\fR, or \fBsub\fR. +An exception is a labeled block, which has no keyword, and should be specified +with just a colon. To specify all blocks use \fB'*'\fR. +.PP +The keyword \fBsub\fR indicates a named sub. For anonymous subs, use the special +keyword \fBasub\fR. +.PP +For example, the following parameter specifies \f(CW\*(C`sub\*(C'\fR, labels, \f(CW\*(C`BEGIN\*(C'\fR, and +\&\f(CW\*(C`END\*(C'\fR blocks: +.PP +.Vb 1 +\& \-cscl="sub : BEGIN END" +.Ve +.PP +(the meaning of the \-cscl parameter is described above.) Note that +quotes are required around the list of block types because of the +spaces. For another example, the following list specifies all block types +for vertical tightness: +.PP +.Vb 1 +\& \-bbvtl=\*(Aq*\*(Aq +.Ve +.SS "Specifying File Extensions" +.IX Subsection "Specifying File Extensions" +Several parameters allow default file extensions to be overridden. For +example, a backup file extension may be specified with \fB\-bext=ext\fR, +where \fBext\fR is some new extension. In order to provides the user some +flexibility, the following convention is used in all cases to decide if +a leading '.' should be used. If the extension \f(CW\*(C`ext\*(C'\fR begins with +\&\f(CW\*(C`A\-Z\*(C'\fR, \f(CW\*(C`a\-z\*(C'\fR, or \f(CW\*(C`0\-9\*(C'\fR, then it will be appended to the filename with +an intermediate '.' (or perhaps an '_' on \s-1VMS\s0 systems). Otherwise, it +will be appended directly. +.PP +For example, suppose the file is \fIsomefile.pl\fR. For \f(CW\*(C`\-bext=old\*(C'\fR, a '.' is +added to give \fIsomefile.pl.old\fR. For \f(CW\*(C`\-bext=.old\*(C'\fR, no additional '.' is +added, so again the backup file is \fIsomefile.pl.old\fR. For \f(CW\*(C`\-bext=~\*(C'\fR, then no +dot is added, and the backup file will be \fIsomefile.pl~\fR . +.SH "SWITCHES WHICH MAY BE NEGATED" +.IX Header "SWITCHES WHICH MAY BE NEGATED" +The following list shows all short parameter names which allow a prefix +\&'n' to produce the negated form: +.PP +.Vb 6 +\& D anl asc aws b bbb bbc bbs bl bli boc bok bol bot ce +\& csc dac dbc dcsc ddf dln dnl dop dp dpro dsc dsm dsn dtt dwls +\& dwrs dws f fll frm fs hsc html ibc icb icp iob isbc lal log +\& lp lsl ohbr okw ola oll opr opt osbr otr ple pod pvl q +\& sbc sbl schb scp scsb sct se sfp sfs skp sob sohb sop sosb sot +\& ssc st sts syn t tac tbc toc tp tqw tsc w x bar kis +.Ve +.PP +Equivalently, the prefix 'no' or 'no\-' on the corresponding long names may be +used. +.SH "LIMITATIONS" +.IX Header "LIMITATIONS" +.IP "Parsing Limitations" 4 +.IX Item "Parsing Limitations" +Perltidy should work properly on most perl scripts. It does a lot of +self-checking, but still, it is possible that an error could be +introduced and go undetected. Therefore, it is essential to make +careful backups and to test reformatted scripts. +.Sp +The main current limitation is that perltidy does not scan modules +included with 'use' statements. This makes it necessary to guess the +context of any bare words introduced by such modules. Perltidy has good +guessing algorithms, but they are not infallible. When it must guess, +it leaves a message in the log file. +.Sp +If you encounter a bug, please report it. +.IP "What perltidy does not parse and format" 4 +.IX Item "What perltidy does not parse and format" +Perltidy indents but does not reformat comments and \f(CW\*(C`qw\*(C'\fR quotes. +Perltidy does not in any way modify the contents of here documents or +quoted text, even if they contain source code. (You could, however, +reformat them separately). Perltidy does not format 'format' sections +in any way. And, of course, it does not modify pod documents. +.SH "FILES" +.IX Header "FILES" +.IP "Temporary files" 4 +.IX Item "Temporary files" +Under the \-html option with the default \-\-pod2html flag, a temporary file is +required to pass text to Pod::Html. Unix systems will try to use the \s-1POSIX\s0 +\&\fBtmpnam()\fR function. Otherwise the file \fIperltidy.TMP\fR will be temporarily +created in the current working directory. +.IP "Special files when standard input is used" 4 +.IX Item "Special files when standard input is used" +When standard input is used, the log file, if saved, is \fIperltidy.LOG\fR, +and any errors are written to \fIperltidy.ERR\fR unless the \fB\-se\fR flag is +set. These are saved in the current working directory. +.IP "Files overwritten" 4 +.IX Item "Files overwritten" +The following file extensions are used by perltidy, and files with these +extensions may be overwritten or deleted: \fI.ERR\fR, \fI.LOG\fR, \fI.TEE\fR, +and/or \fI.tdy\fR, \fI.html\fR, and \fI.bak\fR, depending on the run type and +settings. +.IP "Files extensions limitations" 4 +.IX Item "Files extensions limitations" +Perltidy does not operate on files for which the run could produce a file with +a duplicated file extension. These extensions include \fI.LOG\fR, \fI.ERR\fR, +\&\fI.TEE\fR, and perhaps \fI.tdy\fR and \fI.bak\fR, depending on the run type. The +purpose of this rule is to prevent generating confusing filenames such as +\&\fIsomefile.tdy.tdy.tdy\fR. +.SH "SEE ALSO" +.IX Header "SEE ALSO" +\&\fBperlstyle\fR\|(1), \fBPerl::Tidy\fR\|(3) +.SH "VERSION" +.IX Header "VERSION" +This man page documents perltidy version 20180220.01 +.SH "BUG REPORTS" +.IX Header "BUG REPORTS" +A list of current bugs and issues can be found at the \s-1CPAN\s0 site +.PP +.Vb 1 +\& https://rt.cpan.org/Public/Dist/Display.html?Name=Perl\-Tidy +.Ve +.PP +To report a new bug or problem, use the link on this page. +.SH "COPYRIGHT" +.IX Header "COPYRIGHT" +Copyright (c) 2000\-2018 by Steve Hancock +.SH "LICENSE" +.IX Header "LICENSE" +This package is free software; you can redistribute it and/or modify it +under the terms of the \*(L"\s-1GNU\s0 General Public License\*(R". +.PP +Please refer to the file \*(L"\s-1COPYING\*(R"\s0 for details. +.SH "DISCLAIMER" +.IX Header "DISCLAIMER" +This package is distributed in the hope that it will be useful, +but \s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of +\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0 +.PP +See the \*(L"\s-1GNU\s0 General Public License\*(R" for more details. diff --git a/local-docs/perltidy.md b/local-docs/perltidy.md new file mode 100644 index 00000000..d0fae2c2 --- /dev/null +++ b/local-docs/perltidy.md @@ -0,0 +1,3304 @@ +# NAME + +perltidy - a perl script indenter and reformatter + +# SYNOPSIS + + perltidy [ options ] file1 file2 file3 ... + (output goes to file1.tdy, file2.tdy, file3.tdy, ...) + perltidy [ options ] file1 -o outfile + perltidy [ options ] file1 -st >outfile + perltidy [ options ] outfile + +# DESCRIPTION + +Perltidy reads a perl script and writes an indented, reformatted script. + +Many users will find enough information in ["EXAMPLES"](#examples) to get +started. New users may benefit from the short tutorial +which can be found at +http://perltidy.sourceforge.net/tutorial.html + +A convenient aid to systematically defining a set of style parameters +can be found at +http://perltidy.sourceforge.net/stylekey.html + +Perltidy can produce output on either of two modes, depending on the +existence of an **-html** flag. Without this flag, the output is passed +through a formatter. The default formatting tries to follow the +recommendations in perlstyle(1), but it can be controlled in detail with +numerous input parameters, which are described in ["FORMATTING +OPTIONS"](#formatting-options). + +When the **-html** flag is given, the output is passed through an HTML +formatter which is described in ["HTML OPTIONS"](#html-options). + +# EXAMPLES + + perltidy somefile.pl + +This will produce a file `somefile.pl.tdy` containing the script reformatted +using the default options, which approximate the style suggested in +perlstyle(1). The source file `somefile.pl` is unchanged. + + perltidy *.pl + +Execute perltidy on all `.pl` files in the current directory with the +default options. The output will be in files with an appended `.tdy` +extension. For any file with an error, there will be a file with extension +`.ERR`. + + perltidy -b file1.pl file2.pl + +Modify `file1.pl` and `file2.pl` in place, and backup the originals to +`file1.pl.bak` and `file2.pl.bak`. If `file1.pl.bak` and/or `file2.pl.bak` +already exist, they will be overwritten. + + perltidy -b -bext='/' file1.pl file2.pl + +Same as the previous example except that the backup files `file1.pl.bak` and `file2.pl.bak` will be deleted if there are no errors. + + perltidy -gnu somefile.pl + +Execute perltidy on file `somefile.pl` with a style which approximates the +GNU Coding Standards for C programs. The output will be `somefile.pl.tdy`. + + perltidy -i=3 somefile.pl + +Execute perltidy on file `somefile.pl`, with 3 columns for each level of +indentation (**-i=3**) instead of the default 4 columns. There will not be any +tabs in the reformatted script, except for any which already exist in comments, +pod documents, quotes, and here documents. Output will be `somefile.pl.tdy`. + + perltidy -i=3 -et=8 somefile.pl + +Same as the previous example, except that leading whitespace will +be entabbed with one tab character per 8 spaces. + + perltidy -ce -l=72 somefile.pl + +Execute perltidy on file `somefile.pl` with all defaults except use "cuddled +elses" (**-ce**) and a maximum line length of 72 columns (**-l=72**) instead of +the default 80 columns. + + perltidy -g somefile.pl + +Execute perltidy on file `somefile.pl` and save a log file `somefile.pl.LOG` +which shows the nesting of braces, parentheses, and square brackets at +the start of every line. + + perltidy -html somefile.pl + +This will produce a file `somefile.pl.html` containing the script with +html markup. The output file will contain an embedded style sheet in +the section which may be edited to change the appearance. + + perltidy -html -css=mystyle.css somefile.pl + +This will produce a file `somefile.pl.html` containing the script with +html markup. This output file will contain a link to a separate style +sheet file `mystyle.css`. If the file `mystyle.css` does not exist, +it will be created. If it exists, it will not be overwritten. + + perltidy -html -pre somefile.pl + +Write an html snippet with only the PRE section to `somefile.pl.html`. +This is useful when code snippets are being formatted for inclusion in a +larger web page. No style sheet will be written in this case. + + perltidy -html -ss >mystyle.css + +Write a style sheet to `mystyle.css` and exit. + + perltidy -html -frm mymodule.pm + +Write html with a frame holding a table of contents and the source code. The +output files will be `mymodule.pm.html` (the frame), `mymodule.pm.toc.html` +(the table of contents), and `mymodule.pm.src.html` (the source code). + +# OPTIONS - OVERVIEW + +The entire command line is scanned for options, and they are processed +before any files are processed. As a result, it does not matter +whether flags are before or after any filenames. However, the relative +order of parameters is important, with later parameters overriding the +values of earlier parameters. + +For each parameter, there is a long name and a short name. The short +names are convenient for keyboard input, while the long names are +self-documenting and therefore useful in scripts. It is customary to +use two leading dashes for long names, but one may be used. + +Most parameters which serve as on/off flags can be negated with a +leading "n" (for the short name) or a leading "no" or "no-" (for the +long name). For example, the flag to outdent long quotes is **-olq** +or **--outdent-long-quotes**. The flag to skip this is **-nolq** +or **--nooutdent-long-quotes** or **--no-outdent-long-quotes**. + +Options may not be bundled together. In other words, options **-q** and +**-g** may NOT be entered as **-qg**. + +Option names may be terminated early as long as they are uniquely identified. +For example, instead of **--dump-token-types**, it would be sufficient to enter +**--dump-tok**, or even **--dump-t**, to uniquely identify this command. + +## I/O control + +The following parameters concern the files which are read and written. + +- **-h**, **--help** + + Show summary of usage and exit. + +- **-o**=filename, **--outfile**=filename + + Name of the output file (only if a single input file is being + processed). If no output file is specified, and output is not + redirected to the standard output, the output will go to `filename.tdy`. + +- **-st**, **--standard-output** + + Perltidy must be able to operate on an arbitrarily large number of files + in a single run, with each output being directed to a different output + file. Obviously this would conflict with outputting to the single + standard output device, so a special flag, **-st**, is required to + request outputting to the standard output. For example, + + perltidy somefile.pl -st >somefile.new.pl + + This option may only be used if there is just a single input file. + The default is **-nst** or **--nostandard-output**. + +- **-se**, **--standard-error-output** + + If perltidy detects an error when processing file `somefile.pl`, its + default behavior is to write error messages to file `somefile.pl.ERR`. + Use **-se** to cause all error messages to be sent to the standard error + output stream instead. This directive may be negated with **-nse**. + Thus, you may place **-se** in a `.perltidyrc` and override it when + desired with **-nse** on the command line. + +- **-oext**=ext, **--output-file-extension**=ext + + Change the extension of the output file to be `ext` instead of the + default `tdy` (or `html` in case the -**-html** option is used). + See ["Specifying File Extensions"](#specifying-file-extensions). + +- **-opath**=path, **--output-path**=path + + When perltidy creates a filename for an output file, by default it merely + appends an extension to the path and basename of the input file. This + parameter causes the path to be changed to `path` instead. + + The path should end in a valid path separator character, but perltidy will try + to add one if it is missing. + + For example + + perltidy somefile.pl -opath=/tmp/ + + will produce `/tmp/somefile.pl.tdy`. Otherwise, `somefile.pl.tdy` will + appear in whatever directory contains `somefile.pl`. + + If the path contains spaces, it should be placed in quotes. + + This parameter will be ignored if output is being directed to standard output, + or if it is being specified explicitly with the **-o=s** parameter. + +- **-b**, **--backup-and-modify-in-place** + + Modify the input file or files in-place and save the original with the + extension `.bak`. Any existing `.bak` file will be deleted. See next + item for changing the default backup extension, and for eliminating the + backup file altogether. + + A **-b** flag will be ignored if input is from standard input or goes to + standard output, or if the **-html** flag is set. + + In particular, if you want to use both the **-b** flag and the **-pbp** + (--perl-best-practices) flag, then you must put a **-nst** flag after the + **-pbp** flag because it contains a **-st** flag as one of its components, + which means that output will go to the standard output stream. + +- **-bext**=ext, **--backup-file-extension**=ext + + This parameter serves two purposes: (1) to change the extension of the backup + file to be something other than the default `.bak`, and (2) to indicate + that no backup file should be saved. + + To change the default extension to something other than `.bak` see + ["Specifying File Extensions"](#specifying-file-extensions). + + A backup file of the source is always written, but you can request that it + be deleted at the end of processing if there were no errors. This is risky + unless the source code is being maintained with a source code control + system. + + To indicate that the backup should be deleted include one forward slash, + **/**, in the extension. If any text remains after the slash is removed + it will be used to define the backup file extension (which is always + created and only deleted if there were no errors). + + Here are some examples: + + Parameter Extension Backup File Treatment + <-bext=bak> F<.bak> Keep (same as the default behavior) + <-bext='/'> F<.bak> Delete if no errors + <-bext='/backup'> F<.backup> Delete if no errors + <-bext='original/'> F<.original> Delete if no errors + +- **-w**, **--warning-output** + + Setting **-w** causes any non-critical warning + messages to be reported as errors. These include messages + about possible pod problems, possibly bad starting indentation level, + and cautions about indirect object usage. The default, **-nw** or + **--nowarning-output**, is not to include these warnings. + +- **-q**, **--quiet** + + Deactivate error messages and syntax checking (for running under + an editor). + + For example, if you use a vi-style editor, such as vim, you may execute + perltidy as a filter from within the editor using something like + + :n1,n2!perltidy -q + + where `n1,n2` represents the selected text. Without the **-q** flag, + any error message may mess up your screen, so be prepared to use your + "undo" key. + +- **-log**, **--logfile** + + Save the `.LOG` file, which has many useful diagnostics. Perltidy always + creates a `.LOG` file, but by default it is deleted unless a program bug is + suspected. Setting the **-log** flag forces the log file to be saved. + +- **-g=n**, **--logfile-gap=n** + + Set maximum interval between input code lines in the logfile. This purpose of + this flag is to assist in debugging nesting errors. The value of `n` is + optional. If you set the flag **-g** without the value of `n`, it will be + taken to be 1, meaning that every line will be written to the log file. This + can be helpful if you are looking for a brace, paren, or bracket nesting error. + + Setting **-g** also causes the logfile to be saved, so it is not necessary to + also include **-log**. + + If no **-g** flag is given, a value of 50 will be used, meaning that at least + every 50th line will be recorded in the logfile. This helps prevent + excessively long log files. + + Setting a negative value of `n` is the same as not setting **-g** at all. + +- **-npro** **--noprofile** + + Ignore any `.perltidyrc` command file. Normally, perltidy looks first in + your current directory for a `.perltidyrc` file of parameters. (The format + is described below). If it finds one, it applies those options to the + initial default values, and then it applies any that have been defined + on the command line. If no `.perltidyrc` file is found, it looks for one + in your home directory. + + If you set the **-npro** flag, perltidy will not look for this file. + +- **-pro=filename** or **--profile=filename** + + To simplify testing and switching .perltidyrc files, this command may be + used to specify a configuration file which will override the default + name of .perltidyrc. There must not be a space on either side of the + '=' sign. For example, the line + + perltidy -pro=testcfg + + would cause file `testcfg` to be used instead of the + default `.perltidyrc`. + + A pathname begins with three dots, e.g. ".../.perltidyrc", indicates that + the file should be searched for starting in the current directory and + working upwards. This makes it easier to have multiple projects each with + their own .perltidyrc in their root directories. + +- **-opt**, **--show-options** + + Write a list of all options used to the `.LOG` file. + Please see **--dump-options** for a simpler way to do this. + +- **-f**, **--force-read-binary** + + Force perltidy to process binary files. To avoid producing excessive + error messages, perltidy skips files identified by the system as non-text. + However, valid perl scripts containing binary data may sometimes be identified + as non-text, and this flag forces perltidy to process them. + +# FORMATTING OPTIONS + +## Basic Options + +- **--notidy** + + This flag disables all formatting and causes the input to be copied unchanged + to the output except for possible changes in line ending characters and any + pre- and post-filters. This can be useful in conjunction with a hierarchical + set of `.perltidyrc` files to avoid unwanted code tidying. See also + ["Skipping Selected Sections of Code"](#skipping-selected-sections-of-code) for a way to avoid tidying specific + sections of code. + +- **-i=n**, **--indent-columns=n** + + Use n columns per indentation level (default n=4). + +- **-l=n**, **--maximum-line-length=n** + + The default maximum line length is n=80 characters. Perltidy will try + to find line break points to keep lines below this length. However, long + quotes and side comments may cause lines to exceed this length. + Setting **-l=0** is equivalent to setting **-l=(a large number)**. + +- **-vmll**, **--variable-maximum-line-length** + + A problem arises using a fixed maximum line length with very deeply nested code + and data structures because eventually the amount of leading whitespace used + for indicating indentation takes up most or all of the available line width, + leaving little or no space for the actual code or data. One solution is to use + a vary long line length. Another solution is to use the **-vmll** flag, which + basically tells perltidy to ignore leading whitespace when measuring the line + length. + + To be precise, when the **-vmll** parameter is set, the maximum line length of a + line of code will be M+L\*I, where + + M is the value of --maximum-line-length=M (-l=M), default 80, + I is the value of --indent-columns=I (-i=I), default 4, + L is the indentation level of the line of code + + When this flag is set, the choice of breakpoints for a block of code should be + essentially independent of its nesting depth. However, the absolute line + lengths, including leading whitespace, can still be arbitrarily large. This + problem can be avoided by including the next parameter. + + The default is not to do this (**-nvmll**). + +- **-wc=n**, **--whitespace-cycle=n** + + This flag also addresses problems with very deeply nested code and data + structures. When the nesting depth exceeds the value **n** the leading + whitespace will be reduced and start at a depth of 1 again. The result is that + blocks of code will shift back to the left rather than moving arbitrarily far + to the right. This occurs cyclically to any depth. + + For example if one level of indentation equals 4 spaces (**-i=4**, the default), + and one uses **-wc=15**, then if the leading whitespace on a line exceeds about + 4\*15=60 spaces it will be reduced back to 4\*1=4 spaces and continue increasing + from there. If the whitespace never exceeds this limit the formatting remains + unchanged. + + The combination of **-vmll** and **-wc=n** provides a solution to the problem of + displaying arbitrarily deep data structures and code in a finite window, + although **-wc=n** may of course be used without **-vmll**. + + The default is not to use this, which can also be indicated using **-wc=0**. + +- tabs + + Using tab characters will almost certainly lead to future portability + and maintenance problems, so the default and recommendation is not to + use them. For those who prefer tabs, however, there are two different + options. + + Except for possibly introducing tab indentation characters, as outlined + below, perltidy does not introduce any tab characters into your file, + and it removes any tabs from the code (unless requested not to do so + with **-fws**). If you have any tabs in your comments, quotes, or + here-documents, they will remain. + + - **-et=n**, **--entab-leading-whitespace** + + This flag causes each **n** initial space characters to be replaced by + one tab character. Note that the integer **n** is completely independent + of the integer specified for indentation parameter, **-i=n**. + + - **-t**, **--tabs** + + This flag causes one leading tab character to be inserted for each level + of indentation. Certain other features are incompatible with this + option, and if these options are also given, then a warning message will + be issued and this flag will be unset. One example is the **-lp** + option. + + - **-dt=n**, **--default-tabsize=n** + + If the first line of code passed to perltidy contains leading tabs but no + tab scheme is specified for the output stream then perltidy must guess how many + spaces correspond to each leading tab. This number of spaces **n** + corresponding to each leading tab of the input stream may be specified with + **-dt=n**. The default is **n=8**. + + This flag has no effect if a tab scheme is specified for the output stream, + because then the input stream is assumed to use the same tab scheme and + indentation spaces as for the output stream (any other assumption would lead to + unstable editing). + +- **-syn**, **--check-syntax** + + This flag is now ignored for safety, but the following documentation + has been retained for reference. + + This flag causes perltidy to run `perl -c -T` to check syntax of input + and output. (To change the flags passed to perl, see the next + item, **-pscf**). The results are written to the `.LOG` file, which + will be saved if an error is detected in the output script. The output + script is not checked if the input script has a syntax error. Perltidy + does its own checking, but this option employs perl to get a "second + opinion". + + If perl reports errors in the input file, they will not be reported in + the error output unless the **--warning-output** flag is given. + + The default is **NOT** to do this type of syntax checking (although + perltidy will still do as much self-checking as possible). The reason + is that it causes all code in BEGIN blocks to be executed, for all + modules being used, and this opens the door to security issues and + infinite loops when running perltidy. + +- **-pscf=s**, **-perl-syntax-check-flags=s** + + When perl is invoked to check syntax, the normal flags are `-c -T`. In + addition, if the **-x** flag is given to perltidy, then perl will also be + passed a **-x** flag. It should not normally be necessary to change + these flags, but it can be done with the **-pscf=s** flag. For example, + if the taint flag, `-T`, is not wanted, the flag could be set to be just + **-pscf=-c**. + + Perltidy will pass your string to perl with the exception that it will + add a **-c** and **-x** if appropriate. The `.LOG` file will show + exactly what flags were passed to perl. + +- **-xs**, **--extended-syntax** + + A problem with formatting Perl code is that some modules can introduce new + syntax. This flag allows perltidy to handle certain common extensions + to the standard syntax without complaint. + + For example, without this flag a structure such as the following would generate + a syntax error and the braces would not be balanced: + + method deposit( Num $amount) { + $self->balance( $self->balance + $amount ); + } + + This flag is enabled by default but it can be deactivated with **-nxs**. + Probably the only reason to deactivate this flag is to generate more diagnostic + messages when debugging a script. + +- **-io**, **--indent-only** + + This flag is used to deactivate all whitespace and line break changes + within non-blank lines of code. + When it is in effect, the only change to the script will be + to the indentation and to the number of blank lines. + And any flags controlling whitespace and newlines will be ignored. You + might want to use this if you are perfectly happy with your whitespace + and line breaks, and merely want perltidy to handle the indentation. + (This also speeds up perltidy by well over a factor of two, so it might be + useful when perltidy is merely being used to help find a brace error in + a large script). + + Setting this flag is equivalent to setting **--freeze-newlines** and + **--freeze-whitespace**. + + If you also want to keep your existing blank lines exactly + as they are, you can add **--freeze-blank-lines**. + + With this option perltidy is still free to modify the indenting (and + outdenting) of code and comments as it normally would. If you also want to + prevent long comment lines from being outdented, you can add either **-noll** or + **-l=0**. + + Setting this flag will prevent perltidy from doing any special operations on + closing side comments. You may still delete all side comments however when + this flag is in effect. + +- **-enc=s**, **--character-encoding=s** + + where **s**=**none** or **utf8**. This flag tells perltidy the character encoding + of both the input and output character streams. The value **utf8** causes the + stream to be read and written as UTF-8. The value **none** causes the stream to + be processed without special encoding assumptions. At present there is no + automatic detection of character encoding (even if there is a `'use utf8'` + statement in your code) so this flag must be set for streams encoded in UTF-8. + Incorrectly setting this parameter can cause data corruption, so please + carefully check the output. + + The default is **none**. + + The abbreviations **-utf8** or **-UTF8** are equivalent to **-enc=utf8**. + So to process a file named **file.pl** which is encoded in UTF-8 you can use: + + perltidy -utf8 file.pl + +- **-ole=s**, **--output-line-ending=s** + + where s=`win`, `dos`, `unix`, or `mac`. This flag tells perltidy + to output line endings for a specific system. Normally, + perltidy writes files with the line separator character of the host + system. The `win` and `dos` flags have an identical result. + +- **-ple**, **--preserve-line-endings** + + This flag tells perltidy to write its output files with the same line + endings as the input file, if possible. It should work for + **dos**, **unix**, and **mac** line endings. It will only work if perltidy + input comes from a filename (rather than stdin, for example). If + perltidy has trouble determining the input file line ending, it will + revert to the default behavior of using the line ending of the host system. + +- **-it=n**, **--iterations=n** + + This flag causes perltidy to do **n** complete iterations. The reason for this + flag is that code beautification is an iterative process and in some + cases the output from perltidy can be different if it is applied a second time. + For most purposes the default of **n=1** should be satisfactory. However **n=2** + can be useful when a major style change is being made, or when code is being + beautified on check-in to a source code control system. It has been found to + be extremely rare for the output to change after 2 iterations. If a value + **n** is greater than 2 is input then a convergence test will be used to stop + the iterations as soon as possible, almost always after 2 iterations. See + the next item for a simplified iteration control. + + This flag has no effect when perltidy is used to generate html. + +- **-conv**, **--converge** + + This flag is equivalent to **-it=4** and is included to simplify iteration + control. For all practical purposes one either does or does not want to be + sure that the output is converged, and there is no penalty to using a large + iteration limit since perltidy will check for convergence and stop iterating as + soon as possible. The default is **-nconv** (no convergence check). Using + **-conv** will approximately double run time since normally one extra iteration + is required to verify convergence. + +## Code Indentation Control + +- **-ci=n**, **--continuation-indentation=n** + + Continuation indentation is extra indentation spaces applied when + a long line is broken. The default is n=2, illustrated here: + + my $level = # -ci=2 + ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; + + The same example, with n=0, is a little harder to read: + + my $level = # -ci=0 + ( $max_index_to_go >= 0 ) ? $levels_to_go[0] : $last_output_level; + + The value given to **-ci** is also used by some commands when a small + space is required. Examples are commands for outdenting labels, + **-ola**, and control keywords, **-okw**. + + When default values are not used, it is suggested that the value **n** + given with **-ci=n** be no more than about one-half of the number of + spaces assigned to a full indentation level on the **-i=n** command. + +- **-sil=n** **--starting-indentation-level=n** + + By default, perltidy examines the input file and tries to determine the + starting indentation level. While it is often zero, it may not be + zero for a code snippet being sent from an editing session. + + To guess the starting indentation level perltidy simply assumes that + indentation scheme used to create the code snippet is the same as is being used + for the current perltidy process. This is the only sensible guess that can be + made. It should be correct if this is true, but otherwise it probably won't. + For example, if the input script was written with -i=2 and the current peltidy + flags have -i=4, the wrong initial indentation will be guessed for a code + snippet which has non-zero initial indentation. Likewise, if an entabbing + scheme is used in the input script and not in the current process then the + guessed indentation will be wrong. + + If the default method does not work correctly, or you want to change the + starting level, use **-sil=n**, to force the starting level to be n. + +- List indentation using **-lp**, **--line-up-parentheses** + + By default, perltidy indents lists with 4 spaces, or whatever value + is specified with **-i=n**. Here is a small list formatted in this way: + + # perltidy (default) + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + + Use the **-lp** flag to add extra indentation to cause the data to begin + past the opening parentheses of a sub call or list, or opening square + bracket of an anonymous array, or opening curly brace of an anonymous + hash. With this option, the above list would become: + + # perltidy -lp + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + + If the available line length (see **-l=n** ) does not permit this much + space, perltidy will use less. For alternate placement of the + closing paren, see the next section. + + This option has no effect on code BLOCKS, such as if/then/else blocks, + which always use whatever is specified with **-i=n**. Also, the + existence of line breaks and/or block comments between the opening and + closing parens may cause perltidy to temporarily revert to its default + method. + + Note: The **-lp** option may not be used together with the **-t** tabs option. + It may, however, be used with the **-et=n** tab method. + + In addition, any parameter which significantly restricts the ability of + perltidy to choose newlines will conflict with **-lp** and will cause + **-lp** to be deactivated. These include **-io**, **-fnl**, **-nanl**, and + **-ndnl**. The reason is that the **-lp** indentation style can require + the careful coordination of an arbitrary number of break points in + hierarchical lists, and these flags may prevent that. + +- **-cti=n**, **--closing-token-indentation** + + The **-cti=n** flag controls the indentation of a line beginning with + a `)`, `]`, or a non-block `}`. Such a line receives: + + -cti = 0 no extra indentation (default) + -cti = 1 extra indentation such that the closing token + aligns with its opening token. + -cti = 2 one extra indentation level if the line looks like: + ); or ]; or }; + -cti = 3 one extra indentation level always + + The flags **-cti=1** and **-cti=2** work well with the **-lp** flag (previous + section). + + # perltidy -lp -cti=1 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + + # perltidy -lp -cti=2 + @month_of_year = ( + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' + ); + + These flags are merely hints to the formatter and they may not always be + followed. In particular, if -lp is not being used, the indentation for + **cti=1** is constrained to be no more than one indentation level. + + If desired, this control can be applied independently to each of the + closing container token types. In fact, **-cti=n** is merely an + abbreviation for **-cpi=n -csbi=n -cbi=n**, where: + **-cpi** or **--closing-paren-indentation** controls **)**'s, + **-csbi** or **--closing-square-bracket-indentation** controls **\]**'s, + **-cbi** or **--closing-brace-indentation** controls non-block **}**'s. + +- **-icp**, **--indent-closing-paren** + + The **-icp** flag is equivalent to + **-cti=2**, described in the previous section. The **-nicp** flag is + equivalent **-cti=0**. They are included for backwards compatibility. + +- **-icb**, **--indent-closing-brace** + + The **-icb** option gives one extra level of indentation to a brace which + terminates a code block . For example, + + if ($task) { + yyy(); + } # -icb + else { + zzz(); + } + + The default is not to do this, indicated by **-nicb**. + +- **-olq**, **--outdent-long-quotes** + + When **-olq** is set, lines which is a quoted string longer than the + value **maximum-line-length** will have their indentation removed to make + them more readable. This is the default. To prevent such out-denting, + use **-nolq** or **--nooutdent-long-lines**. + +- **-oll**, **--outdent-long-lines** + + This command is equivalent to **--outdent-long-quotes** and + **--outdent-long-comments**, and it is included for compatibility with previous + versions of perltidy. The negation of this also works, **-noll** or + **--nooutdent-long-lines**, and is equivalent to setting **-nolq** and **-nolc**. + +- Outdenting Labels: **-ola**, **--outdent-labels** + + This command will cause labels to be outdented by 2 spaces (or whatever **-ci** + has been set to), if possible. This is the default. For example: + + my $i; + LOOP: while ( $i = ) { + chomp($i); + next unless $i; + fixit($i); + } + + Use **-nola** to not outdent labels. + +- Outdenting Keywords + - **-okw**, **--outdent-keywords** + + The command **-okw** will cause certain leading control keywords to + be outdented by 2 spaces (or whatever **-ci** has been set to), if + possible. By default, these keywords are `redo`, `next`, `last`, + `goto`, and `return`. The intention is to make these control keywords + easier to see. To change this list of keywords being outdented, see + the next section. + + For example, using `perltidy -okw` on the previous example gives: + + my $i; + LOOP: while ( $i = ) { + chomp($i); + next unless $i; + fixit($i); + } + + The default is not to do this. + + - Specifying Outdented Keywords: **-okwl=string**, **--outdent-keyword-list=string** + + This command can be used to change the keywords which are outdented with + the **-okw** command. The parameter **string** is a required list of perl + keywords, which should be placed in quotes if there are more than one. + By itself, it does not cause any outdenting to occur, so the **-okw** + command is still required. + + For example, the commands `-okwl="next last redo goto" -okw` will cause + those four keywords to be outdented. It is probably simplest to place + any **-okwl** command in a `.perltidyrc` file. + +## Whitespace Control + +Whitespace refers to the blank space between variables, operators, +and other code tokens. + +- **-fws**, **--freeze-whitespace** + + This flag causes your original whitespace to remain unchanged, and + causes the rest of the whitespace commands in this section, the + Code Indentation section, and + the Comment Control section to be ignored. + +- Tightness of curly braces, parentheses, and square brackets. + + Here the term "tightness" will mean the closeness with which + pairs of enclosing tokens, such as parentheses, contain the quantities + within. A numerical value of 0, 1, or 2 defines the tightness, with + 0 being least tight and 2 being most tight. Spaces within containers + are always symmetric, so if there is a space after a `(` then there + will be a space before the corresponding `)`. + + The **-pt=n** or **--paren-tightness=n** parameter controls the space within + parens. The example below shows the effect of the three possible + values, 0, 1, and 2: + + if ( ( my $len_tab = length( $tabstr ) ) > 0 ) { # -pt=0 + if ( ( my $len_tab = length($tabstr) ) > 0 ) { # -pt=1 (default) + if ((my $len_tab = length($tabstr)) > 0) { # -pt=2 + + When n is 0, there is always a space to the right of a '(' and to the left + of a ')'. For n=2 there is never a space. For n=1, the default, there + is a space unless the quantity within the parens is a single token, such + as an identifier or quoted string. + + Likewise, the parameter **-sbt=n** or **--square-bracket-tightness=n** + controls the space within square brackets, as illustrated below. + + $width = $col[ $j + $k ] - $col[ $j ]; # -sbt=0 + $width = $col[ $j + $k ] - $col[$j]; # -sbt=1 (default) + $width = $col[$j + $k] - $col[$j]; # -sbt=2 + + Curly braces which do not contain code blocks are controlled by + the parameter **-bt=n** or **--brace-tightness=n**. + + $obj->{ $parsed_sql->{ 'table' }[0] }; # -bt=0 + $obj->{ $parsed_sql->{'table'}[0] }; # -bt=1 (default) + $obj->{$parsed_sql->{'table'}[0]}; # -bt=2 + + And finally, curly braces which contain blocks of code are controlled by the + parameter **-bbt=n** or **--block-brace-tightness=n** as illustrated in the + example below. + + %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default) + %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.'; # -bbt=1 + %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.'; # -bbt=2 + + To simplify input in the case that all of the tightness flags have the same + value <n>, the parameter <-act=n> or **--all-containers-tightness=n** is an + abbreviation for the combination <-pt=n -sbt=n -bt=n -bbt=n>. + +- **-tso**, **--tight-secret-operators** + + The flag **-tso** causes certain perl token sequences (secret operators) + which might be considered to be a single operator to be formatted "tightly" + (without spaces). The operators currently modified by this flag are: + + 0+ +0 ()x!! ~~<> ,=> =( )= + + For example the sequence **0 +**, which converts a string to a number, + would be formatted without a space: **0+** when the **-tso** flag is set. This + flag is off by default. + +- **-sts**, **--space-terminal-semicolon** + + Some programmers prefer a space before all terminal semicolons. The + default is for no such space, and is indicated with **-nsts** or + **--nospace-terminal-semicolon**. + + $i = 1 ; # -sts + $i = 1; # -nsts (default) + +- **-sfs**, **--space-for-semicolon** + + Semicolons within **for** loops may sometimes be hard to see, + particularly when commas are also present. This option places spaces on + both sides of these special semicolons, and is the default. Use + **-nsfs** or **--nospace-for-semicolon** to deactivate it. + + for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) { # -sfs (default) + for ( @a = @$ap, $u = shift @a; @a; $u = $v ) { # -nsfs + +- **-asc**, **--add-semicolons** + + Setting **-asc** allows perltidy to add any missing optional semicolon at the end + of a line which is followed by a closing curly brace on the next line. This + is the default, and may be deactivated with **-nasc** or **--noadd-semicolons**. + +- **-dsm**, **--delete-semicolons** + + Setting **-dsm** allows perltidy to delete extra semicolons which are + simply empty statements. This is the default, and may be deactivated + with **-ndsm** or **--nodelete-semicolons**. (Such semicolons are not + deleted, however, if they would promote a side comment to a block + comment). + +- **-aws**, **--add-whitespace** + + Setting this option allows perltidy to add certain whitespace improve + code readability. This is the default. If you do not want any + whitespace added, but are willing to have some whitespace deleted, use + **-naws**. (Use **-fws** to leave whitespace completely unchanged). + +- **-dws**, **--delete-old-whitespace** + + Setting this option allows perltidy to remove some old whitespace + between characters, if necessary. This is the default. If you + do not want any old whitespace removed, use **-ndws** or + **--nodelete-old-whitespace**. + +- Detailed whitespace controls around tokens + + For those who want more detailed control over the whitespace around + tokens, there are four parameters which can directly modify the default + whitespace rules built into perltidy for any token. They are: + + **-wls=s** or **--want-left-space=s**, + + **-nwls=s** or **--nowant-left-space=s**, + + **-wrs=s** or **--want-right-space=s**, + + **-nwrs=s** or **--nowant-right-space=s**. + + These parameters are each followed by a quoted string, **s**, containing a + list of token types. No more than one of each of these parameters + should be specified, because repeating a command-line parameter + always overwrites the previous one before perltidy ever sees it. + + To illustrate how these are used, suppose it is desired that there be no + space on either side of the token types **= + - / \***. The following two + parameters would specify this desire: + + -nwls="= + - / *" -nwrs="= + - / *" + + (Note that the token types are in quotes, and that they are separated by + spaces). With these modified whitespace rules, the following line of math: + + $root = -$b + sqrt( $b * $b - 4. * $a * $c ) / ( 2. * $a ); + + becomes this: + + $root=-$b+sqrt( $b*$b-4.*$a*$c )/( 2.*$a ); + + These parameters should be considered to be hints to perltidy rather + than fixed rules, because perltidy must try to resolve conflicts that + arise between them and all of the other rules that it uses. One + conflict that can arise is if, between two tokens, the left token wants + a space and the right one doesn't. In this case, the token not wanting + a space takes priority. + + It is necessary to have a list of all token types in order to create + this type of input. Such a list can be obtained by the command + **--dump-token-types**. Also try the **-D** flag on a short snippet of code + and look at the .DEBUG file to see the tokenization. + + **WARNING** Be sure to put these tokens in quotes to avoid having them + misinterpreted by your command shell. + +- Space between specific keywords and opening paren + + When an opening paren follows a Perl keyword, no space is introduced after the + keyword, unless it is (by default) one of these: + + my local our and or eq ne if else elsif until unless + while for foreach return switch case given when + + These defaults can be modified with two commands: + + **-sak=s** or **--space-after-keyword=s** adds keywords. + + **-nsak=s** or **--nospace-after-keyword=s** removes keywords. + + where **s** is a list of keywords (in quotes if necessary). For example, + + my ( $a, $b, $c ) = @_; # default + my( $a, $b, $c ) = @_; # -nsak="my local our" + + The abbreviation **-nsak='\*'** is equivalent to including all of the + keywords in the above list. + + When both **-nsak=s** and **-sak=s** commands are included, the **-nsak=s** + command is executed first. For example, to have space after only the + keywords (my, local, our) you could use **-nsak="\*" -sak="my local our"**. + + To put a space after all keywords, see the next item. + +- Space between all keywords and opening parens + + When an opening paren follows a function or keyword, no space is introduced + after the keyword except for the keywords noted in the previous item. To + always put a space between a function or keyword and its opening paren, + use the command: + + **-skp** or **--space-keyword-paren** + + You will probably also want to use the flag **-sfp** (next item) too. + +- Space between all function names and opening parens + + When an opening paren follows a function the default is not to introduce + a space. To cause a space to be introduced use: + + **-sfp** or **--space-function-paren** + + myfunc( $a, $b, $c ); # default + myfunc ( $a, $b, $c ); # -sfp + + You will probably also want to use the flag **-skp** (previous item) too. + +- Trimming whitespace around `qw` quotes + + **-tqw** or **--trim-qw** provide the default behavior of trimming + spaces around multi-line `qw` quotes and indenting them appropriately. + + **-ntqw** or **--notrim-qw** cause leading and trailing whitespace around + multi-line `qw` quotes to be left unchanged. This option will not + normally be necessary, but was added for testing purposes, because in + some versions of perl, trimming `qw` quotes changes the syntax tree. + +- **-sbq=n** or **--space-backslash-quote=n** + + Lines like + + $str1=\"string1"; + $str2=\'string2'; + + can confuse syntax highlighters unless a space is included between the backslash and the single or double quotation mark. + + This can be controlled with the value of **n** as follows: + + -sbq=0 means no space between the backslash and quote + -sbq=1 means follow the example of the source code + -sbq=2 means always put a space between the backslash and quote + + The default is **-sbq=1**, meaning that a space will be used 0if there is one in the source code. + +- Trimming trailing whitespace from lines of POD + + **-trp** or **--trim-pod** will remove trailing whitespace from lines of POD. + The default is not to do this. + +## Comment Controls + +Perltidy has a number of ways to control the appearance of both block comments +and side comments. The term **block comment** here refers to a full-line +comment, whereas **side comment** will refer to a comment which appears on a +line to the right of some code. + +- **-ibc**, **--indent-block-comments** + + Block comments normally look best when they are indented to the same + level as the code which follows them. This is the default behavior, but + you may use **-nibc** to keep block comments left-justified. Here is an + example: + + # this comment is indented (-ibc, default) + if ($task) { yyy(); } + + The alternative is **-nibc**: + + # this comment is not indented (-nibc) + if ($task) { yyy(); } + + See also the next item, **-isbc**, as well as **-sbc**, for other ways to + have some indented and some outdented block comments. + +- **-isbc**, **--indent-spaced-block-comments** + + If there is no leading space on the line, then the comment will not be + indented, and otherwise it may be. + + If both **-ibc** and **-isbc** are set, then **-isbc** takes priority. + +- **-olc**, **--outdent-long-comments** + + When **-olc** is set, lines which are full-line (block) comments longer + than the value **maximum-line-length** will have their indentation + removed. This is the default; use **-nolc** to prevent outdenting. + +- **-msc=n**, **--minimum-space-to-comment=n** + + Side comments look best when lined up several spaces to the right of + code. Perltidy will try to keep comments at least n spaces to the + right. The default is n=4 spaces. + +- **-fpsc=n**, **--fixed-position-side-comment=n** + + This parameter tells perltidy to line up side comments in column number **n** + whenever possible. The default, n=0, will not do this. + +- **-iscl**, **--ignore-side-comment-lengths** + + This parameter causes perltidy to ignore the length of side comments when + setting line breaks. The default, **-niscl**, is to include the length of + side comments when breaking lines to stay within the length prescribed + by the **-l=n** maximum line length parameter. For example, the following + long single line would remain intact with -l=80 and -iscl: + + perltidy -l=80 -iscl + $vmsfile =~ s/;[\d\-]*$//; # Clip off version number; we can use a newer version as well + + whereas without the -iscl flag the line will be broken: + + perltidy -l=80 + $vmsfile =~ s/;[\d\-]*$// + ; # Clip off version number; we can use a newer version as well + + +- **-hsc**, **--hanging-side-comments** + + By default, perltidy tries to identify and align "hanging side + comments", which are something like this: + + my $IGNORE = 0; # This is a side comment + # This is a hanging side comment + # And so is this + + A comment is considered to be a hanging side comment if (1) it immediately + follows a line with a side comment, or another hanging side comment, and + (2) there is some leading whitespace on the line. + To deactivate this feature, use **-nhsc** or **--nohanging-side-comments**. + If block comments are preceded by a blank line, or have no leading + whitespace, they will not be mistaken as hanging side comments. + +- Closing Side Comments + + A closing side comment is a special comment which perltidy can + automatically create and place after the closing brace of a code block. + They can be useful for code maintenance and debugging. The command + **-csc** (or **--closing-side-comments**) adds or updates closing side + comments. For example, here is a small code snippet + + sub message { + if ( !defined( $_[0] ) ) { + print("Hello, World\n"); + } + else { + print( $_[0], "\n" ); + } + } + + And here is the result of processing with `perltidy -csc`: + + sub message { + if ( !defined( $_[0] ) ) { + print("Hello, World\n"); + } + else { + print( $_[0], "\n" ); + } + } ## end sub message + + A closing side comment was added for `sub message` in this case, but not + for the `if` and `else` blocks, because they were below the 6 line + cutoff limit for adding closing side comments. This limit may be + changed with the **-csci** command, described below. + + The command **-dcsc** (or **--delete-closing-side-comments**) reverses this + process and removes these comments. + + Several commands are available to modify the behavior of these two basic + commands, **-csc** and **-dcsc**: + + - **-csci=n**, or **--closing-side-comment-interval=n** + + where `n` is the minimum number of lines that a block must have in + order for a closing side comment to be added. The default value is + `n=6`. To illustrate: + + # perltidy -csci=2 -csc + sub message { + if ( !defined( $_[0] ) ) { + print("Hello, World\n"); + } ## end if ( !defined( $_[0] )) + else { + print( $_[0], "\n" ); + } ## end else [ if ( !defined( $_[0] )) + } ## end sub message + + Now the `if` and `else` blocks are commented. However, now this has + become very cluttered. + + - **-cscp=string**, or **--closing-side-comment-prefix=string** + + where string is the prefix used before the name of the block type. The + default prefix, shown above, is `## end`. This string will be added to + closing side comments, and it will also be used to recognize them in + order to update, delete, and format them. Any comment identified as a + closing side comment will be placed just a single space to the right of + its closing brace. + + - **-cscl=string**, or **--closing-side-comment-list** + + where `string` is a list of block types to be tagged with closing side + comments. By default, all code block types preceded by a keyword or + label (such as `if`, `sub`, and so on) will be tagged. The **-cscl** + command changes the default list to be any selected block types; see + ["Specifying Block Types"](#specifying-block-types). + For example, the following command + requests that only `sub`'s, labels, `BEGIN`, and `END` blocks be + affected by any **-csc** or **-dcsc** operation: + + -cscl="sub : BEGIN END" + + - **-csct=n**, or **--closing-side-comment-maximum-text=n** + + The text appended to certain block types, such as an `if` block, is + whatever lies between the keyword introducing the block, such as `if`, + and the opening brace. Since this might be too much text for a side + comment, there needs to be a limit, and that is the purpose of this + parameter. The default value is `n=20`, meaning that no additional + tokens will be appended to this text after its length reaches 20 + characters. Omitted text is indicated with `...`. (Tokens, including + sub names, are never truncated, however, so actual lengths may exceed + this). To illustrate, in the above example, the appended text of the + first block is ` ( !defined( $_[0] )...`. The existing limit of + `n=20` caused this text to be truncated, as indicated by the `...`. See + the next flag for additional control of the abbreviated text. + + - **-cscb**, or **--closing-side-comments-balanced** + + As discussed in the previous item, when the + closing-side-comment-maximum-text limit is exceeded the comment text must + be truncated. Older versions of perltidy terminated with three dots, and this + can still be achieved with -ncscb: + + perltidy -csc -ncscb + } ## end foreach my $foo (sort { $b cmp $a ... + + However this causes a problem with editors which cannot recognize + comments or are not configured to do so because they cannot "bounce" around in + the text correctly. The **-cscb** flag has been added to + help them by appending appropriate balancing structure: + + perltidy -csc -cscb + } ## end foreach my $foo (sort { $b cmp $a ... }) + + The default is **-cscb**. + + - **-csce=n**, or **--closing-side-comment-else-flag=n** + + The default, **n=0**, places the text of the opening `if` statement after any + terminal `else`. + + If **n=2** is used, then each `elsif` is also given the text of the opening + `if` statement. Also, an `else` will include the text of a preceding + `elsif` statement. Note that this may result some long closing + side comments. + + If **n=1** is used, the results will be the same as **n=2** whenever the + resulting line length is less than the maximum allowed. + + - **-cscb**, or **--closing-side-comments-balanced** + + When using closing-side-comments, and the closing-side-comment-maximum-text + limit is exceeded, then the comment text must be abbreviated. + It is terminated with three dots if the **-cscb** flag is negated: + + perltidy -csc -ncscb + } ## end foreach my $foo (sort { $b cmp $a ... + + This causes a problem with older editors which do not recognize comments + because they cannot "bounce" around in the text correctly. The **-cscb** + flag tries to help them by appending appropriate terminal balancing structures: + + perltidy -csc -cscb + } ## end foreach my $foo (sort { $b cmp $a ... }) + + The default is **-cscb**. + + - **-cscw**, or **--closing-side-comment-warnings** + + This parameter is intended to help make the initial transition to the use of + closing side comments. + It causes two + things to happen if a closing side comment replaces an existing, different + closing side comment: first, an error message will be issued, and second, the + original side comment will be placed alone on a new specially marked comment + line for later attention. + + The intent is to avoid clobbering existing hand-written side comments + which happen to match the pattern of closing side comments. This flag + should only be needed on the first run with **-csc**. + + **Important Notes on Closing Side Comments:** + + - Closing side comments are only placed on lines terminated with a closing + brace. Certain closing styles, such as the use of cuddled elses + (**-ce**), preclude the generation of some closing side comments. + - Please note that adding or deleting of closing side comments takes + place only through the commands **-csc** or **-dcsc**. The other commands, + if used, merely modify the behavior of these two commands. + - It is recommended that the **-cscw** flag be used along with **-csc** on + the first use of perltidy on a given file. This will prevent loss of + any existing side comment data which happens to have the csc prefix. + - Once you use **-csc**, you should continue to use it so that any + closing side comments remain correct as code changes. Otherwise, these + comments will become incorrect as the code is updated. + - If you edit the closing side comments generated by perltidy, you must also + change the prefix to be different from the closing side comment prefix. + Otherwise, your edits will be lost when you rerun perltidy with **-csc**. For + example, you could simply change `## end` to be `## End`, since the test is + case sensitive. You may also want to use the **-ssc** flag to keep these + modified closing side comments spaced the same as actual closing side comments. + - Temporarily generating closing side comments is a useful technique for + exploring and/or debugging a perl script, especially one written by someone + else. You can always remove them with **-dcsc**. + +- Static Block Comments + + Static block comments are block comments with a special leading pattern, + `##` by default, which will be treated slightly differently from other + block comments. They effectively behave as if they had glue along their + left and top edges, because they stick to the left edge and previous line + when there is no blank spaces in those places. This option is + particularly useful for controlling how commented code is displayed. + + - **-sbc**, **--static-block-comments** + + When **-sbc** is used, a block comment with a special leading pattern, `##` by + default, will be treated specially. + + Comments so identified are treated as follows: + + - If there is no leading space on the line, then the comment will not + be indented, and otherwise it may be, + - no new blank line will be + inserted before such a comment, and + - such a comment will never become + a hanging side comment. + + For example, assuming `@month_of_year` is + left-adjusted: + + @month_of_year = ( # -sbc (default) + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', + ## 'Dec', 'Nov' + 'Nov', 'Dec'); + + Without this convention, the above code would become + + @month_of_year = ( # -nsbc + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', + + ## 'Dec', 'Nov' + 'Nov', 'Dec' + ); + + which is not as clear. + The default is to use **-sbc**. This may be deactivated with **-nsbc**. + + - **-sbcp=string**, **--static-block-comment-prefix=string** + + This parameter defines the prefix used to identify static block comments + when the **-sbc** parameter is set. The default prefix is `##`, + corresponding to `-sbcp=##`. The prefix is actually part of a perl + pattern used to match lines and it must either begin with `#` or `^#`. + In the first case a prefix ^\\s\* will be added to match any leading + whitespace, while in the second case the pattern will match only + comments with no leading whitespace. For example, to + identify all comments as static block comments, one would use `-sbcp=#`. + To identify all left-adjusted comments as static block comments, use `-sbcp='^#'`. + + Please note that **-sbcp** merely defines the pattern used to identify static + block comments; it will not be used unless the switch **-sbc** is set. Also, + please be aware that since this string is used in a perl regular expression + which identifies these comments, it must enable a valid regular expression to + be formed. + + A pattern which can be useful is: + + -sbcp=^#{2,}[^\s#] + + This pattern requires a static block comment to have at least one character + which is neither a # nor a space. It allows a line containing only '#' + characters to be rejected as a static block comment. Such lines are often used + at the start and end of header information in subroutines and should not be + separated from the intervening comments, which typically begin with just a + single '#'. + + - **-osbc**, **--outdent-static-block-comments** + + The command **-osbc** will cause static block comments to be outdented by 2 + spaces (or whatever **-ci=n** has been set to), if possible. + +- Static Side Comments + + Static side comments are side comments with a special leading pattern. + This option can be useful for controlling how commented code is displayed + when it is a side comment. + + - **-ssc**, **--static-side-comments** + + When **-ssc** is used, a side comment with a static leading pattern, which is + `##` by default, will be spaced only a single space from previous + character, and it will not be vertically aligned with other side comments. + + The default is **-nssc**. + + - **-sscp=string**, **--static-side-comment-prefix=string** + + This parameter defines the prefix used to identify static side comments + when the **-ssc** parameter is set. The default prefix is `##`, + corresponding to `-sscp=##`. + + Please note that **-sscp** merely defines the pattern used to identify + static side comments; it will not be used unless the switch **-ssc** is + set. Also, note that this string is used in a perl regular expression + which identifies these comments, so it must enable a valid regular + expression to be formed. + +## Skipping Selected Sections of Code + +Selected lines of code may be passed verbatim to the output without any +formatting. This feature is enabled by default but can be disabled with +the **--noformat-skipping** or **-nfs** flag. It should be used sparingly to +avoid littering code with markers, but it might be helpful for working +around occasional problems. For example it might be useful for keeping +the indentation of old commented code unchanged, keeping indentation of +long blocks of aligned comments unchanged, keeping certain list +formatting unchanged, or working around a glitch in perltidy. + +- **-fs**, **--format-skipping** + + This flag, which is enabled by default, causes any code between + special beginning and ending comment markers to be passed to the + output without formatting. The default beginning marker is #<<< + and the default ending marker is #>>> but they + may be changed (see next items below). Additional text may appear on + these special comment lines provided that it is separated from the + marker by at least one space. For example + + #<<< do not let perltidy touch this + my @list = (1, + 1, 1, + 1, 2, 1, + 1, 3, 3, 1, + 1, 4, 6, 4, 1,); + #>>> + + The comment markers may be placed at any location that a block comment may + appear. If they do not appear to be working, use the -log flag and examine the + `.LOG` file. Use **-nfs** to disable this feature. + +- **-fsb=string**, **--format-skipping-begin=string** + + The **-fsb=string** parameter may be used to change the beginning marker for + format skipping. The default is equivalent to -fsb='#<<<'. The string that + you enter must begin with a # and should be in quotes as necessary to get past + the command shell of your system. It is actually the leading text of a pattern + that is constructed by appending a '\\s', so you must also include backslashes + for characters to be taken literally rather than as patterns. + + Some examples show how example strings become patterns: + + -fsb='#\{\{\{' becomes /^#\{\{\{\s/ which matches #{{{ but not #{{{{ + -fsb='#\*\*' becomes /^#\*\*\s/ which matches #** but not #*** + -fsb='#\*{2,}' becomes /^#\*{2,}\s/ which matches #** and #***** + +- **-fse=string**, **--format-skipping-end=string** + + The **-fsb=string** is the corresponding parameter used to change the + ending marker for format skipping. The default is equivalent to + \-fse='#<<<'. + +## Line Break Control + +The parameters in this section control breaks after +non-blank lines of code. Blank lines are controlled +separately by parameters in the section ["Blank Line +Control"](#blank-line-control). + +- **-fnl**, **--freeze-newlines** + + If you do not want any changes to the line breaks within + lines of code in your script, set + **-fnl**, and they will remain fixed, and the rest of the commands in + this section and sections + ["Controlling List Formatting"](#controlling-list-formatting), + ["Retaining or Ignoring Existing Line Breaks"](#retaining-or-ignoring-existing-line-breaks). + You may want to use **-noll** with this. + + Note: If you also want to keep your blank lines exactly + as they are, you can use the **-fbl** flag which is described + in the section ["Blank Line Control"](#blank-line-control). + +- **-ce**, **--cuddled-else** + + Enable the "cuddled else" style, in which `else` and `elsif` are + follow immediately after the curly brace closing the previous block. + The default is not to use cuddled elses, and is indicated with the flag + **-nce** or **--nocuddled-else**. Here is a comparison of the + alternatives: + + # -ce + if ($task) { + yyy(); + } else { + zzz(); + } + + # -nce (default) + if ($task) { + yyy(); + } + else { + zzz(); + } + + In this example the keyword **else** is placed on the same line which begins with + the preceding closing block brace and is followed by its own opening block brace + on the same line. Other keywords and function names which are formatted with + this "cuddled" style are **elsif**, **continue**, **catch**, **finally**. + + Other block types can be formatted by specifying their names on a + separate parameter **-cbl**, described in a later section. + + Cuddling between a pair of code blocks requires that the closing brace of the + first block start a new line. If this block is entirely on one line in the + input file, it is necessary to decide if it should be broken to allow cuddling. + This decision is controlled by the flag **-cbo=n** discussed below. The default + and recommended value of **-cbo=1** bases this decision on the first block in + the chain. If it spans multiple lines then cuddling is made and continues + along the chain, regardless of the sizes of subsequent blocks. Otherwise, short + lines remain intact. + + So for example, the **-ce** flag would not have any effect if the above snippet + is rewritten as + + if ($task) { yyy() } + else { zzz() } + + If the first block spans multiple lines, then cuddling can be done and will + continue for the subsequent blocks in the chain, as illustrated in the previous + snippet. + + If there are blank lines between cuddled blocks they will be eliminated. If + there are comments after the closing brace where cuddling would occur then + cuddling will be prevented. If this occurs, cuddling will restart later in the + chain if possible. + +- **-cb**, **--cuddled-blocks** + + This flag is equivalent to **-ce**. + +- **-cbl**, **--cuddled-block-list** + + The built-in default cuddled block types are **else, elsif, continue, catch, finally**. + + Additional block types to which the **-cuddled-blocks** style applies can be defined by + this parameter. This parameter is a character string, giving a list of + block types separated by commas or spaces. For example, to cuddle code blocks + of type sort, map and grep, in addition to the default types, the string could + be set to + + -cbl="sort map grep" + + or equivalently + + -cbl=sort,map,grep + + Note however that these particular block types are typically short so there might not be much + opportunity for the cuddled format style. + + Using commas avoids the need to protect spaces with quotes. + + As a diagnostic check, the flag **--dump-cuddled-block-list** or **-dcbl** can be + used to view the hash of values that are generated by this flag. + + Finally, note that the **-cbl** flag by itself merely specifies which blocks are formatted + with the cuddled format. It has no effect unless this formatting style is activated with + **-ce**. + +- **-cblx**, **--cuddled-block-list-exclusive** + + When cuddled else formatting is selected with **-ce**, setting this flag causes + perltidy to ignore its built-in defaults and rely exclusively on the block types + specified on the **-cbl** flag described in the previous section. For example, + to avoid using cuddled **catch** and **finally**, which among in the defaults, the + following set of parameters could be used: + + perltidy -ce -cbl='else elsif continue' -cblx + +- **-cbo=n**, **--cuddled-break-option=n** + + Cuddled formatting is only possible between a pair of code blocks if the + closing brace of the first block starts a new line. If a block is encountered + which is entirely on a single line, and cuddled formatting is selected, it is + necessary to make a decision as to whether or not to "break" the block, meaning + to cause it to span multiple lines. This parameter controls that decision. The + options are: + + cbo=0 Never force a short block to break. + cbo=1 If the first of a pair of blocks is broken in the input file, + then break the second [DEFAULT]. + cbo=2 Break open all blocks for maximal cuddled formatting. + + The default and recommended value is **cbo=1**. With this value, if the starting + block of a chain spans multiple lines, then a cascade of breaks will occur for + remaining blocks causing the entire chain to be cuddled. + + The option **cbo=0** can produce erratic cuddling if there are numerous one-line + blocks. + + The option **cbo=2** produces maximal cuddling but will not allow any short blocks. + +- **-bl**, **--opening-brace-on-new-line** + + Use the flag **-bl** to place the opening brace on a new line: + + if ( $input_file eq '-' ) # -bl + { + important_function(); + } + + This flag applies to all structural blocks, including named sub's (unless + the **-sbl** flag is set -- see next item). + + The default style, **-nbl**, places an opening brace on the same line as + the keyword introducing it. For example, + + if ( $input_file eq '-' ) { # -nbl (default) + +- **-sbl**, **--opening-sub-brace-on-new-line** + + The flag **-sbl** can be used to override the value of **-bl** for + the opening braces of named sub's. For example, + + perltidy -sbl + + produces this result: + + sub message + { + if (!defined($_[0])) { + print("Hello, World\n"); + } + else { + print($_[0], "\n"); + } + } + + This flag is negated with **-nsbl**. If **-sbl** is not specified, + the value of **-bl** is used. + +- **-asbl**, **--opening-anonymous-sub-brace-on-new-line** + + The flag **-asbl** is like the **-sbl** flag except that it applies + to anonymous sub's instead of named subs. For example + + perltidy -asbl + + produces this result: + + $a = sub + { + if ( !defined( $_[0] ) ) { + print("Hello, World\n"); + } + else { + print( $_[0], "\n" ); + } + }; + + This flag is negated with **-nasbl**, and the default is **-nasbl**. + +- **-bli**, **--brace-left-and-indent** + + The flag **-bli** is the same as **-bl** but in addition it causes one + unit of continuation indentation ( see **-ci** ) to be placed before + an opening and closing block braces. + + For example, + + if ( $input_file eq '-' ) # -bli + { + important_function(); + } + + By default, this extra indentation occurs for blocks of type: + **if**, **elsif**, **else**, **unless**, **for**, **foreach**, **sub**, + **while**, **until**, and also with a preceding label. The next item + shows how to change this. + +- **-blil=s**, **--brace-left-and-indent-list=s** + + Use this parameter to change the types of block braces for which the + **-bli** flag applies; see ["Specifying Block Types"](#specifying-block-types). For example, + **-blil='if elsif else'** would apply it to only `if/elsif/else` blocks. + +- **-bar**, **--opening-brace-always-on-right** + + The default style, **-nbl** places the opening code block brace on a new + line if it does not fit on the same line as the opening keyword, like + this: + + if ( $bigwasteofspace1 && $bigwasteofspace2 + || $bigwasteofspace3 && $bigwasteofspace4 ) + { + big_waste_of_time(); + } + + To force the opening brace to always be on the right, use the **-bar** + flag. In this case, the above example becomes + + if ( $bigwasteofspace1 && $bigwasteofspace2 + || $bigwasteofspace3 && $bigwasteofspace4 ) { + big_waste_of_time(); + } + + A conflict occurs if both **-bl** and **-bar** are specified. + +- **-otr**, **--opening-token-right** and related flags + + The **-otr** flag is a hint that perltidy should not place a break between a + comma and an opening token. For example: + + # default formatting + push @{ $self->{$module}{$key} }, + { + accno => $ref->{accno}, + description => $ref->{description} + }; + + # perltidy -otr + push @{ $self->{$module}{$key} }, { + accno => $ref->{accno}, + description => $ref->{description} + }; + + The flag **-otr** is actually an abbreviation for three other flags + which can be used to control parens, hash braces, and square brackets + separately if desired: + + -opr or --opening-paren-right + -ohbr or --opening-hash-brace-right + -osbr or --opening-square-bracket-right + +- **-wn**, **--weld-nested-containers** + + The **-wn** flag causes closely nested pairs of opening and closing container + symbols (curly braces, brackets, or parens) to be "welded" together, meaning + that they are treated as if combined into a single unit, with the indentation + of the innermost code reduced to be as if there were just a single container + symbol. + + For example: + + # default formatting + do { + { + next if $x == $y; + } + } until $x++ > $z; + + # perltidy -wn + do { { + next if $x == $y; + } } until $x++ > $z; + + When this flag is set perltidy makes a preliminary pass through the file and + identifies all nested pairs of containers. To qualify as a nested pair, the + closing container symbols must be immediately adjacent. The opening symbols + must either be adjacent, or, if the outer opening symbol is an opening + paren, they may be separated by any single non-container symbol or something + that looks like a function evaluation. + + Any container symbol may serve as both the inner container of one pair and as + the outer container of an adjacent pair. Consequently, any number of adjacent + opening or closing symbols may join together in weld. For example, here are + three levels of wrapped function calls: + + # default formatting + my (@date_time) = Localtime( + Date_to_Time( + Add_Delta_DHMS( + $year, $month, $day, $hour, $minute, $second, + '0', $offset, '0', '0' + ) + ) + ); + + # perltidy -wn + my (@date_time) = Localtime( Date_to_Time( Add_Delta_DHMS( + $year, $month, $day, $hour, $minute, $second, + '0', $offset, '0', '0' + ) ) ); + + Notice how the indentation of the inner lines are reduced by two levels in this + case. This example also shows the typical result of this formatting, namely it + is a sandwich consisting of an initial opening layer, a central section of any + complexity forming the "meat" of the sandwich, and a final closing layer. This + predictable structure helps keep the compacted structure readable. + + The inner sandwich layer is required to be at least one line thick. If this + cannot be achieved, welding does not occur. This constraint can cause + formatting to take a couple of iterations to stabilize when it is first applied + to a script. The **-conv** flag can be used to insure that the final format is + achieved in a single run. + + Here is an example illustrating a welded container within a welded containers: + + # default formatting + $x->badd( + bmul( + $class->new( + abs( + $sx * int( $xr->numify() ) & $sy * int( $yr->numify() ) + ) + ), + $m + ) + ); + + # perltidy -wn + $x->badd( bmul( + $class->new( abs( + $sx * int( $xr->numify() ) & $sy * int( $yr->numify() ) + ) ), + $m + ) ); + + This format option is quite general but there are some limitations. + + One limitiation is that any line length limit still applies and can cause long + welded sections to be broken into multiple lines. + + Another limitation is that an opening symbol which delimits quoted text cannot + be included in a welded pair. This is because quote delimiters are treated + specially in perltidy. + + Finally, the stacking of containers defined by this flag have priority over + any other container stacking flags. This is because any welding is done first. + +- **Vertical tightness** of non-block curly braces, parentheses, and square brackets. + + These parameters control what shall be called vertical tightness. Here are the + main points: + + - Opening tokens (except for block braces) are controlled by **-vt=n**, or + **--vertical-tightness=n**, where + + -vt=0 always break a line after opening token (default). + -vt=1 do not break unless this would produce more than one + step in indentation in a line. + -vt=2 never break a line after opening token + + - You must also use the **-lp** flag when you use the **-vt** flag; the + reason is explained below. + - Closing tokens (except for block braces) are controlled by **-vtc=n**, or + **--vertical-tightness-closing=n**, where + + -vtc=0 always break a line before a closing token (default), + -vtc=1 do not break before a closing token which is followed + by a semicolon or another closing token, and is not in + a list environment. + -vtc=2 never break before a closing token. + + The rules for **-vtc=1** are designed to maintain a reasonable balance + between tightness and readability in complex lists. + + - Different controls may be applied to different token types, + and it is also possible to control block braces; see below. + - Finally, please note that these vertical tightness flags are merely + hints to the formatter, and it cannot always follow them. Things which + make it difficult or impossible include comments, blank lines, blocks of + code within a list, and possibly the lack of the **-lp** parameter. + Also, these flags may be ignored for very small lists (2 or 3 lines in + length). + + Here are some examples: + + # perltidy -lp -vt=0 -vtc=0 + %romanNumerals = ( + one => 'I', + two => 'II', + three => 'III', + four => 'IV', + ); + + # perltidy -lp -vt=1 -vtc=0 + %romanNumerals = ( one => 'I', + two => 'II', + three => 'III', + four => 'IV', + ); + + # perltidy -lp -vt=1 -vtc=1 + %romanNumerals = ( one => 'I', + two => 'II', + three => 'III', + four => 'IV', ); + + The difference between **-vt=1** and **-vt=2** is shown here: + + # perltidy -lp -vt=1 + $init->add( + mysprintf( "(void)find_threadsv(%s);", + cstring( $threadsv_names[ $op->targ ] ) + ) + ); + + # perltidy -lp -vt=2 + $init->add( mysprintf( "(void)find_threadsv(%s);", + cstring( $threadsv_names[ $op->targ ] ) + ) + ); + + With **-vt=1**, the line ending in `add(` does not combine with the next + line because the next line is not balanced. This can help with + readability, but **-vt=2** can be used to ignore this rule. + + The tightest, and least readable, code is produced with both `-vt=2` and + `-vtc=2`: + + # perltidy -lp -vt=2 -vtc=2 + $init->add( mysprintf( "(void)find_threadsv(%s);", + cstring( $threadsv_names[ $op->targ ] ) ) ); + + Notice how the code in all of these examples collapses vertically as + **-vt** increases, but the indentation remains unchanged. This is + because perltidy implements the **-vt** parameter by first formatting as + if **-vt=0**, and then simply overwriting one output line on top of the + next, if possible, to achieve the desired vertical tightness. The + **-lp** indentation style has been designed to allow this vertical + collapse to occur, which is why it is required for the **-vt** parameter. + + The **-vt=n** and **-vtc=n** parameters apply to each type of container + token. If desired, vertical tightness controls can be applied + independently to each of the closing container token types. + + The parameters for controlling parentheses are **-pvt=n** or + **--paren-vertical-tightness=n**, and **-pcvt=n** or + **--paren-vertical-tightness-closing=n**. + + Likewise, the parameters for square brackets are **-sbvt=n** or + **--square-bracket-vertical-tightness=n**, and **-sbcvt=n** or + **--square-bracket-vertical-tightness-closing=n**. + + Finally, the parameters for controlling non-code block braces are + **-bvt=n** or **--brace-vertical-tightness=n**, and **-bcvt=n** or + **--brace-vertical-tightness-closing=n**. + + In fact, the parameter **-vt=n** is actually just an abbreviation for + **-pvt=n -bvt=n sbvt=n**, and likewise **-vtc=n** is an abbreviation + for **-pvtc=n -bvtc=n sbvtc=n**. + +- **-bbvt=n** or **--block-brace-vertical-tightness=n** + + The **-bbvt=n** flag is just like the **-vt=n** flag but applies + to opening code block braces. + + -bbvt=0 break after opening block brace (default). + -bbvt=1 do not break unless this would produce more than one + step in indentation in a line. + -bbvt=2 do not break after opening block brace. + + It is necessary to also use either **-bl** or **-bli** for this to work, + because, as with other vertical tightness controls, it is implemented by + simply overwriting a line ending with an opening block brace with the + subsequent line. For example: + + # perltidy -bli -bbvt=0 + if ( open( FILE, "< $File" ) ) + { + while ( $File = ) + { + $In .= $File; + $count++; + } + close(FILE); + } + + # perltidy -bli -bbvt=1 + if ( open( FILE, "< $File" ) ) + { while ( $File = ) + { $In .= $File; + $count++; + } + close(FILE); + } + + By default this applies to blocks associated with keywords **if**, + **elsif**, **else**, **unless**, **for**, **foreach**, **sub**, **while**, + **until**, and also with a preceding label. This can be changed with + the parameter **-bbvtl=string**, or + **--block-brace-vertical-tightness-list=string**, where **string** is a + space-separated list of block types. For more information on the + possible values of this string, see ["Specifying Block Types"](#specifying-block-types) + + For example, if we want to just apply this style to `if`, + `elsif`, and `else` blocks, we could use + `perltidy -bli -bbvt=1 -bbvtl='if elsif else'`. + + There is no vertical tightness control for closing block braces; with + one exception they will be placed on separate lines. + The exception is that a cascade of closing block braces may + be stacked on a single line. See **-scbb**. + +- **-sot**, **--stack-opening-tokens** and related flags + + The **-sot** flag tells perltidy to "stack" opening tokens + when possible to avoid lines with isolated opening tokens. + + For example: + + # default + $opt_c = Text::CSV_XS->new( + { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } + ); + + # -sot + $opt_c = Text::CSV_XS->new( { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } + ); + + For detailed control of individual closing tokens the following + controls can be used: + + -sop or --stack-opening-paren + -sohb or --stack-opening-hash-brace + -sosb or --stack-opening-square-bracket + -sobb or --stack-opening-block-brace + + The flag **-sot** is an abbreviation for **-sop -sohb -sosb**. + + The flag **-sobb** is a abbreviation for **-bbvt=2 -bbvtl='\*'**. This + will case a cascade of opening block braces to appear on a single line, + although this an uncommon occurrence except in test scripts. + +- **-sct**, **--stack-closing-tokens** and related flags + + The **-sct** flag tells perltidy to "stack" closing tokens + when possible to avoid lines with isolated closing tokens. + + For example: + + # default + $opt_c = Text::CSV_XS->new( + { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } + ); + + # -sct + $opt_c = Text::CSV_XS->new( + { + binary => 1, + sep_char => $opt_c, + always_quote => 1, + } ); + + The **-sct** flag is somewhat similar to the **-vtc** flags, and in some + cases it can give a similar result. The difference is that the **-vtc** + flags try to avoid lines with leading opening tokens by "hiding" them at + the end of a previous line, whereas the **-sct** flag merely tries to + reduce the number of lines with isolated closing tokens by stacking them + but does not try to hide them. For example: + + # -vtc=2 + $opt_c = Text::CSV_XS->new( + { + binary => 1, + sep_char => $opt_c, + always_quote => 1, } ); + + For detailed control of the stacking of individual closing tokens the + following controls can be used: + + -scp or --stack-closing-paren + -schb or --stack-closing-hash-brace + -scsb or --stack-closing-square-bracket + -scbb or --stack-closing-block-brace + + The flag **-sct** is an abbreviation for stacking the non-block closing + tokens, **-scp -schb -scsb**. + + Stacking of closing block braces, **-scbb**, causes a cascade of isolated + closing block braces to be combined into a single line as in the following + example: + + # -scbb: + for $w1 (@w1) { + for $w2 (@w2) { + for $w3 (@w3) { + for $w4 (@w4) { + push( @lines, "$w1 $w2 $w3 $w4\n" ); + } } } } + + To simplify input even further for the case in which both opening and closing + non-block containers are stacked, the flag **-sac** or **--stack-all-containers** + is an abbreviation for **-sot -sot**. + +- **-dnl**, **--delete-old-newlines** + + By default, perltidy first deletes all old line break locations, and then it + looks for good break points to match the desired line length. Use **-ndnl** + or **--nodelete-old-newlines** to force perltidy to retain all old line break + points. + +- **-anl**, **--add-newlines** + + By default, perltidy will add line breaks when necessary to create + continuations of long lines and to improve the script appearance. Use + **-nanl** or **--noadd-newlines** to prevent any new line breaks. + + This flag does not prevent perltidy from eliminating existing line + breaks; see **--freeze-newlines** to completely prevent changes to line + break points. + +- Controlling whether perltidy breaks before or after operators + + Four command line parameters provide some control over whether + a line break should be before or after specific token types. + Two parameters give detailed control: + + **-wba=s** or **--want-break-after=s**, and + + **-wbb=s** or **--want-break-before=s**. + + These parameters are each followed by a quoted string, **s**, containing + a list of token types (separated only by spaces). No more than one of each + of these parameters should be specified, because repeating a + command-line parameter always overwrites the previous one before + perltidy ever sees it. + + By default, perltidy breaks **after** these token types: + % + - \* / x != == >= <= =~ !~ < > | & + = \*\*= += \*= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= + + And perltidy breaks **before** these token types by default: + . << >> -> && || // + + To illustrate, to cause a break after a concatenation operator, `'.'`, + rather than before it, the command line would be + + -wba="." + + As another example, the following command would cause a break before + math operators `'+'`, `'-'`, `'/'`, and `'*'`: + + -wbb="+ - / *" + + These commands should work well for most of the token types that perltidy uses + (use **--dump-token-types** for a list). Also try the **-D** flag on a short + snippet of code and look at the .DEBUG file to see the tokenization. However, + for a few token types there may be conflicts with hardwired logic which cause + unexpected results. One example is curly braces, which should be controlled + with the parameter **bl** provided for that purpose. + + **WARNING** Be sure to put these tokens in quotes to avoid having them + misinterpreted by your command shell. + + Two additional parameters are available which, though they provide no further + capability, can simplify input are: + + **-baao** or **--break-after-all-operators**, + + **-bbao** or **--break-before-all-operators**. + + The -baao sets the default to be to break after all of the following operators: + + % + - * / x != == >= <= =~ !~ < > | & + = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= + . : ? && || and or err xor + + and the **-bbao** flag sets the default to break before all of these operators. + These can be used to define an initial break preference which can be fine-tuned + with the **-wba** and **-wbb** flags. For example, to break before all operators + except an **=** one could use --bbao -wba='=' rather than listing every + single perl operator except **=** on a -wbb flag. + +## Controlling List Formatting + +Perltidy attempts to place comma-separated arrays of values in tables +which look good. Its default algorithms usually work well, and they +have been improving with each release, but several parameters are +available to control list formatting. + +- **-boc**, **--break-at-old-comma-breakpoints** + + This flag tells perltidy to try to break at all old commas. This is not + the default. Normally, perltidy makes a best guess at list formatting, + and seldom uses old comma breakpoints. Usually this works well, + but consider: + + my @list = (1, + 1, 1, + 1, 2, 1, + 1, 3, 3, 1, + 1, 4, 6, 4, 1,); + + The default formatting will flatten this down to one line: + + # perltidy (default) + my @list = ( 1, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 4, 6, 4, 1, ); + + which hides the structure. Using **-boc**, plus additional flags + to retain the original style, yields + + # perltidy -boc -lp -pt=2 -vt=1 -vtc=1 + my @list = (1, + 1, 1, + 1, 2, 1, + 1, 3, 3, 1, + 1, 4, 6, 4, 1,); + + A disadvantage of this flag is that all tables in the file + must already be nicely formatted. For another possibility see + the -fs flag in ["Skipping Selected Sections of Code"](#skipping-selected-sections-of-code). + +- **-mft=n**, **--maximum-fields-per-table=n** + + If the computed number of fields for any table exceeds **n**, then it + will be reduced to **n**. The default value for **n** is a large number, + 40\. While this value should probably be left unchanged as a general + rule, it might be used on a small section of code to force a list to + have a particular number of fields per line, and then either the **-boc** + flag could be used to retain this formatting, or a single comment could + be introduced somewhere to freeze the formatting in future applications + of perltidy. + + # perltidy -mft=2 + @month_of_year = ( + 'Jan', 'Feb', + 'Mar', 'Apr', + 'May', 'Jun', + 'Jul', 'Aug', + 'Sep', 'Oct', + 'Nov', 'Dec' + ); + +- **-cab=n**, **--comma-arrow-breakpoints=n** + + A comma which follows a comma arrow, '=>', is given special + consideration. In a long list, it is common to break at all such + commas. This parameter can be used to control how perltidy breaks at + these commas. (However, it will have no effect if old comma breaks are + being forced because **-boc** is used). The possible values of **n** are: + + n=0 break at all commas after => + n=1 stable: break at all commas after => if container is open, + EXCEPT FOR one-line containers + n=2 break at all commas after =>, BUT try to form the maximum + maximum one-line container lengths + n=3 do not treat commas after => specially at all + n=4 break everything: like n=0 but ALSO break a short container with + a => not followed by a comma when -vt=0 is used + n=5 stable: like n=1 but ALSO break at open one-line containers when + -vt=0 is used (default) + + For example, given the following single line, perltidy by default will + not add any line breaks because it would break the existing one-line + container: + + bless { B => $B, Root => $Root } => $package; + + Using **-cab=0** will force a break after each comma-arrow item: + + # perltidy -cab=0: + bless { + B => $B, + Root => $Root + } => $package; + + If perltidy is subsequently run with this container broken, then by + default it will break after each '=>' because the container is now + broken. To reform a one-line container, the parameter **-cab=2** could + be used. + + The flag **-cab=3** can be used to prevent these commas from being + treated specially. In this case, an item such as "01" => 31 is + treated as a single item in a table. The number of fields in this table + will be determined by the same rules that are used for any other table. + Here is an example. + + # perltidy -cab=3 + my %last_day = ( + "01" => 31, "02" => 29, "03" => 31, "04" => 30, + "05" => 31, "06" => 30, "07" => 31, "08" => 31, + "09" => 30, "10" => 31, "11" => 30, "12" => 31 + ); + +## Retaining or Ignoring Existing Line Breaks + +Several additional parameters are available for controlling the extent +to which line breaks in the input script influence the output script. +In most cases, the default parameter values are set so that, if a choice +is possible, the output style follows the input style. For example, if +a short logical container is broken in the input script, then the +default behavior is for it to remain broken in the output script. + +Most of the parameters in this section would only be required for a +one-time conversion of a script from short container lengths to longer +container lengths. The opposite effect, of converting long container +lengths to shorter lengths, can be obtained by temporarily using a short +maximum line length. + +- **-bol**, **--break-at-old-logical-breakpoints** + + By default, if a logical expression is broken at a `&&`, `||`, `and`, + or `or`, then the container will remain broken. Also, breaks + at internal keywords `if` and `unless` will normally be retained. + To prevent this, and thus form longer lines, use **-nbol**. + +- **-bok**, **--break-at-old-keyword-breakpoints** + + By default, perltidy will retain a breakpoint before keywords which may + return lists, such as `sort` and <map>. This allows chains of these + operators to be displayed one per line. Use **-nbok** to prevent + retaining these breakpoints. + +- **-bot**, **--break-at-old-ternary-breakpoints** + + By default, if a conditional (ternary) operator is broken at a `:`, + then it will remain broken. To prevent this, and thereby + form longer lines, use **-nbot**. + +- **-boa**, **--break-at-old-attribute-breakpoints** + + By default, if an attribute list is broken at a `:` in the source file, then + it will remain broken. For example, given the following code, the line breaks + at the ':'s will be retained: + + my @field + : field + : Default(1) + : Get('Name' => 'foo') : Set('Name'); + + If the attributes are on a single line in the source code then they will remain + on a single line if possible. + + To prevent this, and thereby always form longer lines, use **-nboa**. + +- **-iob**, **--ignore-old-breakpoints** + + Use this flag to tell perltidy to ignore existing line breaks to the + maximum extent possible. This will tend to produce the longest possible + containers, regardless of type, which do not exceed the line length + limit. + +- **-kis**, **--keep-interior-semicolons** + + Use the **-kis** flag to prevent breaking at a semicolon if + there was no break there in the input file. Normally + perltidy places a newline after each semicolon which + terminates a statement unless several statements are + contained within a one-line brace block. To illustrate, + consider the following input lines: + + dbmclose(%verb_delim); undef %verb_delim; + dbmclose(%expanded); undef %expanded; + + The default is to break after each statement, giving + + dbmclose(%verb_delim); + undef %verb_delim; + dbmclose(%expanded); + undef %expanded; + + With **perltidy -kis** the multiple statements are retained: + + dbmclose(%verb_delim); undef %verb_delim; + dbmclose(%expanded); undef %expanded; + + The statements are still subject to the specified value + of **maximum-line-length** and will be broken if this + maximum is exceeded. + +## Blank Line Control + +Blank lines can improve the readability of a script if they are carefully +placed. Perltidy has several commands for controlling the insertion, +retention, and removal of blank lines. + +- **-fbl**, **--freeze-blank-lines** + + Set **-fbl** if you want to the blank lines in your script to + remain exactly as they are. The rest of the parameters in + this section may then be ignored. (Note: setting the **-fbl** flag + is equivalent to setting **-mbl=0** and **-kbl=2**). + +- **-bbc**, **--blanks-before-comments** + + A blank line will be introduced before a full-line comment. This is the + default. Use **-nbbc** or **--noblanks-before-comments** to prevent + such blank lines from being introduced. + +- **-blbs=n**, **--blank-lines-before-subs=n** + + The parameter **-blbs=n** requests that least **n** blank lines precede a sub + definition which does not follow a comment and which is more than one-line + long. The default is <-blbs=1>. **BEGIN** and **END** blocks are included. + + The requested number of blanks statement will be inserted regardless of the + value of **--maximum-consecutive-blank-lines=n** (**-mbl=n**) with the exception + that if **-mbl=0** then no blanks will be output. + + This parameter interacts with the value **k** of the parameter **--maximum-consecutive-blank-lines=k** (**-mbl=k**) as follows: + + 1\. If **-mbl=0** then no blanks will be output. This allows all blanks to be suppressed with a single parameter. Otherwise, + + 2\. If the number of old blank lines in the script is less than **n** then + additional blanks will be inserted to make the total **n** regardless of the + value of **-mbl=k**. + + 3\. If the number of old blank lines in the script equals or exceeds **n** then + this parameter has no effect, however the total will not exceed + value specified on the **-mbl=k** flag. + +- **-blbp=n**, **--blank-lines-before-packages=n** + + The parameter **-blbp=n** requests that least **n** blank lines precede a package + which does not follow a comment. The default is **-blbp=1**. + + This parameter interacts with the value **k** of the parameter + **--maximum-consecutive-blank-lines=k** (**-mbl=k**) in the same way as described + for the previous item **-blbs=n**. + +- **-bbs**, **--blanks-before-subs** + + For compatibility with previous versions, **-bbs** or **--blanks-before-subs** + is equivalent to `-blbp=1` and `-blbs=1`. + + Likewise, **-nbbs** or **--noblanks-before-subs** + is equivalent to `-blbp=0` and `-blbs=0`. + +- **-bbb**, **--blanks-before-blocks** + + A blank line will be introduced before blocks of coding delimited by + **for**, **foreach**, **while**, **until**, and **if**, **unless**, in the following + circumstances: + + - The block is not preceded by a comment. + - The block is not a one-line block. + - The number of consecutive non-blank lines at the current indentation depth is at least **-lbl** + (see next section). + + This is the default. The intention of this option is to introduce + some space within dense coding. + This is negated with **-nbbb** or **--noblanks-before-blocks**. + +- **-lbl=n** **--long-block-line-count=n** + + This controls how often perltidy is allowed to add blank lines before + certain block types (see previous section). The default is 8. Entering + a value of **0** is equivalent to entering a very large number. + +- **-blao=i** or **--blank-lines-after-opening-block=i** + + This control places a minimum of **i** blank lines **after** a line which **ends** + with an opening block brace of a specified type. By default, this only applies + to the block of a named **sub**, but this can be changed (see **-blaol** below). + The default is not to do this (**i=0**). + + Please see the note below on using the **-blao** and **-blbc** options. + +- **-blbc=i** or **--blank-lines-before-closing-block=i** + + This control places a minimum of **i** blank lines **before** a line which + **begins** with a closing block brace of a specified type. By default, this + only applies to the block of a named **sub**, but this can be changed (see + **-blbcl** below). The default is not to do this (**i=0**). + +- **-blaol=s** or **--blank-lines-after-opening-block-list=s** + + The parameter **s** is a list of block type keywords to which the flag **-blao** + should apply. The section ["Specifying Block Types"](#specifying-block-types) explains how to list + block types. + +- **-blbcl=s** or **--blank-lines-before-closing-block-list=s** + + This parameter is a list of block type keywords to which the flag **-blbc** + should apply. The section ["Specifying Block Types"](#specifying-block-types) explains how to list + block types. + +- Note on using the **-blao** and **-blbc** options. + + These blank line controls introduce a certain minimum number of blank lines in + the text, but the final number of blank lines may be greater, depending on + values of the other blank line controls and the number of old blank lines. A + consequence is that introducing blank lines with these and other controls + cannot be exactly undone, so some experimentation with these controls is + recommended before using them. + + For example, suppose that for some reason we decide to introduce one blank + space at the beginning and ending of all blocks. We could do + this using + + perltidy -blao=2 -blbc=2 -blaol='*' -blbcl='*' filename + + Now suppose the script continues to be developed, but at some later date we + decide we don't want these spaces after all. we might expect that running with + the flags **-blao=0** and **-blbc=0** will undo them. However, by default + perltidy retains single blank lines, so the blank lines remain. + + We can easily fix this by telling perltidy to ignore old blank lines by + including the added parameter **-kbl=0** and rerunning. Then the unwanted blank + lines will be gone. However, this will cause all old blank lines to be + ignored, perhaps even some that were added by hand to improve formatting. So + please be cautious when using these parameters. + +- **-mbl=n** **--maximum-consecutive-blank-lines=n** + + This parameter specifies the maximum number of consecutive blank lines which + will be output within code sections of a script. The default is n=1. If the + input file has more than n consecutive blank lines, the number will be reduced + to n except as noted above for the **-blbp** and **-blbs** parameters. If **n=0** + then no blank lines will be output (unless all old blank lines are retained + with the **-kbl=2** flag of the next section). + + This flag obviously does not apply to pod sections, + here-documents, and quotes. + +- **-kbl=n**, **--keep-old-blank-lines=n** + + The **-kbl=n** flag gives you control over how your existing blank lines are + treated. + + The possible values of **n** are: + + n=0 ignore all old blank lines + n=1 stable: keep old blanks, but limited by the value of the B<-mbl=n> flag + n=2 keep all old blank lines, regardless of the value of the B<-mbl=n> flag + + The default is **n=1**. + +- **-sob**, **--swallow-optional-blank-lines** + + This is equivalent to **kbl=0** and is included for compatibility with + previous versions. + +- **-nsob**, **--noswallow-optional-blank-lines** + + This is equivalent to **kbl=1** and is included for compatibility with + previous versions. + +## Styles + +A style refers to a convenient collection of existing parameters. + +- **-gnu**, **--gnu-style** + + **-gnu** gives an approximation to the GNU Coding Standards (which do + not apply to perl) as they are sometimes implemented. At present, this + style overrides the default style with the following parameters: + + -lp -bl -noll -pt=2 -bt=2 -sbt=2 -icp + +- **-pbp**, **--perl-best-practices** + + **-pbp** is an abbreviation for the parameters in the book **Perl Best Practices** + by Damian Conway: + + -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq + -wbb="% + - * / x != == >= <= =~ !~ < > | & = + **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=" + + Please note that this parameter set includes -st and -se flags, which make + perltidy act as a filter on one file only. These can be overridden by placing + **-nst** and/or **-nse** after the -pbp parameter. + + Also note that the value of continuation indentation, -ci=4, is equal to the + value of the full indentation, -i=4. In some complex statements perltidy will + produce nicer results with -ci=2. This can be implemented by including -ci=2 + after the -pbp parameter. For example, + + # perltidy -pbp + $self->{_text} = ( + !$section ? '' + : $type eq 'item' ? "the $section entry" + : "the section on $section" + ) + . ( + $page + ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" + : ' elsewhere in this document' + ); + + # perltidy -pbp -ci=2 + $self->{_text} = ( + !$section ? '' + : $type eq 'item' ? "the $section entry" + : "the section on $section" + ) + . ( + $page + ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" + : ' elsewhere in this document' + ); + +## Controlling Vertical Alignment + +Vertical alignment refers to lining up certain symbols in list of consecutive +similar lines to improve readability. For example, the "fat commas" are +aligned in the following statement: + + $data = $pkg->new( + PeerAddr => join( ".", @port[ 0 .. 3 ] ), + PeerPort => $port[4] * 256 + $port[5], + Proto => 'tcp' + ); + +The only explicit control on vertical alignment is to turn it off using +**-novalign**, a flag mainly intended for debugging. However, vertical +alignment can be forced to stop and restart by selectively introducing blank +lines. For example, a blank has been inserted in the following code +to keep somewhat similar things aligned. + + %option_range = ( + 'format' => [ 'tidy', 'html', 'user' ], + 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], + 'character-encoding' => [ 'none', 'utf8' ], + + 'block-brace-tightness' => [ 0, 2 ], + 'brace-tightness' => [ 0, 2 ], + 'paren-tightness' => [ 0, 2 ], + 'square-bracket-tightness' => [ 0, 2 ], + ); + +## Other Controls + +- Deleting selected text + + Perltidy can selectively delete comments and/or pod documentation. The + command **-dac** or **--delete-all-comments** will delete all comments + **and** all pod documentation, leaving just code and any leading system + control lines. + + The command **-dp** or **--delete-pod** will remove all pod documentation + (but not comments). + + Two commands which remove comments (but not pod) are: **-dbc** or + **--delete-block-comments** and **-dsc** or **--delete-side-comments**. + (Hanging side comments will be deleted with block comments here.) + + The negatives of these commands also work, and are the defaults. When + block comments are deleted, any leading 'hash-bang' will be retained. + Also, if the **-x** flag is used, any system commands before a leading + hash-bang will be retained (even if they are in the form of comments). + +- Writing selected text to a file + + When perltidy writes a formatted text file, it has the ability to also + send selected text to a file with a `.TEE` extension. This text can + include comments and pod documentation. + + The command **-tac** or **--tee-all-comments** will write all comments + **and** all pod documentation. + + The command **-tp** or **--tee-pod** will write all pod documentation (but + not comments). + + The commands which write comments (but not pod) are: **-tbc** or + **--tee-block-comments** and **-tsc** or **--tee-side-comments**. + (Hanging side comments will be written with block comments here.) + + The negatives of these commands also work, and are the defaults. + +- Using a `.perltidyrc` command file + + If you use perltidy frequently, you probably won't be happy until you + create a `.perltidyrc` file to avoid typing commonly-used parameters. + Perltidy will first look in your current directory for a command file + named `.perltidyrc`. If it does not find one, it will continue looking + for one in other standard locations. + + These other locations are system-dependent, and may be displayed with + the command `perltidy -dpro`. Under Unix systems, it will first look + for an environment variable **PERLTIDY**. Then it will look for a + `.perltidyrc` file in the home directory, and then for a system-wide + file `/usr/local/etc/perltidyrc`, and then it will look for + `/etc/perltidyrc`. Note that these last two system-wide files do not + have a leading dot. Further system-dependent information will be found + in the INSTALL file distributed with perltidy. + + Under Windows, perltidy will also search for a configuration file named perltidy.ini since Windows does not allow files with a leading period (.). + Use `perltidy -dpro` to see the possible locations for your system. + An example might be `C:\Documents and Settings\All Users\perltidy.ini`. + + Another option is the use of the PERLTIDY environment variable. + The method for setting environment variables depends upon the version of + Windows that you are using. Instructions for Windows 95 and later versions can + be found here: + + http://www.netmanage.com/000/20021101\_005\_tcm21-6336.pdf + + Under Windows NT / 2000 / XP the PERLTIDY environment variable can be placed in + either the user section or the system section. The later makes the + configuration file common to all users on the machine. Be sure to enter the + full path of the configuration file in the value of the environment variable. + Ex. PERLTIDY=C:\\Documents and Settings\\perltidy.ini + + The configuration file is free format, and simply a list of parameters, just as + they would be entered on a command line. Any number of lines may be used, with + any number of parameters per line, although it may be easiest to read with one + parameter per line. Comment text begins with a #, and there must + also be a space before the # for side comments. It is a good idea to + put complex parameters in either single or double quotes. + + Here is an example of a `.perltidyrc` file: + + # This is a simple of a .perltidyrc configuration file + # This implements a highly spaced style + -se # errors to standard error output + -w # show all warnings + -bl # braces on new lines + -pt=0 # parens not tight at all + -bt=0 # braces not tight + -sbt=0 # square brackets not tight + + The parameters in the `.perltidyrc` file are installed first, so any + parameters given on the command line will have priority over them. + + To avoid confusion, perltidy ignores any command in the .perltidyrc + file which would cause some kind of dump and an exit. These are: + + -h -v -ddf -dln -dop -dsn -dtt -dwls -dwrs -ss + + There are several options may be helpful in debugging a `.perltidyrc` + file: + + - A very helpful command is **--dump-profile** or **-dpro**. It writes a + list of all configuration filenames tested to standard output, and + if a file is found, it dumps the content to standard output before + exiting. So, to find out where perltidy looks for its configuration + files, and which one if any it selects, just enter + + perltidy -dpro + + - It may be simplest to develop and test configuration files with + alternative names, and invoke them with **-pro=filename** on the command + line. Then rename the desired file to `.perltidyrc` when finished. + - The parameters in the `.perltidyrc` file can be switched off with + the **-npro** option. + - The commands **--dump-options**, **--dump-defaults**, **--dump-long-names**, + and **--dump-short-names**, all described below, may all be helpful. + +- Creating a new abbreviation + + A special notation is available for use in a `.perltidyrc` file + for creating an abbreviation for a group + of options. This can be used to create a + shorthand for one or more styles which are frequently, but not always, + used. The notation is to group the options within curly braces which + are preceded by the name of the alias (without leading dashes), like this: + + newword { + -opt1 + -opt2 + } + + where **newword** is the abbreviation, and **opt1**, etc, are existing parameters + _or other abbreviations_. The main syntax requirement is that the new + abbreviation along with its opening curly brace must begin on a new line. + Space before and after the curly braces is optional. + For a + specific example, the following line + + airy {-bl -pt=0 -bt=0 -sbt=0} + + could be placed in a `.perltidyrc` file, and then invoked at will with + + perltidy -airy somefile.pl + + (Either `-airy` or `--airy` may be used). + +- Skipping leading non-perl commands with **-x** or **--look-for-hash-bang** + + If your script has leading lines of system commands or other text which + are not valid perl code, and which are separated from the start of the + perl code by a "hash-bang" line, ( a line of the form `#!...perl` ), + you must use the **-x** flag to tell perltidy not to parse and format any + lines before the "hash-bang" line. This option also invokes perl with a + \-x flag when checking the syntax. This option was originally added to + allow perltidy to parse interactive VMS scripts, but it should be used + for any script which is normally invoked with `perl -x`. + +- Making a file unreadable + + The goal of perltidy is to improve the readability of files, but there + are two commands which have the opposite effect, **--mangle** and + **--extrude**. They are actually + merely aliases for combinations of other parameters. Both of these + strip all possible whitespace, but leave comments and pod documents, + so that they are essentially reversible. The + difference between these is that **--mangle** puts the fewest possible + line breaks in a script while **--extrude** puts the maximum possible. + Note that these options do not provided any meaningful obfuscation, because + perltidy can be used to reformat the files. They were originally + developed to help test the tokenization logic of perltidy, but they + have other uses. + One use for **--mangle** is the following: + + perltidy --mangle myfile.pl -st | perltidy -o myfile.pl.new + + This will form the maximum possible number of one-line blocks (see next + section), and can sometimes help clean up a badly formatted script. + + A similar technique can be used with **--extrude** instead of **--mangle** + to make the minimum number of one-line blocks. + + Another use for **--mangle** is to combine it with **-dac** to reduce + the file size of a perl script. + +- One-line blocks + + There are a few points to note regarding one-line blocks. A one-line + block is something like this, + + if ($x > 0) { $y = 1 / $x } + + where the contents within the curly braces is short enough to fit + on a single line. + + With few exceptions, perltidy retains existing one-line blocks, if it + is possible within the line-length constraint, but it does not attempt + to form new ones. In other words, perltidy will try to follow the + one-line block style of the input file. + + If an existing one-line block is longer than the maximum line length, + however, it will be broken into multiple lines. When this happens, perltidy + checks for and adds any optional terminating semicolon (unless the **-nasc** + option is used) if the block is a code block. + + The main exception is that perltidy will attempt to form new one-line + blocks following the keywords `map`, `eval`, and `sort`, because + these code blocks are often small and most clearly displayed in a single + line. + + One-line block rules can conflict with the cuddled-else option. When + the cuddled-else option is used, perltidy retains existing one-line + blocks, even if they do not obey cuddled-else formatting. + + Occasionally, when one-line blocks get broken because they exceed the + available line length, the formatting will violate the requested brace style. + If this happens, reformatting the script a second time should correct + the problem. + +- Debugging + + The following flags are available for debugging: + + **--dump-cuddled-block-list** or **-dcbl** will dump to standard output the + internal hash of cuddled block types created by a **-cuddled-block-list** input + string. + + **--dump-defaults** or **-ddf** will write the default option set to standard output and quit + + **--dump-profile** or **-dpro** will write the name of the current + configuration file and its contents to standard output and quit. + + **--dump-options** or **-dop** will write current option set to standard + output and quit. + + **--dump-long-names** or **-dln** will write all command line long names (passed + to Get\_options) to standard output and quit. + + **--dump-short-names** or **-dsn** will write all command line short names + to standard output and quit. + + **--dump-token-types** or **-dtt** will write a list of all token types + to standard output and quit. + + **--dump-want-left-space** or **-dwls** will write the hash %want\_left\_space + to standard output and quit. See the section on controlling whitespace + around tokens. + + **--dump-want-right-space** or **-dwrs** will write the hash %want\_right\_space + to standard output and quit. See the section on controlling whitespace + around tokens. + + **--no-memoize** or **-nmem** will turn of memoizing. + Memoization can reduce run time when running perltidy repeatedly in a + single process. It is on by default but can be deactivated for + testing with **-nmem**. + + **--no-timestamp** or **-nts** will eliminate any time stamps in output files to prevent + differences in dates from causing test installation scripts to fail. There are just + a couple of places where timestamps normally occur. One is in the headers of + html files, and another is when the **-cscw** option is selected. The default is + to allow timestamps (**--timestamp** or **-ts**). + + **--file-size-order** or **-fso** will cause files to be processed in order of + increasing size, when multiple files are being processed. This is useful + during program development, when large numbers of files with varying sizes are + processed, because it can reduce virtual memory usage. + + **-DEBUG** will write a file with extension `.DEBUG` for each input file + showing the tokenization of all lines of code. + +- Working with MakeMaker, AutoLoader and SelfLoader + + The first $VERSION line of a file which might be eval'd by MakeMaker + is passed through unchanged except for indentation. + Use **--nopass-version-line**, or **-npvl**, to deactivate this feature. + + If the AutoLoader module is used, perltidy will continue formatting + code after seeing an \_\_END\_\_ line. + Use **--nolook-for-autoloader**, or **-nlal**, to deactivate this feature. + + Likewise, if the SelfLoader module is used, perltidy will continue formatting + code after seeing a \_\_DATA\_\_ line. + Use **--nolook-for-selfloader**, or **-nlsl**, to deactivate this feature. + +- Working around problems with older version of Perl + + Perltidy contains a number of rules which help avoid known subtleties + and problems with older versions of perl, and these rules always + take priority over whatever formatting flags have been set. For example, + perltidy will usually avoid starting a new line with a bareword, because + this might cause problems if `use strict` is active. + + There is no way to override these rules. + +# HTML OPTIONS + +- The **-html** master switch + + The flag **-html** causes perltidy to write an html file with extension + `.html`. So, for example, the following command + + perltidy -html somefile.pl + + will produce a syntax-colored html file named `somefile.pl.html` + which may be viewed with a browser. + + **Please Note**: In this case, perltidy does not do any formatting to the + input file, and it does not write a formatted file with extension + `.tdy`. This means that two perltidy runs are required to create a + fully reformatted, html copy of a script. + +- The **-pre** flag for code snippets + + When the **-pre** flag is given, only the pre-formatted section, within + the
 and </PRE> tags, will be output.  This simplifies inclusion
+    of the output in other files.  The default is to output a complete
+    web page.
+
+- The **-nnn** flag for line numbering
+
+    When the **-nnn** flag is given, the output lines will be numbered.
+
+- The **-toc**, or **--html-table-of-contents** flag
+
+    By default, a table of contents to packages and subroutines will be
+    written at the start of html output.  Use **-ntoc** to prevent this.
+    This might be useful, for example, for a pod document which contains a
+    number of unrelated code snippets.  This flag only influences the code
+    table of contents; it has no effect on any table of contents produced by
+    pod2html (see next item).
+
+- The **-pod**, or **--pod2html** flag
+
+    There are two options for formatting pod documentation.  The default is
+    to pass the pod through the Pod::Html module (which forms the basis of
+    the pod2html utility).  Any code sections are formatted by perltidy, and
+    the results then merged.  Note: perltidy creates a temporary file when
+    Pod::Html is used; see ["FILES"](#files).  Also, Pod::Html creates temporary
+    files for its cache.
+
+    NOTE: Perltidy counts the number of `=cut` lines, and either moves the
+    pod text to the top of the html file if there is one `=cut`, or leaves
+    the pod text in its original order (interleaved with code) otherwise.
+
+    Most of the flags accepted by pod2html may be included in the perltidy
+    command line, and they will be passed to pod2html.  In some cases,
+    the flags have a prefix `pod` to emphasize that they are for the
+    pod2html, and this prefix will be removed before they are passed to
+    pod2html.  The flags which have the additional `pod` prefix are:
+
+        --[no]podheader --[no]podindex --[no]podrecurse --[no]podquiet 
+        --[no]podverbose --podflush
+
+    The flags which are unchanged from their use in pod2html are:
+
+        --backlink=s --cachedir=s --htmlroot=s --libpods=s --title=s
+        --podpath=s --podroot=s 
+
+    where 's' is an appropriate character string.  Not all of these flags are
+    available in older versions of Pod::Html.  See your Pod::Html documentation for
+    more information.
+
+    The alternative, indicated with **-npod**, is not to use Pod::Html, but
+    rather to format pod text in italics (or whatever the stylesheet
+    indicates), without special html markup.  This is useful, for example,
+    if pod is being used as an alternative way to write comments.
+
+- The **-frm**, or **--frames** flag
+
+    By default, a single html output file is produced.  This can be changed
+    with the **-frm** option, which creates a frame holding a table of
+    contents in the left panel and the source code in the right side. This
+    simplifies code browsing.  Assume, for example, that the input file is
+    `MyModule.pm`.  Then, for default file extension choices, these three
+    files will be created:
+
+        MyModule.pm.html      - the frame
+        MyModule.pm.toc.html  - the table of contents
+        MyModule.pm.src.html  - the formatted source code
+
+    Obviously this file naming scheme requires that output be directed to a real
+    file (as opposed to, say, standard output).  If this is not the
+    case, or if the file extension is unknown, the **-frm** option will be
+    ignored.
+
+- The **-text=s**, or **--html-toc-extension** flag
+
+    Use this flag to specify the extra file extension of the table of contents file
+    when html frames are used.  The default is "toc".
+    See ["Specifying File Extensions"](#specifying-file-extensions).
+
+- The **-sext=s**, or **--html-src-extension** flag
+
+    Use this flag to specify the extra file extension of the content file when html
+    frames are used.  The default is "src".
+    See ["Specifying File Extensions"](#specifying-file-extensions).
+
+- The **-hent**, or **--html-entities** flag
+
+    This flag controls the use of Html::Entities for html formatting.  By
+    default, the module Html::Entities is used to encode special symbols.
+    This may not be the right thing for some browser/language
+    combinations.  Use --nohtml-entities or -nhent to prevent this.
+
+- Style Sheets
+
+    Style sheets make it very convenient to control and adjust the
+    appearance of html pages.  The default behavior is to write a page of
+    html with an embedded style sheet.
+
+    An alternative to an embedded style sheet is to create a page with a
+    link to an external style sheet.  This is indicated with the
+    **-css=filename**,  where the external style sheet is `filename`.  The
+    external style sheet `filename` will be created if and only if it does
+    not exist.  This option is useful for controlling multiple pages from a
+    single style sheet.
+
+    To cause perltidy to write a style sheet to standard output and exit,
+    use the **-ss**, or **--stylesheet**, flag.  This is useful if the style
+    sheet could not be written for some reason, such as if the **-pre** flag
+    was used.  Thus, for example,
+
+        perltidy -html -ss >mystyle.css
+
+    will write a style sheet with the default properties to file
+    `mystyle.css`.
+
+    The use of style sheets is encouraged, but a web page without a style
+    sheets can be created with the flag **-nss**.  Use this option if you
+    must to be sure that older browsers (roughly speaking, versions prior to
+    4.0 of Netscape Navigator and Internet Explorer) can display the
+    syntax-coloring of the html files.
+
+- Controlling HTML properties
+
+    Note: It is usually more convenient to accept the default properties
+    and then edit the stylesheet which is produced.  However, this section
+    shows how to control the properties with flags to perltidy.
+
+    Syntax colors may be changed from their default values by flags of the either
+    the long form, **-html-color-xxxxxx=n**, or more conveniently the short form,
+    **-hcx=n**, where **xxxxxx** is one of the following words, and **x** is the
+    corresponding abbreviation:
+
+          Token Type             xxxxxx           x 
+          ----------             --------         --
+          comment                comment          c
+          number                 numeric          n
+          identifier             identifier       i
+          bareword, function     bareword         w
+          keyword                keyword          k
+          quite, pattern         quote            q
+          here doc text          here-doc-text    h
+          here doc target        here-doc-target  hh
+          punctuation            punctuation      pu
+          parentheses            paren            p
+          structural braces      structure        s
+          semicolon              semicolon        sc
+          colon                  colon            co
+          comma                  comma            cm
+          label                  label            j
+          sub definition name    subroutine       m
+          pod text               pod-text         pd
+
+    A default set of colors has been defined, but they may be changed by providing
+    values to any of the following parameters, where **n** is either a 6 digit 
+    hex RGB color value or an ascii name for a color, such as 'red'.
+
+    To illustrate, the following command will produce an html 
+    file `somefile.pl.html` with "aqua" keywords:
+
+            perltidy -html -hck=00ffff somefile.pl
+
+    and this should be equivalent for most browsers:
+
+            perltidy -html -hck=aqua somefile.pl
+
+    Perltidy merely writes any non-hex names that it sees in the html file.
+    The following 16 color names are defined in the HTML 3.2 standard:
+
+            black   => 000000,
+            silver  => c0c0c0,
+            gray    => 808080,
+            white   => ffffff,
+            maroon  => 800000,
+            red     => ff0000,
+            purple  => 800080,
+            fuchsia => ff00ff,
+            green   => 008000,
+            lime    => 00ff00,
+            olive   => 808000,
+            yellow  => ffff00
+            navy    => 000080,
+            blue    => 0000ff,
+            teal    => 008080,
+            aqua    => 00ffff,
+
+    Many more names are supported in specific browsers, but it is safest
+    to use the hex codes for other colors.  Helpful color tables can be
+    located with an internet search for "HTML color tables". 
+
+    Besides color, two other character attributes may be set: bold, and italics.
+    To set a token type to use bold, use the flag
+    **--html-bold-xxxxxx** or **-hbx**, where **xxxxxx** or **x** are the long
+    or short names from the above table.  Conversely, to set a token type to 
+    NOT use bold, use **--nohtml-bold-xxxxxx** or **-nhbx**.
+
+    Likewise, to set a token type to use an italic font, use the flag
+    **--html-italic-xxxxxx** or **-hix**, where again **xxxxxx** or **x** are the
+    long or short names from the above table.  And to set a token type to
+    NOT use italics, use **--nohtml-italic-xxxxxx** or **-nhix**.
+
+    For example, to use bold braces and lime color, non-bold, italics keywords the
+    following command would be used:
+
+            perltidy -html -hbs -hck=00FF00 -nhbk -hik somefile.pl
+
+    The background color can be specified with **--html-color-background=n**,
+    or **-hcbg=n** for short, where n is a 6 character hex RGB value.  The
+    default color of text is the value given to **punctuation**, which is
+    black as a default.
+
+    Here are some notes and hints:
+
+    1\. If you find a preferred set of these parameters, you may want
+    to create a `.perltidyrc` file containing them.  See the perltidy man
+    page for an explanation.
+
+    2\. Rather than specifying values for these parameters, it is probably
+    easier to accept the defaults and then edit a style sheet.  The style
+    sheet contains comments which should make this easy.
+
+    3\. The syntax-colored html files can be very large, so it may be best to
+    split large files into smaller pieces to improve download times.
+
+# SOME COMMON INPUT CONVENTIONS
+
+## Specifying Block Types
+
+Several parameters which refer to code block types may be customized by also
+specifying an associated list of block types.  The type of a block is the name
+of the keyword which introduces that block, such as **if**, **else**, or **sub**.
+An exception is a labeled block, which has no keyword, and should be specified
+with just a colon.  To specify all blocks use **'\*'**.
+
+The keyword **sub** indicates a named sub.  For anonymous subs, use the special
+keyword **asub**.
+
+For example, the following parameter specifies `sub`, labels, `BEGIN`, and
+`END` blocks:
+
+    -cscl="sub : BEGIN END"
+
+(the meaning of the -cscl parameter is described above.)  Note that
+quotes are required around the list of block types because of the
+spaces.  For another example, the following list specifies all block types
+for vertical tightness:
+
+    -bbvtl='*'
+
+## Specifying File Extensions
+
+Several parameters allow default file extensions to be overridden.  For
+example, a backup file extension may be specified with **-bext=ext**,
+where **ext** is some new extension.  In order to provides the user some
+flexibility, the following convention is used in all cases to decide if
+a leading '.' should be used.  If the extension `ext` begins with
+`A-Z`, `a-z`, or `0-9`, then it will be appended to the filename with
+an intermediate '.' (or perhaps an '\_' on VMS systems).  Otherwise, it
+will be appended directly.  
+
+For example, suppose the file is `somefile.pl`.  For `-bext=old`, a '.' is
+added to give `somefile.pl.old`.  For `-bext=.old`, no additional '.' is
+added, so again the backup file is `somefile.pl.old`.  For `-bext=~`, then no
+dot is added, and the backup file will be `somefile.pl~`  .  
+
+# SWITCHES WHICH MAY BE NEGATED
+
+The following list shows all short parameter names which allow a prefix
+'n' to produce the negated form:
+
+    D    anl asc  aws  b    bbb bbc bbs  bl   bli  boc bok  bol  bot  ce
+    csc  dac dbc  dcsc ddf  dln dnl dop  dp   dpro dsc dsm  dsn  dtt  dwls
+    dwrs dws f    fll  frm  fs  hsc html ibc  icb  icp iob  isbc lal  log
+    lp   lsl ohbr okw  ola  oll opr opt  osbr otr  ple  pod  pvl  q
+    sbc  sbl schb scp  scsb sct se  sfp  sfs  skp  sob sohb sop  sosb sot
+    ssc  st  sts  syn  t    tac tbc toc  tp   tqw  tsc w    x    bar  kis
+
+Equivalently, the prefix 'no' or 'no-' on the corresponding long names may be
+used.
+
+# LIMITATIONS
+
+- Parsing Limitations
+
+    Perltidy should work properly on most perl scripts.  It does a lot of
+    self-checking, but still, it is possible that an error could be
+    introduced and go undetected.  Therefore, it is essential to make
+    careful backups and to test reformatted scripts.
+
+    The main current limitation is that perltidy does not scan modules
+    included with 'use' statements.  This makes it necessary to guess the
+    context of any bare words introduced by such modules.  Perltidy has good
+    guessing algorithms, but they are not infallible.  When it must guess,
+    it leaves a message in the log file.
+
+    If you encounter a bug, please report it.
+
+- What perltidy does not parse and format
+
+    Perltidy indents but does not reformat comments and `qw` quotes. 
+    Perltidy does not in any way modify the contents of here documents or
+    quoted text, even if they contain source code.  (You could, however,
+    reformat them separately).  Perltidy does not format 'format' sections
+    in any way.  And, of course, it does not modify pod documents.
+
+# FILES
+
+- Temporary files
+
+    Under the -html option with the default --pod2html flag, a temporary file is
+    required to pass text to Pod::Html.  Unix systems will try to use the POSIX
+    tmpnam() function.  Otherwise the file `perltidy.TMP` will be temporarily
+    created in the current working directory.
+
+- Special files when standard input is used
+
+    When standard input is used, the log file, if saved, is `perltidy.LOG`,
+    and any errors are written to `perltidy.ERR` unless the **-se** flag is
+    set.  These are saved in the current working directory.  
+
+- Files overwritten
+
+    The following file extensions are used by perltidy, and files with these
+    extensions may be overwritten or deleted: `.ERR`, `.LOG`, `.TEE`,
+    and/or `.tdy`, `.html`, and `.bak`, depending on the run type and
+    settings.
+
+- Files extensions limitations
+
+    Perltidy does not operate on files for which the run could produce a file with
+    a duplicated file extension.  These extensions include `.LOG`, `.ERR`,
+    `.TEE`, and perhaps `.tdy` and `.bak`, depending on the run type.  The
+    purpose of this rule is to prevent generating confusing filenames such as
+    `somefile.tdy.tdy.tdy`.
+
+# SEE ALSO
+
+perlstyle(1), Perl::Tidy(3)
+
+# VERSION
+
+This man page documents perltidy version 20180220.01
+
+# BUG REPORTS
+
+A list of current bugs and issues can be found at the CPAN site
+
+     https://rt.cpan.org/Public/Dist/Display.html?Name=Perl-Tidy
+
+To report a new bug or problem, use the link on this page.  
+
+# COPYRIGHT
+
+Copyright (c) 2000-2018 by Steve Hancock
+
+# LICENSE
+
+This package is free software; you can redistribute it and/or modify it
+under the terms of the "GNU General Public License".
+
+Please refer to the file "COPYING" for details.
+
+# DISCLAIMER
+
+This package is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+See the "GNU General Public License" for more details.
diff --git a/local-docs/stylekey.pod b/local-docs/stylekey.pod
new file mode 100644
index 00000000..ca9a7e00
--- /dev/null
+++ b/local-docs/stylekey.pod
@@ -0,0 +1,781 @@
+=head1 Perltidy Style Key
+
+When perltidy was first developed, the main parameter choices were the number
+of indentation spaces and if the user liked cuddled else's.  As the number of
+users has grown so has the number of parameters.  Now there are so many that it
+can be difficult for a new user to find a good initial set.  This document is
+one attempt to help with this problem, and some other suggestions are given at
+the end.
+
+Use this document to methodically find a starting set of perltidy parameters to
+approximate your style.  We will be working on just one aspect of formatting at
+a time.  Just read each question and select the best answer.  Enter your
+parameters in a file named F<.perltidyrc> (examples are listed at the end).
+Then move it to one of the places where perltidy will find it.  You can run
+perltidy with the parameter B<-dpro> to see where these places are for your
+system.
+
+=head2 Before You Start
+
+Before you begin, experiment using just C on some
+of your files.  From the results (which you will find in files with a
+F<.tdy> extension), you will get a sense of what formatting changes, if
+any, you'd like to make.  If the default formatting is acceptable, you
+do not need a F<.perltidyrc> file.
+
+=head2 Use as Filter?
+
+Do you almost always want to run perltidy as a standard filter on just
+one input file?  If yes, use B<-st> and B<-se>.  
+
+=head2 Line Length Setting
+
+Perltidy will set line breaks to prevent lines from exceeding the
+maximum line length.  
+
+Do you want the maximum line length to be 80 columns?  If no, use
+B<-l=n>, where B is the number of columns you prefer.
+
+=head2 Indentation in Code Blocks
+
+In the block below, the variable C<$anchor> is one indentation level deep
+and is indented by 4 spaces as shown here: 
+
+    if ( $flag eq "a" ) {
+        $anchor = $header;
+    }  
+
+If you want to change this to be a different number B of spaces
+per indentation level, use B<-i=n>.
+
+=head2 Continuation Indentation
+
+Look at the statement beginning with C<$anchor>:
+
+    if ( $flag eq "a" ) {
+        $anchor =
+          substr( $header, 0, 6 )
+          . substr( $char_list, $place_1, 1 )
+          . substr( $char_list, $place_2, 1 );
+    }
+
+The statement is too long for the line length (80 characters by default), so it
+has been broken into 4 lines.  The second and later lines have some extra
+"continuation indentation" to help make the start of the statement easy to
+find.  The default number of extra spaces is 2.  If you prefer a number n
+different from 2, you may specify this with B<-ci=n>.  It is probably best if
+it does not exceed the value of the primary indentation.
+
+=head2 Tabs
+
+The default, and recommendation, is to represent leading whitespace
+with actual space characters.  However, if you prefer to entab
+leading whitespace with one tab character for each B spaces,
+use B<-et=n>.  Typically, B would be 8.  
+
+=head2 Opening Block Brace Right or Left?
+
+Opening and closing curly braces, parentheses, and square brackets are divided
+into two separate categories and controlled separately in most cases.  The two
+categories are (1) code block curly braces, which contain perl code, and (2)
+everything else.  Basically, a code block brace is one which could contain
+semicolon-terminated lines of perl code.  We will first work on the scheme for
+code block curly braces.  
+
+Decide which of the following opening brace styles you prefer for most blocks
+of code (with the possible exception of a B which will
+be covered later):
+
+If you like opening braces on the right, like this, go to 
+L.
+
+    if ( $flag eq "h" ) {
+        $headers = 0;
+    }  
+
+If you like opening braces on the left, like this, go to 
+L.
+
+    if ( $flag eq "h" )
+    {
+        $headers = 0;
+    }
+
+=head2 Opening Braces Right
+
+In a multi-line B test expression, the default is to place
+the opening brace on the left, like this:
+
+    if ( $bigwasteofspace1 && $bigwasteofspace2
+        || $bigwasteofspace3 && $bigwasteofspace4 )
+    {
+        big_waste_of_time();
+    }
+
+This helps to visually separate the block contents from the test
+expression.  
+
+An alternative is to keep the brace on the right even for
+multiple-line test expressions, like this:
+
+    if ( $bigwasteofspace1 && $bigwasteofspace2
+        || $bigwasteofspace3 && $bigwasteofspace4 ) {
+        big_waste_of_time();
+    }
+
+If you prefer this alternative, use B<-bar>.
+
+=head2 Cuddled Else?
+
+Do you prefer this B style
+
+    if ( $flag eq "h" ) {
+        $headers = 0;
+    } elsif ( $flag eq "f" ) {
+        $sectiontype = 3;
+    } else {
+        print "invalid option: " . substr( $arg, $i, 1 ) . "\n";
+        dohelp();
+    }
+
+instead of this default style?
+
+    if ( $flag eq "h" ) {
+        $headers = 0;
+    }  
+    elsif ( $flag eq "f" ) {
+        $sectiontype = 3;
+    } 
+    else {    
+        print "invalid option: " . substr( $arg, $i, 1 ) . "\n";
+        dohelp();
+    }
+
+If yes, you should use B<-ce>.
+Now skip ahead to L.
+
+=head2 Opening Braces Left
+
+Use B<-bl> if you prefer this style:
+
+    if ( $flag eq "h" )
+    {
+        $headers = 0;
+    }
+
+Use B<-bli> if you prefer this indented-brace style:
+
+    if ( $flag eq "h" )
+      {
+        $headers = 0;
+      }
+
+The number of spaces of extra indentation will be the value specified
+for continuation indentation with the B<-ci=n> parameter (2 by default).
+
+=head2 Opening Sub Braces
+
+By default, the opening brace of a sub block will be treated
+the same as other code blocks.  If this is okay, skip ahead
+to L.
+
+If you prefer an opening sub brace to be on a new line,
+like this: 
+
+    sub message
+    {
+        # -sbl
+    }
+
+use B<-sbl>.  If you prefer the sub brace on the right like this
+
+    sub message {
+
+        # -nsbl
+    }
+
+use B<-nsbl>.
+
+If you wish to give this opening sub brace some indentation you can do
+that with the parameters B<-bli> and B<-blil> which are described in the
+manual.
+
+=head2 Block Brace Vertical Tightness
+
+If you chose to put opening block braces of all types to the right, skip
+ahead to L.
+
+If you chose to put braces of any type on the left, the default is to leave the
+opening brace on a line by itself, like this (shown for B<-bli>, but also true
+for B<-bl>):
+
+    if ( $flag eq "h" )
+      {
+        $headers = 0;
+      }
+
+But you may also use this more compressed style if you wish:
+
+    if ( $flag eq "h" )
+      { $headers = 0;
+      }
+
+If you do not prefer this more compressed form, go to 
+L.
+
+Otherwise use parameter B<-bbvt=n>, where n=1 or n=2.  To decide,
+look at this snippet:
+
+    # -bli -bbvt=1
+    sub _directives
+      {
+        {
+            'ENDIF' => \&_endif,
+               'IF' => \&_if,
+        };
+      }
+
+    # -bli -bbvt=2
+    sub _directives
+    {   {
+            'ENDIF' => \&_endif,
+            'IF'    => \&_if,
+        };
+    }
+
+The difference is that B<-bbvt=1> breaks after an opening brace if
+the next line is unbalanced, whereas B<-bbvt=2> never breaks.  
+
+If you were expecting the 'ENDIF' word to move up vertically here, note that
+the second opening brace in the above example is not a code block brace (it is
+a hash brace), so the B<-bbvt> does not apply to it (another parameter will).
+
+=head2 Closing Block Brace Indentation
+
+The default is to place closing braces at the same indentation as the
+opening keyword or brace of that code block, as shown here:
+
+        if ($task) {
+            yyy();
+        }            # default
+
+If you chose the B<-bli> style, however, the default closing braces will be
+indented one continuation indentation like the opening brace:
+
+        if ($task)
+          {
+            yyy();
+          }    # -bli
+
+If you prefer to give closing block braces one full level of
+indentation, independently of how the opening brace is treated,
+for example like this:
+
+        if ($task) {
+            yyy();
+            }          # -icb
+
+use B<-icb>.
+
+This completes the definition of the placement of code block braces.
+
+=head2 Indentation Style for Other Containers
+
+You have a choice of two basic indentation schemes for non-block containers.
+The default is to use a fixed number of spaces per indentation level (the same
+number of spaces used for code blocks, which is 4 by default).  Here is an
+example of the default:
+
+    $dbh = DBI->connect(
+        undef, undef, undef,
+        {
+            PrintError => 0,
+            RaiseError => 1
+        }
+    );
+
+In this default indentation scheme, a simple formula is used to find the
+indentation of every line.  Notice how the first 'undef' is indented 4
+spaces (one level) to the right, and how 'PrintError' is indented 4 more
+speces (one more level) to the right.  
+
+The alternate is to let the location of the opening paren (or square
+bracket, or curly brace) define the indentation, like this:
+
+    $dbh = DBI->connect(
+                         undef, undef, undef,
+                         {
+                           PrintError => 0,
+                           RaiseError => 1
+                         }
+    );
+
+The first scheme is completely robust.  The second scheme often looks a little
+nicer, but be aware that deeply nested structures it can be spoiled if the line
+length limit is exceeded.  Also, if there are comments or blank lines within a
+complex structure perltidy will temporarily fall back on the default
+indentation scheme.  You may want to try both on large sections of code to see
+which works best.
+
+If you prefer the first (default) scheme, no parameter is needed.
+
+If you prefer the latter scheme, use B<-lp>. 
+
+=head2 Opening Vertical Tightness
+
+The information in this section applies mainly to the B<-lp>
+style but it also applies in some cases to the default style.
+It will be illustrated for the B<-lp> indentation style.
+
+The default B<-lp> indentation style ends a line at the
+opening tokens, like this:
+
+    $dbh = DBI->connect(
+                         undef, undef, undef,
+                         {
+                           PrintError => 0,
+                           RaiseError => 1
+                         }
+    );
+
+Here is a tighter alternative, which does not end a line
+with the opening tokens:
+
+    $dbh = DBI->connect( undef, undef, undef,
+                         { PrintError => 0,
+                           RaiseError => 1
+                         }
+    );
+
+The difference is that the lines have been compressed vertically without
+any changes to the indentation.  This can almost always be done with the
+B<-lp> indentation style, but only in limited cases for the default
+indentation style. 
+
+If you prefer the default, skip ahead to L.
+
+Otherwise, use B<-vt=n>, where B should be either 1 or 2.  To help
+decide, observe the first three opening parens in the following snippet
+and choose the value of n you prefer.  Here it is with B<-lp -vt=1>:
+
+    if (
+         !defined(
+                   start_slip( $DEVICE, $PHONE,  $ACCOUNT, $PASSWORD,
+                               $LOCAL,  $REMOTE, $NETMASK, $MTU
+                   )
+         )
+         && $continuation_flag
+      )
+    {
+        do_something_about_it();
+    }
+
+And here it is again formatted with B<-lp -vt=2>:
+
+    if ( !defined( start_slip( $DEVICE, $PHONE,  $ACCOUNT, $PASSWORD,
+                               $LOCAL,  $REMOTE, $NETMASK, $MTU
+                   )
+         )
+         && $continuation_flag
+      )
+    {
+        do_something_about_it();
+    }
+
+The B<-vt=1> style tries to display the structure by preventing more
+than one step in indentation per line. In this example, the first two
+opening parens were not followed by balanced lines, so B<-vt=1> broke
+after them.  
+
+The B<-vt=2> style does not limit itself to a single indentation step
+per line.
+
+Note that in the above example the function 'do_sumething_about_it'
+started on a new line. This is because it follows an opening code
+block brace and is governed by the flag previously set in 
+L.
+
+=head2 Closing Token Placement
+
+You have several options for dealing with the terminal closing tokens of
+non-blocks.  In the following examples, a closing parenthesis is shown, but
+these parameters apply to closing square brackets and non-block curly braces as
+well.  
+
+The default behavior for parenthesized relatively large lists is to place the
+closing paren on a separate new line.  The flag B<-cti=n> controls the amount
+of indentation of such a closing paren.
+
+The default, B<-cti=0>, for a line beginning with a closing paren, is to use
+the indentation defined by the next (lower) indentation level.  This works
+well for the default indentation scheme:
+
+    # perltidy
+    @month_of_year = (
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+
+but it may not look very good with the B<-lp> indentation scheme:
+
+    # perltidy -lp
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+    );
+
+An alternative which works well with B<-lp> indentation is B<-cti=1>,
+which aligns the closing paren vertically with its
+opening paren, if possible:  
+
+    # perltidy -lp -cti=1
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                     );
+
+Another alternative, B<-cti=3>, indents a line with leading closing
+paren one full indentation level:
+
+    # perltidy -lp -cti=3
+    @month_of_year = (
+                       'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+                       );
+
+If you prefer the closing paren on a separate line like this, 
+note the value of B<-cti=n> that you prefer and skip ahead to 
+L. 
+
+Finally, the question of paren indentation can be avoided by placing it
+at the end of the previous line, like this:
+
+    @month_of_year = (
+        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' );
+
+Perltidy will automatically do this to save space for very short lists but not
+for longer lists.
+
+Use B<-vtc=n> if you prefer to usually do this, where B is either 1 or 2. To
+determine B, we have to look at something more complex.  Observe the
+behavior of the closing tokens in the following snippet:
+
+Here is B<-lp -vtc=1>:
+
+    $srec->{'ACTION'} = [
+                          $self->read_value(
+                                             $lookup->{'VFMT'},
+                                             $loc, $lookup, $fh
+                          ),
+                          $self->read_value(
+                                             $lookup->{'VFMT2'},
+                                             $loc, $lookup, $fh
+                          ) ];
+
+Here is B<-lp -vtc=2>:
+
+    $srec->{'ACTION'} = [
+                          $self->read_value(
+                                             $lookup->{'VFMT'},
+                                             $loc, $lookup, $fh ),
+                          $self->read_value(
+                                             $lookup->{'VFMT2'},
+                                             $loc, $lookup, $fh ) ];
+
+Choose the one that you prefer.  The difference is that B<-vtc=1> leaves
+closing tokens at the start of a line within a list, which can assist in
+keeping hierarchical lists readable.  The B<-vtc=2> style always tries
+to move closing tokens to the end of a line.  
+
+If you choose B<-vtc=1>,
+you may also want to specify a value of B<-cti=n> (previous section) to
+handle cases where a line begins with a closing paren.
+
+=head2 Stack Opening Tokens
+
+In the following snippet the opening hash brace has been placed
+alone on a new line.  
+
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+If you prefer to avoid isolated opening tokens by
+"stacking" them together with other opening tokens like this:
+
+    $opt_c = Text::CSV_XS->new( {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+use B<-sot>.
+
+=head2 Stack Closing Tokens
+
+Likewise, in the same snippet the default formatting leaves
+the closing paren on a line by itself here:
+
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        }
+    );
+
+If you would like to avoid leaving isolated closing tokens by
+stacking them with other closing tokens, like this:
+
+    $opt_c = Text::CSV_XS->new(
+        {
+            binary       => 1,
+            sep_char     => $opt_c,
+            always_quote => 1,
+        } );
+
+use B<-sct>.
+
+The B<-sct> flag is somewhat similar to the B<-vtc> flags, and in some cases it
+can give a similar result.  The difference is that the B<-vtc> flags try to
+avoid lines with leading opening tokens by "hiding" them at the end of a
+previous line, whereas the B<-sct> flag merely tries to reduce the number of
+lines with isolated closing tokens by stacking multiple closing tokens
+together, but it does not try to hide them.  
+
+The manual shows how all of these vertical tightness controls may be applied
+independently to each type of non-block opening and opening token.
+
+=head2 Define Horizontal Tightness
+
+Horizontal tightness parameters define how much space is included
+within a set of container tokens.
+
+For parentheses, decide which of the following values of B<-pt=n>
+you prefer: 
+
+ if ( ( my $len_tab = length( $tabstr ) ) > 0 ) {  # -pt=0
+ if ( ( my $len_tab = length($tabstr) ) > 0 ) {    # -pt=1 (default)
+ if ((my $len_tab = length($tabstr)) > 0) {        # -pt=2
+
+For n=0, space is always used, and for n=2, space is never used.  For
+the default n=1, space is used if the parentheses contain more than
+one token.
+
+For square brackets, decide which of the following values of B<-sbt=n>
+you prefer:
+
+ $width = $col[ $j + $k ] - $col[ $j ];  # -sbt=0
+ $width = $col[ $j + $k ] - $col[$j];    # -sbt=1 (default)
+ $width = $col[$j + $k] - $col[$j];      # -sbt=2 
+
+For curly braces, decide which of the following values of B<-bt=n>
+you prefer:
+
+ $obj->{ $parsed_sql->{ 'table' }[0] };    # -bt=0
+ $obj->{ $parsed_sql->{'table'}[0] };      # -bt=1 (default)
+ $obj->{$parsed_sql->{'table'}[0]};        # -bt=2
+
+For code block curly braces, decide which of the following values of
+B<-bbt=n> you prefer: 
+
+ %bf = map { $_ => -M $_ } grep { /\.deb$/ } dirents '.'; # -bbt=0 (default)
+ %bf = map { $_ => -M $_ } grep {/\.deb$/} dirents '.';   # -bbt=1
+ %bf = map {$_ => -M $_} grep {/\.deb$/} dirents '.';     # -bbt=2
+
+=head2 Spaces between function names and opening parens
+
+The default is not to place a space after a function call:
+
+  myfunc( $a, $b, $c );    # default 
+
+If you prefer a space:
+
+  myfunc ( $a, $b, $c );   # -sfp
+
+use B<-sfp>.
+
+=head2 Spaces between Perl keywords and parens
+
+The default is to place a space between only these keywords
+and an opening paren:
+
+   my local our and or eq ne if else elsif until unless 
+   while for foreach return switch case given when
+
+but no others. For example, the default is:
+
+    $aa = pop(@bb);
+
+If you want a space between all Perl keywords and an opening paren,
+
+    $aa = pop (@bb);
+
+use B<-skp>.  For detailed control of individual keywords, see the manual.
+
+=head2 Statement Termination Semicolon Spaces
+
+The default is not to put a space before a statement termination
+semicolon, like this:
+
+    $i = 1;
+
+If you prefer a space, like this:
+
+    $i = 1 ; 
+
+enter B<-sts>.
+
+=head2 For Loop Semicolon Spaces
+
+The default is to place a space before a semicolon in a for statement,
+like this:
+
+ for ( @a = @$ap, $u = shift @a ; @a ; $u = $v ) {  # -sfs (default)
+
+If you prefer no such space, like this:
+
+ for ( @a = @$ap, $u = shift @a; @a; $u = $v ) {    # -nsfs
+
+enter B<-nsfs>.
+
+=head2 Block Comment Indentation
+
+Block comments are comments which occupy a full line, as opposed to side
+comments.  The default is to indent block comments with the same
+indentation as the code block that contains them (even though this
+will allow long comments to exceed the maximum line length). 
+
+If you would like block comments indented except when this would cause
+the maximum line length to be exceeded, use B<-olc>.  This will cause a
+group of consecutive block comments to be outdented by the amount needed
+to prevent any one from exceeding the maximum line length. 
+
+If you never want block comments indented, use B<-nibc>.
+
+If block comments may only be indented if they have some space
+characters before the leading C<#> character in the input file, use
+B<-isbc>.
+
+The manual shows many other options for controlling comments.
+
+=head2 Outdenting Long Quotes
+
+Long quoted strings may exceed the specified line length limit.  The
+default, when this happens, is to outdent them to the first column.
+Here is an example of an outdented long quote:
+
+        if ($source_stream) {
+            if ( @ARGV > 0 ) {
+                die
+ "You may not specify any filenames when a source array is given\n";
+            }
+        }
+
+The effect is not too different from using a here document to represent
+the quote.  If you prefer to leave the quote indented, like this:
+
+        if ($source_stream) {
+            if ( @ARGV > 0 ) {
+                die
+                  "You may not specify any filenames when a source array is given\n";
+            }
+        }
+
+use B<-nolq>.
+
+=head2 Many Other Parameters
+
+This document has only covered the most popular parameters.  The manual
+contains many more and should be consulted if you did not find what you need
+here.
+
+=head2 Example F<.perltidyrc> files
+
+Now gather together all of the parameters you prefer and enter them
+in a file called F<.perltidyrc>.
+
+Here are some example F<.perltidyrc> files and the corresponding style.
+
+Here is a little test snippet, shown the way it would appear with
+the default style.
+
+    for (@methods) {
+        push (
+            @results,
+            {
+                name => $_->name,
+                help => $_->help,
+            }
+        );
+    }
+
+You do not need a F<.perltidyrc> file for this style.
+
+Here is the same snippet
+
+    for (@methods)
+    {
+        push(@results,
+             {  name => $_->name,
+                help => $_->help,
+             }
+            );
+    }
+
+for a F<.perltidyrc> file containing these parameters:
+
+ -bl
+ -lp
+ -cti=1
+ -vt=1
+ -pt=2
+
+You do not need to place just one parameter per line, but this may be
+convenient for long lists.  You may then hide any parameter by placing
+a C<#> symbol before it.
+
+And here is the snippet
+
+    for (@methods) {
+        push ( @results,
+               { name => $_->name,
+                 help => $_->help,
+               } );
+    }
+
+for a F<.perltidyrc> file containing these parameters:
+
+ -lp
+ -vt=1
+ -vtc=1
+
+=head2 Tidyview
+
+There is a graphical program called B which you can use to read a
+preliminary F<.perltidyrc> file, make trial adjustments and immediately see
+their effect on a test file, and then write a new F<.perltidyrc>.  You can
+download a copy at
+
+http://sourceforge.net/projects/tidyview
+
+=head2 Additional Information
+
+This document has covered the main parameters.  Many more parameters are
+available for special purposes and for fine-tuning a style.  For complete
+information see the perltidy manual
+http://perltidy.sourceforge.net/perltidy.html
+
+For an introduction to using perltidy, see the tutorial 
+http://perltidy.sourceforge.net/tutorial.html
+
+Suggestions for improving this document are welcome and may be sent to
+perltidy at users.sourceforge.net
+
+=cut
diff --git a/local-docs/tilde.pod b/local-docs/tilde.pod
new file mode 100644
index 00000000..83bd6f7a
--- /dev/null
+++ b/local-docs/tilde.pod
@@ -0,0 +1,164 @@
+=head1 Developing a Perltidy Test for Missing Binding Operator
+
+I recently needed to construct a filter to process some text,
+and after about a minute of coding I had what I thought would do the
+job.  But when I went to test it, it didn't work, and I wasted several
+more minutes trying to find what I had done wrong.  The problem was
+a line something like this:
+
+  $line = s/\s*$//;
+
+whereas what I meant was
+
+  $line =~ s/\s*$//;
+
+Both are valid code, so perl does not complain.  I've made this error
+before in my haste to enter a script, and so I decided to update
+F to check for this.  This is an ideal candidate for the B<-w>
+flag, which warns of things that look suspicious but are not 
+necessarily incorrect.
+
+=head2 Illustrative Snippets
+
+It isn't possible to be perfectly accurate with a warning of this
+nature, but it would be nice to avoid many false-alarms.  What I needed
+to implement this was a fairly simple rule which would just flag the
+most likely cases of a missing binding operator.  
+
+In order to construct a rule, I needed lots of examples of code.  I
+modified perltidy to report situations in which an C operator,
+as well as a B pattern delimiter, was preceded by a non-binding
+equality operator of some sort.  (I decided to skip the B pattern
+delimiter because it is fairly rare).
+
+One thing that became obvious was that there is no need to look for
+operators such as C<+=>,
+
+   $line += tr/\n//;
+
+since the author is obviously doing some math.  
+
+Likewise, there is little chance that a C<||=>, such as this
+
+    $havedate  ||= m/^Date:/;
+
+should have been a binding operator.
+
+So I decided to limit my
+search to situations with patterns preceded by C<=>, C<==>, and C,
+since these could easily have been typed instead of C<=~> and C.
+
+Another useful result is shown in the next snippet,
+
+   $filename = /^$/ ? $filename : $_;
+
+where the pattern is part of a C conditional operator and therefore the
+C<=> is unlikely to have been mistaken for a binding operator.
+Likewise, in this example from B, 
+
+    $page  =    /Mac/            && 'm/Macintrash.html'
+             || /Win(dows )?NT/  && 'e/evilandrude.html'
+             || /Win|MSIE|WebTV/ && 'm/MicroslothWindows.html'
+             || /Linux/          && 'l/Linux.html'
+             || /HP-UX/          && 'h/HP-SUX.html'
+             || /SunOS/          && 's/ScumOS.html'
+             ||                     'a/AppendixB.html';
+
+a trailing C<&&> indicates that the C<=> is what the author really
+wanted.  I therefore decided to only produce a warning if the pattern
+is terminated by a C<;>, a C<)>, or a C<}>.
+
+A large class of snippets that I found were in LIST context at the
+pattern, such as these:
+
+    ($key, $val) = m/^(.*?:)\s*(.*)/;
+    @export = m/sub\s+main'(\w+)/g;
+
+A binding operator is obviously not what is intended here, so I 
+limited the warning to a SCALAR variable immediately before the equality.
+
+The test for a SCALAR, as currently implemented, looks just one token to
+the left of the equality, so it would not flag something with a hash
+key, like this
+
+   $stats{a} = tr/a/a/;
+
+(which, in this case, is ok).  This could be done with some additional
+programming complexity, but is probably not worthwhile.
+
+Finally, I found many examples of scalars which were actually created
+just before the equality, such as this:
+
+        my $is_empty = /^$/;
+
+Obviously, these can be eliminated from consideration because
+the scalar does not contain character data to match to a pattern.
+
+=head2 A Simple Filter
+
+Putting all of this information together, here is a summary of the logic
+that I settled on:
+
+=over 4
+
+=item *
+
+We have an operator C or pattern beginning with C,
+
+=item *
+
+followed by one of C<;>, C<)>, or C<}>,
+
+=item *
+
+and operator is preceded by C<=>, C<==>, or C,
+
+=item *
+
+and the C<=> or similar operator is immediately preceded by a 
+SCALAR identifier,
+
+=item *
+
+which is not introduced with a C, C, or C keyword.
+
+=back
+
+=head2 Final Test
+
+This is localized enough that it is easy to implement, and does a fairly
+good job of avoiding false alarms.  Running it through a collection of
+several hundred megabytes of perl code produced about 30 warnings.  Most
+of them turned out to be valid code (as they should be, since the
+database is fairly mature code downloaded from the internet), but about
+fifteen percent were probably actual errors that had slipped by their
+authors.  
+
+Here are a few valid snippets that produce false-alarm warnings:
+
+    $saw_bk = /\\$/;
+    if ($pod = /^=head\d\s/) { }
+    $Match = /(\S+)\s+(\S+)\s+(\S+)\s+\[(.*)\]\s+"(.*)"\s+(\S+)\s+(\S+)/;
+
+    $match = /^(.*?)($delim.*?$delim)(.*)$/s;
+    ($pre, $match, $post) = ($1, $2, $3);
+
+And here are some suspicious snippets that this filter caught:
+
+    $text = s/\\itemize$id(.*)$id/\\itemize$id$tmptext$id/s;
+
+    if ($val = /^\|expand/) {		# Found an expand command
+
+    # edit comment to get rid of leading space and trailing dot
+    $comment = s/^\s*(\S+)\..*/$1/;
+
+    $Alt = s/\<.*\>//g;
+
+This test is currently activated in perltidy when the user enters
+the B<-w> flag, which is intended to look for problems like this.
+I think the value of being able to catch this type of error greatly
+outweighs the inconvenience of a few false alarms.  
+
+Here is the message produced:
+
+ 1:	Note: be sure you want '=' instead of '=~' here
diff --git a/local-docs/tutorial.md b/local-docs/tutorial.md
new file mode 100644
index 00000000..27e70b34
--- /dev/null
+++ b/local-docs/tutorial.md
@@ -0,0 +1,528 @@
+# A Brief Perltidy Tutorial
+
+Perltidy can save you a lot of tedious editing if you spend a few
+minutes learning to use it effectively.  Perltidy is highly
+configurable, but for many programmers the default parameter set will be
+satisfactory, with perhaps a few additional parameters to account for
+style preferences.
+
+This tutorial assumes that perltidy has been installed on your system.
+Installation instructions accompany the package.  To follow along with
+this tutorial, please find a small Perl script and place a copy in a
+temporary directory.  For example, here is a small (and silly) script:
+
+    print "Help Desk -- What Editor do you use?";
+    chomp($editor = );
+    if ($editor =~ /emacs/i) {
+      print "Why aren't you using vi?\n";
+    } elsif ($editor =~ /vi/i) {
+      print "Why aren't you using emacs?\n";
+    } else {
+      print "I think that's the problem\n";
+    }
+
+It is included in the `docs` section of the distribution.
+
+## A First Test
+
+Assume that the name of your script is `testfile.pl`.  You can reformat it
+with the default options to use the style recommended in the perlstyle man
+pages with the command:
+
+    perltidy testfile.pl
+
+For safety, perltidy never overwrites your original file.  In this case,
+its output will go to a file named `testfile.pl.tdy`, which you should
+examine now with your editor.  Here is what the above file looks like
+with the default options:
+
+    print "Help Desk -- What Editor do you use?";
+    chomp( $editor =  );
+    if ( $editor =~ /emacs/i ) {
+        print "Why aren't you using vi?\n";
+    }
+    elsif ( $editor =~ /vi/i ) {
+        print "Why aren't you using emacs?\n";
+    }
+    else {
+        print "I think that's the problem\n";
+    }
+
+You'll notice an immediate style change from the "cuddled-else" style of
+the original to the default "non-cuddled-else" style.  This is because
+perltidy has to make some kind of default selection of formatting
+options, and this default tries to follow the suggestions in the
+perlstyle man pages.  
+
+If you prefer the original "cuddled-else" style, don't worry, you can
+indicate that with a **-ce** flag.  So if you rerun with that flag
+
+    perltidy -ce testfile.pl
+
+you will see a return to the original "cuddled-else" style.  There are
+many more parameters for controlling style, and some of the most useful
+of these are discussed below.  
+
+## Indentation
+
+Another noticeable difference between the original and the reformatted
+file is that the indentation has been changed from 2 spaces to 4 spaces.
+That's because 4 spaces is the default.  You may change this to be a
+different number with **-i=n**.
+
+To get some practice, try these examples, and examine the resulting
+`testfile.pl.tdy` file:
+
+    perltidy -i=8 testfile.pl
+
+This changes the default of 4 spaces per indentation level to be 8.  Now
+just to emphasize the point, try this and examine the result:
+
+    perltidy -i=0 testfile.pl
+
+There will be no indentation at all in this case.
+
+## Input Flags
+
+This is a good place to mention a few points regarding the input flags.
+First, for each option, there are two forms, a long form and a short
+form, and either may be used.  
+
+For example, if you want to change the number of columns corresponding to one
+indentation level to 3 (from the default of 4) you may use either
+
+    -i=3   or  --indent-columns=3
+
+The short forms are convenient for entering parameters by hand, whereas
+the long forms, though often ridiculously long, are self-documenting and
+therefore useful in configuration scripts.  You may use either one or
+two dashes ahead of the parameters.  Also, the '=' sign is optional, 
+and may be a single space instead.  However, the value of a parameter
+must NOT be adjacent to the flag, like this **-i3** (WRONG).  Also,
+flags must be input separately, never bundled together.
+
+## Line Length and Continuation Indentation.
+
+If you change the indentation spaces you will probably also need to
+change the continuation indentation spaces with the parameter **-ci=n**.
+The continuation indentation is the extra indentation -- 2 spaces by
+default -- given to that portion of a long line which has been placed
+below the start of a statement.  For example:
+
+    croak "Couldn't pop genome file"
+      unless sysread( $impl->{file}, $element, $impl->{group} )
+      and truncate( $impl->{file}, $new_end );
+
+There is no fixed rule for setting the value for **-ci=n**, but it should
+probably not exceed one-half of the number of spaces of a full
+indentation level.
+
+In the above snippet, the statement was broken into three lines.  The
+actual number is governed by a parameter, the maximum line length, as
+well as by what perltidy considers to be good break points.  The maximum
+line length is 80 characters by default.  You can change this to be any
+number **n** with the **-l=n** flag.  Perltidy tries to produce lines
+which do not exceed this length, and it does this by finding good break
+points.  For example, the above snippet would look like this with
+**perltidy -l=40**:
+
+    croak "Couldn't pop genome file"
+      unless
+      sysread( $impl->{file}, $element,
+        $impl->{group} )
+      and
+      truncate( $impl->{file}, $new_end );
+
+You may be wondering what would happen with, say, **-l=1**.  Go 
+ahead and try it.
+
+## Tabs or Spaces?
+
+With indentation, there is always a tab issue to resolve.  By default,
+perltidy will use leading ascii space characters instead of tabs.  The
+reason is that this will be displayed correctly by virtually all
+editors, and in the long run, will avoid maintenance problems.  
+
+However, if you prefer, you may have perltidy entab the leading
+whitespace of a line with the command **-et=n**, where **n** is the number
+of spaces which will be represented by one tab.  But note that your text
+will not be displayed properly unless viewed with software that is
+configured to display **n** spaces per tab.
+
+## Input/Output Control
+
+In the first example, we saw that if we pass perltidy the name
+of a file on the command line, it reformats it and creates a
+new filename by appending an extension, `.tdy`.  This is the
+default behavior, but there are several other options.
+
+On most systems, you may use wildcards to reformat a whole batch of
+files at once, like this for example:
+
+    perltidy *.pl
+
+and in this case, each of the output files will be have a name equal to
+the input file with the extension `.tdy` appended.  If you decide that
+the formatting is acceptable, you will want to backup your originals and
+then remove the `.tdy` extensions from the reformatted files.  There is
+an powerful perl script called `rename` that can be used for this
+purpose; if you don't have it, you can find it for example in **The Perl
+Cookbook**.
+
+If you find that the formatting done by perltidy is usually acceptable,
+you may want to save some effort by letting perltidy do a simple backup
+of the original files and then reformat them in place.  You specify this
+with a **-b** flag.  For example, the command
+
+    perltidy -b *.pl
+
+will rename the original files by appending a `.bak` extension, and then
+create reformatted files with the same names as the originals.  (If you don't
+like the default backup extension choice `.bak`, the manual tells how to
+change it).  Each time you run perltidy with the **-b** option, the previous
+`.bak` files will be overwritten, so please make regular separate backups.
+
+If there is no input filename specified on the command line, then input
+is assumed to come from standard input and output will go to standard
+output.  On systems with a Unix-like interface, you can use perltidy as
+a filter, like this:
+
+    perltidy newfile.pl
+
+What happens in this case is that the shell takes care of the redirected
+input files, '<somefile.pl', and so perltidy never sees the filename.
+Therefore, it knows to use the standard input and standard output
+channels.
+
+If you are executing perltidy on a file and want to force the output
+to standard output, rather than create a `.tdy` file, you can
+indicate this with the flag **-st**, like this:
+
+    perltidy somefile.pl -st >otherfile.pl
+
+You can also control the name of the output file with the **-o** flag,
+like this:
+
+    perltidy testfile.pl -o=testfile.new.pl
+
+## Style Variations
+
+Perltidy has to make some kind of default selection of formatting
+options, and its choice is to try to follow the suggestions in the
+perlstyle man pages.  Many programmers more or less follow these
+suggestions with a few exceptions.  In this section we will
+look at just a few of the most commonly used style parameters.  Later,
+you may want to systematically develop a set of style
+parameters with the help of
+the perltidy **stylekey** web page at
+http://perltidy.sourceforge.net/stylekey.html
+
+- **-ce**, cuddled elses
+
+    If you prefer cuddled elses, use the **-ce** flag.  
+
+- **-bl**, braces left
+
+    Here is what the `if` block in the above script looks like with **-bl**:
+
+        if ( $editor =~ /emacs/i )
+        {
+            print "Why aren't you using vi?\n";
+        }
+        elsif ( $editor =~ /vi/i )
+        {
+            print "Why aren't you using emacs?\n";
+        }
+        else
+        {
+            print "I think that's the problem\n";
+        }
+
+- **-lp**, Lining up with parentheses
+
+    The **-lp** parameter can enhance the readability of lists by adding
+    extra indentation.  Consider:
+
+            %romanNumerals = (
+                one   => 'I',
+                two   => 'II',
+                three => 'III',
+                four  => 'IV',
+                five  => 'V',
+                six   => 'VI',
+                seven => 'VII',
+                eight => 'VIII',
+                nine  => 'IX',
+                ten   => 'X'
+            );
+
+    With the **-lp** flag, this is formatted as:
+
+            %romanNumerals = (
+                               one   => 'I',
+                               two   => 'II',
+                               three => 'III',
+                               four  => 'IV',
+                               five  => 'V',
+                               six   => 'VI',
+                               seven => 'VII',
+                               eight => 'VIII',
+                               nine  => 'IX',
+                               ten   => 'X'
+                             );
+
+    which is preferred by some.  (I've actually used **-lp** and **-cti=1** to
+    format this block.  The **-cti=1** flag causes the closing paren to align
+    vertically with the opening paren, which works well with the **-lp**
+    indentation style).  An advantage of **-lp** indentation are that it
+    displays lists nicely.  A disadvantage is that deeply nested lists can
+    require a long line length.
+
+- **-bt**,**-pt**,**-sbt**:  Container tightness
+
+    These are parameters for controlling the amount of space within
+    containing parentheses, braces, and square brackets.  The example below
+    shows the effect of the three possible values, 0, 1, and 2, for the case
+    of parentheses:
+
+        if ( ( my $len_tab = length( $tabstr ) ) > 0 ) {  # -pt=0
+        if ( ( my $len_tab = length($tabstr) ) > 0 ) {    # -pt=1 (default)
+        if ((my $len_tab = length($tabstr)) > 0) {        # -pt=2
+
+    A value of 0 causes all parens to be padded on the inside with a space,
+    and a value of 2 causes this never to happen.  With a value of 1, spaces
+    will be introduced if the item within is more than a single token.
+
+## Configuration Files
+
+While style preferences vary, most people would agree that it is
+important to maintain a uniform style within a script, and this is a
+major benefit provided by perltidy.  Once you have decided on which, if
+any, special options you prefer, you may want to avoid having to enter
+them each time you run it.  You can do this by creating a special file
+named `.perltidyrc` in either your home directory, your current
+directory, or certain system-dependent locations.  (Note the leading "."
+in the file name).  
+
+A handy command to know when you start using a configuration file is
+
+    perltidy -dpro
+
+which will dump to standard output the search that perltidy makes when
+looking for a configuration file, and the contents of the one that it
+selects, if any.  This is one of a number of useful "dump and die"
+commands, in which perltidy will dump some information to standard
+output and then immediately exit.  Others include **-h**, which dumps
+help information, and **-v**, which dumps the version number.
+
+Another useful command when working with configuration files is
+
+    perltidy -pro=file
+
+which causes the contents of `file` to be used as the configuration
+file instead of a `.perltidyrc` file.  With this command, you can
+easily switch among several different candidate configuration files
+during testing.
+
+This `.perltidyrc` file is free format.  It is simply a list of
+parameters, just as they would be entered on a command line.  Any number
+of lines may be used, with any number of parameters per line, although
+it may be easiest to read with one parameter per line.  Blank lines are
+ignored, and text after a '#' is ignored to the end of a line.
+
+Here is an example of a `.perltidyrc` file:
+
+    # This is a simple of a .perltidyrc configuration file
+    # This implements a highly spaced style
+    -bl    # braces on new lines
+    -pt=0  # parens not tight at all
+    -bt=0  # braces not tight
+    -sbt=0 # square brackets not tight
+
+If you experiment with this file, remember that it is in your directory,
+since if you are running on a Unix system, files beginning with a "."
+are normally hidden.  
+
+If you have a `.perltidyrc` file, and want perltidy to ignore it,
+use the **-npro** flag on the command line.
+
+## Error Reporting
+
+Let's run through a 'fire drill' to see how perltidy reports errors.  Try
+introducing an extra opening brace somewhere in a test file.  For example,
+introducing an extra brace in the file listed above produces the following
+message on the terminal (or standard error output):
+
+    ## Please see file testfile.pl.ERR!
+
+Here is what `testfile.pl.ERR` contains:
+
+    10:    final indentation level: 1
+    
+    Final nesting depth of '{'s is 1
+    The most recent un-matched '{' is on line 6
+    6: } elsif ($temperature < 68) {{
+                                   ^
+
+This shows how perltidy will, by default, write error messages to a file
+with the extension `.ERR`, and it will write a note that it did so to
+the standard error device.  If you would prefer to have the error
+messages sent to standard output, instead of to a `.ERR` file, use the
+**-se** flag.
+
+Almost every programmer would want to see error messages of this type,
+but there are a number of messages which, if reported, would be
+annoying.  To manage this problem, perltidy puts its messages into two
+categories: errors and warnings.  The default is to just report the
+errors, but you can control this with input flags, as follows:
+
+    flag  what this does
+    ----  --------------
+          default: report errors but not warnings
+    -w    report all errors and warnings
+    -q    quiet! do not report either errors or warnings
+
+The default is generally a good choice, but it's not a bad idea to check
+programs with **-w** occasionally, especially if your are looking for a
+bug.  For example, it will ask if you really want '=' instead of '=~' in
+this line:
+
+    $line = s/^\s*//;
+
+This kind of error can otherwise be hard to find.
+
+## The Log File
+
+One last topic that needs to be touched upon concerns the `.LOG` file.
+This is where perltidy records messages that are not normally of any
+interest, but which just might occasionally be useful.  This file is not
+saved, though, unless perltidy detects that it has made a mistake or you
+ask for it to be saved.
+
+There are a couple of ways to ask perltidy to save a log file.  To
+create a relatively sparse log file, use
+
+    perltidy -log testfile.pl
+
+and for a verbose log file, use
+
+    perltidy -g testfile.pl
+
+The difference is that the first form only saves detailed information at
+least every 50th line, while the second form saves detailed information
+about every line.
+
+So returning to our example, lets force perltidy to save a
+verbose log file by issuing the following command
+
+    perltidy -g testfile.pl
+
+You will find that a file named `testfile.pl.LOG` has been
+created in your directory.  
+
+If you open this file, you will see that it is a text file with a
+combination of warning messages and informative messages.  All you need
+to know for now is that it exists; someday it may be useful.
+
+## Using Perltidy as a Filter on Selected Text from an Editor
+
+Most programmer's editors allow a selected group of lines to be passed
+through an external filter.  Perltidy has been designed to work well as
+a filter, and it is well worthwhile learning the appropriate commands to
+do this with your editor.  This means that you can enter a few
+keystrokes and watch a block of text get reformatted.  If you are not
+doing this, you are missing out of a lot of fun!  You may want to supply
+the **-q** flag to prevent error messages regarding incorrect syntax,
+since errors may be obvious in the indentation of the reformatted text.
+This is entirely optional, but if you do not use the **-q** flag, you
+will need to use the undo keys in case an error message appears on the
+screen. 
+
+For example, within the **vim** editor it is only necessary to select the
+text by any of the text selection methods, and then issue the command
+!perltidy in command mode.  Thus, an entire file can be formatted using
+
+    :%!perltidy -q
+
+or, without the **-q** flag, just
+
+    :%!perltidy
+
+It isn't necessary to format an entire file, however.  Perltidy will
+probably work well as long as you select blocks of text whose braces,
+parentheses, and square brackets are properly balanced.  You can
+even format an `elsif` block without the leading `if` block, as
+long as the text you select has all braces balanced.
+
+For the **emacs** editor, first mark a region and then pipe it through
+perltidy.  For example, to format an entire file, select it with `C-x h` 
+and then pipe it with `M-1 M-|` and then `perltidy`.  The numeric
+argument, `M-1` causes the output from perltidy to replace the marked
+text.  See "GNU Emacs Manual" for more information,
+http://www.gnu.org/manual/emacs-20.3/html\_node/emacs\_toc.html
+
+If you have difficulty with an editor, try the **-st** flag, which will
+force perltidy to send output to standard output.  This might be needed,
+for example, if the editor passes text to perltidy as temporary filename
+instead of through the standard input.  If this works, you might put the
+**-st** flag in your `.perltidyrc` file.
+
+If you have some tips for making perltidy work with your editor, and
+are willing to share them, please email me (see below) and I'll try to
+incorporate them in this document or put up a link to them.
+
+After you get your editor and perltidy successfully talking to each
+other, try formatting a snippet of code with a brace error to see what
+happens.  (Do not use the quiet flag, **-q**, for this test).  Perltidy
+will send one line starting with `##` to standard error output.  Your
+editor may either display it at the top of the reformatted text or at
+the bottom (or even midstream!).  You probably cannot control this, and
+perltidy can't, but you need to know where to look when an actual error
+is detected.
+
+## Writing an HTML File
+
+Perltidy can switch between two different output modes.  We have been
+discussing what might be called its "beautifier" mode, but it can also
+output in HTML.  To do this, use the **-html** flag, like this:
+
+    perltidy -html testfile.pl
+
+which will produce a file `testfile.pl.html`.  There are many
+parameters available for adjusting the appearance of an HTML file, but a
+very easy way is to just write the HTML file with this simple command
+and then edit the stylesheet which is embedded at its top.
+
+One important thing to know about the **-html** flag is that perltidy can
+either send its output to its beautifier or to its HTML writer, but
+(unfortunately) not both in a single run.  So the situation can be
+represented like this:
+
+                     ------------
+                     |          |     --->beautifier--> testfile.pl.tdy
+    testfile.pl -->  | perltidy | -->
+                     |          |     --->HTML -------> testfile.pl.html
+                     ------------
+
+And in the future, there may be more output filters.  So if you would
+like to both beautify a script and write it to HTML, you need to do it
+in two steps.
+
+## Summary
+
+That's enough to get started using perltidy.  
+When you are ready to create a `.perltidyrc` file, you may find it
+helpful to use the `stylekey` page as a guide at
+http://perltidy.sourceforge.net/stylekey.html
+
+Many additional special
+features and capabilities can be found in the manual pages for perltidy
+at
+http://perltidy.sourceforge.net/perltidy.html
+
+We hope that perltidy makes perl programming a little more fun.
+Please check the perltidy
+web site http://perltidy.sourceforge.net occasionally
+for updates.
+
+The author may be contacted at perltidy at users.sourceforge.net.
diff --git a/local-docs/tutorial.pod b/local-docs/tutorial.pod
new file mode 100644
index 00000000..9d1f2606
--- /dev/null
+++ b/local-docs/tutorial.pod
@@ -0,0 +1,534 @@
+=head1 A Brief Perltidy Tutorial
+
+Perltidy can save you a lot of tedious editing if you spend a few
+minutes learning to use it effectively.  Perltidy is highly
+configurable, but for many programmers the default parameter set will be
+satisfactory, with perhaps a few additional parameters to account for
+style preferences.
+
+This tutorial assumes that perltidy has been installed on your system.
+Installation instructions accompany the package.  To follow along with
+this tutorial, please find a small Perl script and place a copy in a
+temporary directory.  For example, here is a small (and silly) script:
+
+ print "Help Desk -- What Editor do you use?";
+ chomp($editor = );
+ if ($editor =~ /emacs/i) {
+   print "Why aren't you using vi?\n";
+ } elsif ($editor =~ /vi/i) {
+   print "Why aren't you using emacs?\n";
+ } else {
+   print "I think that's the problem\n";
+ }
+
+It is included in the F section of the distribution.
+
+=head2 A First Test
+
+Assume that the name of your script is F.  You can reformat it
+with the default options to use the style recommended in the perlstyle man
+pages with the command:
+
+ perltidy testfile.pl
+
+For safety, perltidy never overwrites your original file.  In this case,
+its output will go to a file named F, which you should
+examine now with your editor.  Here is what the above file looks like
+with the default options:
+
+ print "Help Desk -- What Editor do you use?";
+ chomp( $editor =  );
+ if ( $editor =~ /emacs/i ) {
+     print "Why aren't you using vi?\n";
+ }
+ elsif ( $editor =~ /vi/i ) {
+     print "Why aren't you using emacs?\n";
+ }
+ else {
+     print "I think that's the problem\n";
+ }
+
+You'll notice an immediate style change from the "cuddled-else" style of
+the original to the default "non-cuddled-else" style.  This is because
+perltidy has to make some kind of default selection of formatting
+options, and this default tries to follow the suggestions in the
+perlstyle man pages.  
+
+If you prefer the original "cuddled-else" style, don't worry, you can
+indicate that with a B<-ce> flag.  So if you rerun with that flag
+
+ perltidy -ce testfile.pl
+
+you will see a return to the original "cuddled-else" style.  There are
+many more parameters for controlling style, and some of the most useful
+of these are discussed below.  
+
+=head2 Indentation
+
+Another noticeable difference between the original and the reformatted
+file is that the indentation has been changed from 2 spaces to 4 spaces.
+That's because 4 spaces is the default.  You may change this to be a
+different number with B<-i=n>.
+
+To get some practice, try these examples, and examine the resulting
+F file:
+
+ perltidy -i=8 testfile.pl
+
+This changes the default of 4 spaces per indentation level to be 8.  Now
+just to emphasize the point, try this and examine the result:
+
+ perltidy -i=0 testfile.pl
+
+There will be no indentation at all in this case.
+
+=head2 Input Flags
+
+This is a good place to mention a few points regarding the input flags.
+First, for each option, there are two forms, a long form and a short
+form, and either may be used.  
+
+For example, if you want to change the number of columns corresponding to one
+indentation level to 3 (from the default of 4) you may use either
+
+ -i=3   or  --indent-columns=3
+
+The short forms are convenient for entering parameters by hand, whereas
+the long forms, though often ridiculously long, are self-documenting and
+therefore useful in configuration scripts.  You may use either one or
+two dashes ahead of the parameters.  Also, the '=' sign is optional, 
+and may be a single space instead.  However, the value of a parameter
+must NOT be adjacent to the flag, like this B<-i3> (WRONG).  Also,
+flags must be input separately, never bundled together.
+
+=head2 Line Length and Continuation Indentation.
+
+If you change the indentation spaces you will probably also need to
+change the continuation indentation spaces with the parameter B<-ci=n>.
+The continuation indentation is the extra indentation -- 2 spaces by
+default -- given to that portion of a long line which has been placed
+below the start of a statement.  For example:
+
+ croak "Couldn't pop genome file"
+   unless sysread( $impl->{file}, $element, $impl->{group} )
+   and truncate( $impl->{file}, $new_end );
+
+There is no fixed rule for setting the value for B<-ci=n>, but it should
+probably not exceed one-half of the number of spaces of a full
+indentation level.
+
+In the above snippet, the statement was broken into three lines.  The
+actual number is governed by a parameter, the maximum line length, as
+well as by what perltidy considers to be good break points.  The maximum
+line length is 80 characters by default.  You can change this to be any
+number B with the B<-l=n> flag.  Perltidy tries to produce lines
+which do not exceed this length, and it does this by finding good break
+points.  For example, the above snippet would look like this with
+B:
+
+ croak "Couldn't pop genome file"
+   unless
+   sysread( $impl->{file}, $element,
+     $impl->{group} )
+   and
+   truncate( $impl->{file}, $new_end );
+
+You may be wondering what would happen with, say, B<-l=1>.  Go 
+ahead and try it.
+
+=head2 Tabs or Spaces?
+
+With indentation, there is always a tab issue to resolve.  By default,
+perltidy will use leading ascii space characters instead of tabs.  The
+reason is that this will be displayed correctly by virtually all
+editors, and in the long run, will avoid maintenance problems.  
+
+However, if you prefer, you may have perltidy entab the leading
+whitespace of a line with the command B<-et=n>, where B is the number
+of spaces which will be represented by one tab.  But note that your text
+will not be displayed properly unless viewed with software that is
+configured to display B spaces per tab.
+
+=head2 Input/Output Control
+
+In the first example, we saw that if we pass perltidy the name
+of a file on the command line, it reformats it and creates a
+new filename by appending an extension, F<.tdy>.  This is the
+default behavior, but there are several other options.
+
+On most systems, you may use wildcards to reformat a whole batch of
+files at once, like this for example:
+
+ perltidy *.pl
+
+and in this case, each of the output files will be have a name equal to
+the input file with the extension F<.tdy> appended.  If you decide that
+the formatting is acceptable, you will want to backup your originals and
+then remove the F<.tdy> extensions from the reformatted files.  There is
+an powerful perl script called C that can be used for this
+purpose; if you don't have it, you can find it for example in B.
+
+If you find that the formatting done by perltidy is usually acceptable,
+you may want to save some effort by letting perltidy do a simple backup
+of the original files and then reformat them in place.  You specify this
+with a B<-b> flag.  For example, the command
+
+ perltidy -b *.pl
+
+will rename the original files by appending a F<.bak> extension, and then
+create reformatted files with the same names as the originals.  (If you don't
+like the default backup extension choice F<.bak>, the manual tells how to
+change it).  Each time you run perltidy with the B<-b> option, the previous
+F<.bak> files will be overwritten, so please make regular separate backups.
+
+If there is no input filename specified on the command line, then input
+is assumed to come from standard input and output will go to standard
+output.  On systems with a Unix-like interface, you can use perltidy as
+a filter, like this:
+
+ perltidy newfile.pl
+
+What happens in this case is that the shell takes care of the redirected
+input files, ' file, you can
+indicate this with the flag B<-st>, like this:
+
+ perltidy somefile.pl -st >otherfile.pl
+
+You can also control the name of the output file with the B<-o> flag,
+like this:
+
+ perltidy testfile.pl -o=testfile.new.pl
+
+=head2 Style Variations
+
+Perltidy has to make some kind of default selection of formatting
+options, and its choice is to try to follow the suggestions in the
+perlstyle man pages.  Many programmers more or less follow these
+suggestions with a few exceptions.  In this section we will
+look at just a few of the most commonly used style parameters.  Later,
+you may want to systematically develop a set of style
+parameters with the help of
+the perltidy B web page at
+http://perltidy.sourceforge.net/stylekey.html
+
+=over 4
+
+=item B<-ce>, cuddled elses
+
+If you prefer cuddled elses, use the B<-ce> flag.  
+
+=item B<-bl>, braces left
+
+Here is what the C block in the above script looks like with B<-bl>:
+
+ if ( $editor =~ /emacs/i )
+ {
+     print "Why aren't you using vi?\n";
+ }
+ elsif ( $editor =~ /vi/i )
+ {
+     print "Why aren't you using emacs?\n";
+ }
+ else
+ {
+     print "I think that's the problem\n";
+ }
+
+=item B<-lp>, Lining up with parentheses
+
+The B<-lp> parameter can enhance the readability of lists by adding
+extra indentation.  Consider:
+
+        %romanNumerals = (
+            one   => 'I',
+            two   => 'II',
+            three => 'III',
+            four  => 'IV',
+            five  => 'V',
+            six   => 'VI',
+            seven => 'VII',
+            eight => 'VIII',
+            nine  => 'IX',
+            ten   => 'X'
+        );
+
+With the B<-lp> flag, this is formatted as:
+
+        %romanNumerals = (
+                           one   => 'I',
+                           two   => 'II',
+                           three => 'III',
+                           four  => 'IV',
+                           five  => 'V',
+                           six   => 'VI',
+                           seven => 'VII',
+                           eight => 'VIII',
+                           nine  => 'IX',
+                           ten   => 'X'
+                         );
+
+which is preferred by some.  (I've actually used B<-lp> and B<-cti=1> to
+format this block.  The B<-cti=1> flag causes the closing paren to align
+vertically with the opening paren, which works well with the B<-lp>
+indentation style).  An advantage of B<-lp> indentation are that it
+displays lists nicely.  A disadvantage is that deeply nested lists can
+require a long line length.
+
+=item B<-bt>,B<-pt>,B<-sbt>:  Container tightness
+
+These are parameters for controlling the amount of space within
+containing parentheses, braces, and square brackets.  The example below
+shows the effect of the three possible values, 0, 1, and 2, for the case
+of parentheses:
+
+ if ( ( my $len_tab = length( $tabstr ) ) > 0 ) {  # -pt=0
+ if ( ( my $len_tab = length($tabstr) ) > 0 ) {    # -pt=1 (default)
+ if ((my $len_tab = length($tabstr)) > 0) {        # -pt=2
+
+A value of 0 causes all parens to be padded on the inside with a space,
+and a value of 2 causes this never to happen.  With a value of 1, spaces
+will be introduced if the item within is more than a single token.
+
+=back
+
+=head2 Configuration Files
+
+While style preferences vary, most people would agree that it is
+important to maintain a uniform style within a script, and this is a
+major benefit provided by perltidy.  Once you have decided on which, if
+any, special options you prefer, you may want to avoid having to enter
+them each time you run it.  You can do this by creating a special file
+named F<.perltidyrc> in either your home directory, your current
+directory, or certain system-dependent locations.  (Note the leading "."
+in the file name).  
+
+A handy command to know when you start using a configuration file is
+
+  perltidy -dpro
+
+which will dump to standard output the search that perltidy makes when
+looking for a configuration file, and the contents of the one that it
+selects, if any.  This is one of a number of useful "dump and die"
+commands, in which perltidy will dump some information to standard
+output and then immediately exit.  Others include B<-h>, which dumps
+help information, and B<-v>, which dumps the version number.
+
+Another useful command when working with configuration files is
+
+ perltidy -pro=file
+
+which causes the contents of F to be used as the configuration
+file instead of a F<.perltidyrc> file.  With this command, you can
+easily switch among several different candidate configuration files
+during testing.
+
+This F<.perltidyrc> file is free format.  It is simply a list of
+parameters, just as they would be entered on a command line.  Any number
+of lines may be used, with any number of parameters per line, although
+it may be easiest to read with one parameter per line.  Blank lines are
+ignored, and text after a '#' is ignored to the end of a line.
+
+Here is an example of a F<.perltidyrc> file:
+
+  # This is a simple of a .perltidyrc configuration file
+  # This implements a highly spaced style
+  -bl	 # braces on new lines
+  -pt=0  # parens not tight at all
+  -bt=0  # braces not tight
+  -sbt=0 # square brackets not tight
+
+If you experiment with this file, remember that it is in your directory,
+since if you are running on a Unix system, files beginning with a "."
+are normally hidden.  
+
+If you have a F<.perltidyrc> file, and want perltidy to ignore it,
+use the B<-npro> flag on the command line.
+
+=head2 Error Reporting
+
+Let's run through a 'fire drill' to see how perltidy reports errors.  Try
+introducing an extra opening brace somewhere in a test file.  For example,
+introducing an extra brace in the file listed above produces the following
+message on the terminal (or standard error output):
+
+ ## Please see file testfile.pl.ERR!
+
+Here is what F contains:
+
+ 10:	final indentation level: 1
+ 
+ Final nesting depth of '{'s is 1
+ The most recent un-matched '{' is on line 6
+ 6: } elsif ($temperature < 68) {{
+                                ^
+
+This shows how perltidy will, by default, write error messages to a file
+with the extension F<.ERR>, and it will write a note that it did so to
+the standard error device.  If you would prefer to have the error
+messages sent to standard output, instead of to a F<.ERR> file, use the
+B<-se> flag.
+
+Almost every programmer would want to see error messages of this type,
+but there are a number of messages which, if reported, would be
+annoying.  To manage this problem, perltidy puts its messages into two
+categories: errors and warnings.  The default is to just report the
+errors, but you can control this with input flags, as follows:
+
+ flag  what this does
+ ----  --------------
+       default: report errors but not warnings
+ -w    report all errors and warnings
+ -q    quiet! do not report either errors or warnings
+
+The default is generally a good choice, but it's not a bad idea to check
+programs with B<-w> occasionally, especially if your are looking for a
+bug.  For example, it will ask if you really want '=' instead of '=~' in
+this line:
+  
+    $line = s/^\s*//;
+
+This kind of error can otherwise be hard to find.
+
+=head2 The Log File
+
+One last topic that needs to be touched upon concerns the F<.LOG> file.
+This is where perltidy records messages that are not normally of any
+interest, but which just might occasionally be useful.  This file is not
+saved, though, unless perltidy detects that it has made a mistake or you
+ask for it to be saved.
+
+There are a couple of ways to ask perltidy to save a log file.  To
+create a relatively sparse log file, use
+
+ perltidy -log testfile.pl
+
+and for a verbose log file, use
+
+ perltidy -g testfile.pl
+
+The difference is that the first form only saves detailed information at
+least every 50th line, while the second form saves detailed information
+about every line.
+
+So returning to our example, lets force perltidy to save a
+verbose log file by issuing the following command
+
+ perltidy -g testfile.pl
+
+You will find that a file named F has been
+created in your directory.  
+
+If you open this file, you will see that it is a text file with a
+combination of warning messages and informative messages.  All you need
+to know for now is that it exists; someday it may be useful.
+
+=head2 Using Perltidy as a Filter on Selected Text from an Editor
+
+Most programmer's editors allow a selected group of lines to be passed
+through an external filter.  Perltidy has been designed to work well as
+a filter, and it is well worthwhile learning the appropriate commands to
+do this with your editor.  This means that you can enter a few
+keystrokes and watch a block of text get reformatted.  If you are not
+doing this, you are missing out of a lot of fun!  You may want to supply
+the B<-q> flag to prevent error messages regarding incorrect syntax,
+since errors may be obvious in the indentation of the reformatted text.
+This is entirely optional, but if you do not use the B<-q> flag, you
+will need to use the undo keys in case an error message appears on the
+screen. 
+
+For example, within the B editor it is only necessary to select the
+text by any of the text selection methods, and then issue the command
+!perltidy in command mode.  Thus, an entire file can be formatted using
+
+ :%!perltidy -q
+
+or, without the B<-q> flag, just
+
+ :%!perltidy
+
+It isn't necessary to format an entire file, however.  Perltidy will
+probably work well as long as you select blocks of text whose braces,
+parentheses, and square brackets are properly balanced.  You can
+even format an C block without the leading C block, as
+long as the text you select has all braces balanced.
+
+For the B editor, first mark a region and then pipe it through
+perltidy.  For example, to format an entire file, select it with C 
+and then pipe it with C and then C.  The numeric
+argument, C causes the output from perltidy to replace the marked
+text.  See "GNU Emacs Manual" for more information,
+http://www.gnu.org/manual/emacs-20.3/html_node/emacs_toc.html
+
+If you have difficulty with an editor, try the B<-st> flag, which will
+force perltidy to send output to standard output.  This might be needed,
+for example, if the editor passes text to perltidy as temporary filename
+instead of through the standard input.  If this works, you might put the
+B<-st> flag in your F<.perltidyrc> file.
+
+If you have some tips for making perltidy work with your editor, and
+are willing to share them, please email me (see below) and I'll try to
+incorporate them in this document or put up a link to them.
+
+After you get your editor and perltidy successfully talking to each
+other, try formatting a snippet of code with a brace error to see what
+happens.  (Do not use the quiet flag, B<-q>, for this test).  Perltidy
+will send one line starting with C<##> to standard error output.  Your
+editor may either display it at the top of the reformatted text or at
+the bottom (or even midstream!).  You probably cannot control this, and
+perltidy can't, but you need to know where to look when an actual error
+is detected.
+
+=head2 Writing an HTML File
+
+Perltidy can switch between two different output modes.  We have been
+discussing what might be called its "beautifier" mode, but it can also
+output in HTML.  To do this, use the B<-html> flag, like this:
+
+ perltidy -html testfile.pl
+
+which will produce a file F.  There are many
+parameters available for adjusting the appearance of an HTML file, but a
+very easy way is to just write the HTML file with this simple command
+and then edit the stylesheet which is embedded at its top.
+
+One important thing to know about the B<-html> flag is that perltidy can
+either send its output to its beautifier or to its HTML writer, but
+(unfortunately) not both in a single run.  So the situation can be
+represented like this:
+
+                  ------------
+                  |          |     --->beautifier--> testfile.pl.tdy
+ testfile.pl -->  | perltidy | -->
+                  |          |     --->HTML -------> testfile.pl.html
+                  ------------
+
+And in the future, there may be more output filters.  So if you would
+like to both beautify a script and write it to HTML, you need to do it
+in two steps.
+
+=head2 Summary
+
+That's enough to get started using perltidy.  
+When you are ready to create a F<.perltidyrc> file, you may find it
+helpful to use the F page as a guide at
+http://perltidy.sourceforge.net/stylekey.html
+
+Many additional special
+features and capabilities can be found in the manual pages for perltidy
+at
+http://perltidy.sourceforge.net/perltidy.html
+
+We hope that perltidy makes perl programming a little more fun.
+Please check the perltidy
+web site http://perltidy.sourceforge.net occasionally
+for updates.
+
+The author may be contacted at perltidy at users.sourceforge.net.
+
+=cut
diff --git a/pm2pl b/pm2pl
new file mode 100755
index 00000000..f873cf74
--- /dev/null
+++ b/pm2pl
@@ -0,0 +1,125 @@
+#!/usr/bin/env perl
+use strict;
+
+# This script will recombine the perltidy binary script and all of its modules
+# into a single, monolithic script.  I use it for making a temporary "sandbox"
+# for debugging. 
+
+# This is also useful for making a copy of previous versions for parallel
+# debugging.
+
+# usage:
+#   perl pm2pl
+
+# Run this from the perltidy main installation directory.  It reads
+# bin/perltidy and lib/*.pm and writes a file 'perltidy-VERSION.pl' in the
+# current directory. 
+
+# This should work for a system with File::Spec,
+# and for older Windows/Unix systems without File::Spec.
+my $script = 'bin/perltidy';
+my @modules = qw(
+  lib/Perl/Tidy.pm
+  lib/Perl/Tidy/Debugger.pm
+  lib/Perl/Tidy/DevNull.pm
+  lib/Perl/Tidy/Diagnostics.pm
+  lib/Perl/Tidy/FileWriter.pm
+  lib/Perl/Tidy/Formatter.pm
+  lib/Perl/Tidy/HtmlWriter.pm
+  lib/Perl/Tidy/IOScalar.pm
+  lib/Perl/Tidy/IOScalarArray.pm
+  lib/Perl/Tidy/IndentationItem.pm
+  lib/Perl/Tidy/LineBuffer.pm
+  lib/Perl/Tidy/LineSink.pm
+  lib/Perl/Tidy/LineSource.pm
+  lib/Perl/Tidy/Logger.pm
+  lib/Perl/Tidy/Tokenizer.pm
+  lib/Perl/Tidy/VerticalAligner.pm
+  lib/Perl/Tidy/VerticalAligner/Alignment.pm
+  lib/Perl/Tidy/VerticalAligner/Line.pm
+);
+
+# try to make the pathnames system independent
+eval "use File::Spec;";
+my $missing_file_spec = $@;
+unless ($missing_file_spec) {
+    $script = File::Spec->catfile( split '/', $script );
+    foreach my $module (@modules) {
+        $module = File::Spec->catfile( split '/', $module );
+    }
+}
+
+my $VERSION=get_version("lib/Perl/Tidy.pm");
+my $outfile = "perltidy-$VERSION.pl";
+open OUTFILE, "> $outfile" or die "can't open file '$outfile' : $!\n";
+print "Creating file '$outfile' ....\n ";
+
+# first, open the script and copy the first (hash-bang) line
+# (Note: forward slashes in file names here will work in Windows)
+open SCRIPT, "< $script" or die "can't open script file '$script' : $!\n";
+my $hash_bang =