From 9ac6af6598e9346774d558853f179a8864422670 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Tue, 15 Sep 2020 08:49:17 -0700 Subject: [PATCH] added flags -bbhb=n, -bbsb=n, -bbq=n, suggestion git #38 --- CHANGES.md | 13 ++ bin/perltidy | 77 +++++++ docs/BugLog.html | 330 ++++++++++++++++++++++++++++ docs/ChangeLog.html | 14 ++ docs/perltidy.html | 75 ++++++- lib/Perl/Tidy.pm | 37 ++-- lib/Perl/Tidy/Formatter.pm | 222 +++++++++++++++++-- local-docs/BugLog.pod | 18 +- t/snippets/random_file_generator.pl | 84 ++++++- 9 files changed, 823 insertions(+), 47 deletions(-) create mode 100644 docs/BugLog.html diff --git a/CHANGES.md b/CHANGES.md index 753500a3..0f2a3b68 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,18 @@ # Perltidy Change Log +## 2020 xx xx + + - Added the token '->' to the list of alignment tokens, as suggested in git + #39, so that it can be vertically aligned if a space is placed before them with -wls='->'. + + - Added parameters -bbhb=n (--break-before-hash-brace=n), -bbsb=n (--break-before-square-bracket=n), + and -bbp=n (--break-before-paren=n) suggested in git #38. These provide control over the + opening container token of a multiple-line list. + + - Numerous issues have been found during automated testing and fixed. Many involve references to + uninitialized variables when perltidy is given random text. A complete list is given in + the file 'BugLog.pod'. + ## 2020 09 07 - Fixed bug git #37, an error when the combination -scbb -csc was used. diff --git a/bin/perltidy b/bin/perltidy index a6f65a71..9b9e87e1 100755 --- a/bin/perltidy +++ b/bin/perltidy @@ -2125,6 +2125,83 @@ separately if desired: -ohbr or --opening-hash-brace-right -osbr or --opening-square-bracket-right +=item B<-bbhb=n>, B<--break-before-hash-brace=n> and related flags + +When a list of items spans multiple lines, the default formatting is to place +the opening brace (or other container token) at the end of the starting line, +like this: + + $romanNumerals = { + one => 'I', + two => 'II', + three => 'III', + four => 'IV', + }; + +This flag can change the default behavior to cause a line break to be placed +before the opening brace according to the value given to the integer B: + + -bbhb=0 never break [default] + -bbhb=1 stable: break if the input script had a break + -bbhb=2 break if list is 'complex', meaning it contains other broken lists + -bbhb=3 always break + +For example, + + # perltidy -bbhb=3 + $romanNumerals = + { + one => 'I', + two => 'II', + three => 'III', + four => 'IV', + }; + +There are a couple of points to note about this flag: + +=over 4 + +=item * + +This parameter only applies if the contents of the container looks like a list. +The contents need to contain some commas or '=>'s at the next interior level to +be considered a list. + +=item * + +This parameter only applies if there is a blank space before the opening brace +in the default formatting. + +=item * + +If multiple opening tokens have been 'welded' together with the B<-wn> parameter, then +this parameter has no effect. + +=item * + +Similar flags for controlling parens and square brackets are given in the next section. + +=back + +=item B<-bbsb=n>, B<--break-before-square-bracket=n> + +This flag is similar to the flag described above, except it applies to square brackets. + + -bbsb=0 never break [default] + -bbsb=1 stable: break if the input script had a break + -bbsb=2 break if list is 'complex', meaning it contains other broken lists + -bbsb=3 always break + +=item B<-bbp=n>, B<--break-before-paren=n> + +This flag is similar to B<-bbhb=n>, described above, except it applies to parens. + + -bbp=0 never break [default] + -bbp=1 stable: break if the input script had a break + -bbp=2 break if list is 'complex', meaning it contains other broken lists + -bbp=3 always break + + =item B<-wn>, B<--weld-nested-containers> The B<-wn> flag causes closely nested pairs of opening and closing container diff --git a/docs/BugLog.html b/docs/BugLog.html new file mode 100644 index 00000000..bd06dfde --- /dev/null +++ b/docs/BugLog.html @@ -0,0 +1,330 @@ + + + + + + + + + + + + + + + +

Issues fixed after release 20200907

+ +

This is a detailed log of changes since the release 20200907. All bugs were found with the help of automated random testing.

+ +
+ +
make the arrow a vertical alignment token, git #39
+
+ +

The -> can now be vertically aligned if a space is placed before it with -wls='->'.

+ +
+
add flags -bbhb=n, -bbsb=n, =bbp=n, git #38
+
+ +

These flags give control over the opening token of a multiple-line list. They are described in the man pages, perltidy.html.

+ +
+
Allow vertical alignment of line-ending fat comma
+
+ +

A change was made to allow a '=>' at the end of a line to align vertically, provided that it aligns with two or more other '=>' tokens. This update was 14 Sep 2020, 'Allow line-ending '=>' to align vertically', ea96739.

+ +
+
fixed uninitialized value reference
+
+ +

The following message was generated when running perltidy on random text:

+ +
 Use of uninitialized value $K_semicolon in subtraction (-) at /home/steve/bin/Perl/Tidy/Formatter.pm line 16467.
+ +

This was fixed 14 Sep 2020, included in 'Allow line-ending '=>' to align vertically', ea96739.

+ +
+
Do not create a zero size file by deleting semicolons
+
+ +

A rule was added to prevent a file consisting of a single semicolon

+ +
 ;
+ +

from becoming a zero length file. This could cause problems with other software. Fixed 13 Sep 2020, 'do not create a zero length file by deleting semicolons', b39195e.

+ +
+
fixed uninitialized value reference
+
+ +

The following message was generated when running perltidy on random text:

+ +
 Use of uninitialized value $cti in numeric eq (==) at /home/steve/bin/Perl/Tidy/Formatter.pm line 11926.
+ Use of uninitialized value $cti in numeric eq (==) at /home/steve/bin/Perl/Tidy/Formatter.pm line 11936.
+ Use of uninitialized value $cti in numeric eq (==) at /home/steve/bin/Perl/Tidy/Formatter.pm line 11944.
+ +

This was fixed 13 Sep 2020 in 'fixed unitialized variable problem ', adb2096.

+ +
+
fixed uninitialized value reference
+
+ +

The following message was generated when running perltidy on random text:

+ +
 substr outside of string at /home/steve/bin/Perl/Tidy/Tokenizer.pm line 7362.
+ Use of uninitialized value in concatenation (.) or string at /home/steve/bin/Perl/Tidy/Tokenizer.pm line 7362.
+ +

This was fixed 13 Sep 2020 in 'fixed unitialized variable problem', 5bf49a3.

+ +
+
fixed uninitialized value reference
+
+ +

The following message was generated when running perltidy on random text:

+ +
 Use of uninitialized value $K_opening in subtraction (-) at /home/steve/bin/Perl/Tidy/Formatter.pm line 16467.
+ +

This was fixed 13 Sep 2020 in 'fix undefined variable reference', 1919482.

+ +
+
hashbang warning changed
+
+ +

The following snippet generated a warning that there might be a hash-bang after the start of the script.

+ +
 $x = 2;
+ #!  sunos does not yet provide a /usr/bin/perl
+ $script = "$^X $script";
+ +

To prevent this annoyance, the warning is not given unless the first nonblank character after the '#!' is a '/'. Note that this change is just for the warning message. The actual hash bang check does not require the slash.

+ +

Fixed 13 Sep 2020, 'prevent unnecessary hash-bang warning message' 4f7733e and 'improved hash-bang warning filter', fa84904.

+ +
+
uninitialized index referenced
+
+ +

An unitialized index was referenced when running on a file of randomly generated text:

+ +
  Use of uninitialized value $K_oo in subtraction (-) at /home/steve/bin/Perl/Tidy/Formatter.pm line 7259.
+ +

This was fixed 12 Sep 2020 in 'fixed undefined index', 616bb88.

+ +
+
Oops message triggered
+
+ +

The parameter combination -lp -wc triggered an internal bug message from perltidy:

+ +
 398: Program bug with -lp.  seqno=77 should be 254 and i=1 should be less than max=-1
+ 713: The logfile perltidy.LOG may contain useful information
+ 713: 
+ 713: Oops, you seem to have encountered a bug in perltidy.  Please check the
+ 713: BUGS file at http://perltidy.sourceforge.net.  If the problem is not
+ 713: listed there, please report it so that it can be corrected.  Include the
+ ...
+ +

The problem is that the parameters --line-up-parentheses and --whitespace-cycle=n are not compatible. The fix is to write a message and turn off the -wc parameter when the both occur. This was fixed 8 Sep 2020 in "do not allow -wc and -lp together, can cause bugs", 7103781.

+ +
+
Internal fault detected by perltidy
+
+ +

This snippet after processing with the indicated parameters triggered a Fault message in store-token-to-go due to discontinuous internal index values :

+ +
  perltidy --noadd-newlines --space-terminal-semicolon
+
+  if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
+  $yy=1;
+ +

This triggered the message:

+ +
 ==============================================================================
+ While operating on input stream with name: '<stdin>'
+ A fault was detected at line 7472 of sub 'Perl::Tidy::Formatter::store_token_to_go'
+ in file '/home/steve/bin/Perl/Tidy/Formatter.pm'
+ which was called from line 8298 of sub 'Perl::Tidy::Formatter::process_line_of_CODE'
+ Message: 'Unexpected break in K values: 591 != 589+1'
+ This is probably an error introduced by a recent programming change. 
+ ==============================================================================
+ +

The deletion of the extra, spaced, comma had created an extra space in the token array which had not been forseen in the original programming. It was fixed 10 Sep 2020 in "fixed very rare fault found with automated testing", eb1b1d9.

+ +
+
Error parsing deprecated $# variable
+
+ +

This problem can be illustrated with this two-line snippet:

+ +
  $#
+  eq$,?print"yes\n":print"no\n";
+ +

Perltidy joined '$#' and 'eq' to get $#eq, but should have stopped at the line end to get $# followed by keyword 'eq'. (Note that $# is deprecated). This was fixed 11 Sep 2020 in "fixed several fringe parsing bugs found in testing", 85e01b7.

+ +
+
Error message parsing a file with angle brackets and ternaries
+
+ +

This problem can be illustrated with the following test snippet which was not correctly parsed.

+ +
 print$$ <300?"$$<300\n":$$<700?"$$<700\n":$$<2_000?"$$<2,000\n":$$<10_000?"$$ <10,000\n":"$$>9,999\n";
+ +

The problem is related to the '<' symbol following the '$$' variable, a possible filehandle, and is similar to a previous bug. The problem was corrected 11 Sep 2020 in "fixed several fringe parsing bugs found in testing", 85e01b7. The line now correctly formats to

+ +
 print $$ < 300  ? "$$<300\n"
+   : $$ < 700    ? "$$<700\n"
+   : $$ < 2_000  ? "$$<2,000\n"
+   : $$ < 10_000 ? "$$ <10,000\n"
+   :               "$$>9,999\n";
+ +
+
code crash with cuddled-else formatting on unbalanced files
+
+ +

A file with incorrect bracing which effectively gave negative indentation caused a crash when a stack was referenced with a negative index. The problem was fixed 8 Sept 2020 in "convert array to hash to avoid trouble with neg levels in bad files", a720e0d.

+ +
+
error message 'Unterminated angle operator?'
+
+ +

This error can be demonstrated with this line.

+ +
  print $i <10 ? "yes" : "no";
+ +

Perl has some strange parsing rules near a possible filehandle, and they change over time. The '<' here is a less than symbol, but perltidy expected that it might be the start of an angle operator, based on the old rules, and gave a warning. The formatting was still correct, but the warning was confusing. This has been fixed 8 Sep 2020 in 'remove confusing warning message', 0a4d725.

+ +
+
Line broken after here target
+
+ +

This problem is illustrated with the following snippet

+ +
  $sth= $dbh->prepare (<<"END_OF_SELECT") or die "Couldn't prepare SQL" ;
+      SELECT COUNT(duration),SUM(duration) 
+      FROM logins WHERE username='$user'
+  END_OF_SELECT
+ +

When run with a short line length it got broken after the here target, causing an error. This was due to a recent program change and fixed 7 Sep 2020 in 'fixed bug where long line with here target got broken', 8f7e4cb.

+ +
+
undefined variable named 'test2'
+
+ +

An uninitialized value was being referenced and triggered this message:

+ +
 undefined test2, i_opening=5, max=18, caller=Perl::Tidy::Formatter ./perltidy-20200907.pl 13465
+ Use of uninitialized value $test2 in numeric eq (==) at ./perltidy-20200907.pl line 19692.
+ +

Fixed 8 Sep 2020 in 'fixed rare problem with stored index values for -lp option', 4147c8c.

+ +
+
Line order switched at start of quoted text
+
+ +

This problem arose in several scripts involving the parameter --line-up-parentheses pluse one or more of the vertical tightness flags. It can be illustrated with the following snippet:

+ +
    perltidy --line-up-parentheses --paren-vertical-tightness=1
+
+    if (
+        ( $name, $chap ) =
+        $cur_fname =~ m!^Bible/
+          .*?/          # testament
+          .*?/          # range of books
+          (.*?)/        # book name
+          .*?           # optional range of verses
+          (\d+)$!x
+      )
+    {
+        $cur_name = "$name $chap";
+    }
+ +

This gave

+ +
    if (( $name, $chap ) =
+          .*?/          # testament
+        $cur_fname =~ m!^Bible/
+          .*?/          # range of books
+          (.*?)/        # book name
+          .*?           # optional range of verses
+          (\d+)$!x
+      )
+    {
+        $cur_name = "$name $chap";
+    }
+ +

Notice the incorrect line order. The problem was an incorrect order of operations in the vertical aligner flush, leaving a line stranded and coming out in the wrong order. This was fixed 11 Sep 2020.

+ +
+
crash due to bad index named '$j_terminal_match'
+
+ +

This crash was due to an index error which caused a non-existant object to be referenced. The problem is fixed 2020-09-07 in "fix problem of undefined values involving j_terminal_match", c5bfa77. The particular parameters which caused this were:

+ +
    --noadd-newlines --nowant-left-space='=' 
+ +
+
an issue with the -x flag
+
+ +

This is not a bug but did take some time to resolve. The problem was reduced to the following script run with the -x flag (--look-for-hash-bang)

+ +
 print(SCRIPT$headmaybe . <<EOB . <<'EOF' .$tailmaybe),$!;
+ #!$wd/perl
+ EOB
+ print "\$^X is $^X, \$0 is $0\n";
+ EOF
+ +

The resulting file had a syntax error (here-doc target EOB changed).

+ +
 print(SCRIPT$headmaybe . <<EOB . <<'EOF' .$tailmaybe),$!;
+ #!$wd/perl
+ EOB print "\$^X is $^X, \$0 is $0\n";
+ EOF
+ +

The problem is that the -x flag tells perltidy not to start parsing until it sees a line starting with '#!', which happens to be in a here-doc in this case.

+ +

A warning was added to the manual 7 Sept 2020 in "add warning about inappropriate -x flag", fe66743.

+ +
+
error parsing sub signature
+
+ +

This problem was reduced to the following snippet:

+ +
 substr
+ (
+  $#
+ )
+ +

The deprecated variable '$#' was being parsed incorrectly, and this was due to an error in which the word 'substr' followed by a paren was taken as the start of a sub signature. The problem was fixed 8 Sep 2020 in 'fix problem parsing sub prototypes' 569e05f. The code

+ +
  $container_type =~ /^sub/;
+ +

was corrected to be

+ +
  $container_type =~ /^sub\b/;
+ +
+
uninitialized value message, found 7 Sep 2020
+
+ +

Unitialized values were referenced. An index was not being tested. Fixed 8 Sep 2020 in "fix undefined variable", 9729965.

+ +
 Use of uninitialized value $Kon in array element at /home/steve/bin/Perl/Tidy/Formatter.pm line 4022.
+ Use of uninitialized value $Kon in array element at /home/steve/bin/Perl/Tidy/Formatter.pm line 4023.
+ Use of uninitialized value $Ko in subtraction (-) at /home/steve/bin/Perl/Tidy/Formatter.pm line 4023.
+ +
+
+ + + + + + + diff --git a/docs/ChangeLog.html b/docs/ChangeLog.html index af434945..539d64f2 100644 --- a/docs/ChangeLog.html +++ b/docs/ChangeLog.html @@ -1,5 +1,19 @@

Perltidy Change Log

+

2020 xx xx

+ +
- Added the token '->' to the list of alignment tokens, as suggested in git
+  #39, so that it can be vertically aligned if a space is placed before them with -wls='->'.
+
+- Added parameters -bbhb=n (--break-before-hash-brace=n), -bbsb=n (--break-before-square-bracket=n),
+  and -bbp=n (--break-before-paren=n) suggested in git #38.  These provide control over the
+  opening container token of a multiple-line list.
+
+- Numerous issues have been found during automated testing and fixed. Many involve references to
+  uninitialized variables when perltidy is given random text. A complete list is given in
+  the file 'BugLog.pod'.
+
+

2020 09 07

- Fixed bug git #37, an error when the combination -scbb -csc was used.
diff --git a/docs/perltidy.html b/docs/perltidy.html
index d4050276..f3411de8 100644
--- a/docs/perltidy.html
+++ b/docs/perltidy.html
@@ -833,7 +833,7 @@
 
-aws, --add-whitespace
-

Setting this option allows perltidy to add certain whitespace improve code readability. This is the default. If you do not want any whitespace added, but are willing to have some whitespace deleted, use -naws. (Use -fws to leave whitespace completely unchanged).

+

Setting this option allows perltidy to add certain whitespace to improve code readability. This is the default. If you do not want any whitespace added, but are willing to have some whitespace deleted, use -naws. (Use -fws to leave whitespace completely unchanged).

-dws, --delete-old-whitespace
@@ -1675,6 +1675,77 @@ -ohbr or --opening-hash-brace-right -osbr or --opening-square-bracket-right
+ + +
+ +

When a list of items spans multiple lines, the default formatting is to place the opening brace (or other container token) at the end of the starting line, like this:

+ +
    $romanNumerals = {
+        one   => 'I',
+        two   => 'II',
+        three => 'III',
+        four  => 'IV',
+    };
+ +

This flag can change the default behavior to cause a line break to be placed before the opening brace according to the value given to the integer n:

+ +
  -bbhb=0 never break [default]
+  -bbhb=1 stable: break if the input script had a break
+  -bbhb=2 break if list is 'complex', meaning it contains other broken lists
+  -bbhb=3 always break
+ +

For example,

+ +
    # perltidy -bbhb=3
+    $romanNumerals =
+      {
+        one   => 'I',
+        two   => 'II',
+        three => 'III',
+        four  => 'IV',
+      };
+ +

There are a couple of points to note about this flag:

+ +
    + +
  • This parameter only applies if the contents of the container looks like a list. The contents need to contain some commas or '=>'s at the next interior level to be considered a list.

    + +
  • +
  • This parameter only applies if there is a blank space before the opening brace in the default formatting.

    + +
  • +
  • If multiple opening tokens have been 'welded' together with the -wn parameter, then this parameter has no effect.

    + +
  • +
  • Similar flags for controlling parens and square brackets are given in the next section.

    + +
  • +
+ +
+
-bbsb=n, --break-before-square-bracket=n
+
+ +

This flag is similar to the flag described above, except it applies to square brackets.

+ +
  -bbsb=0 never break [default]
+  -bbsb=1 stable: break if the input script had a break
+  -bbsb=2 break if list is 'complex', meaning it contains other broken lists
+  -bbsb=3 always break
+ +
+
-bbp=n, --break-before-paren=n
+
+ +

This flag is similar to -bbhb=n, described above, except it applies to parens.

+ +
  -bbp=0 never break [default]
+  -bbp=1 stable: break if the input script had a break
+  -bbp=2 break if list is 'complex', meaning it contains other broken lists
+  -bbp=3 always break
+
-wn, --weld-nested-containers
@@ -2801,6 +2872,8 @@

If your script has leading lines of system commands or other text which are not valid perl code, and which are separated from the start of the perl code by a "hash-bang" line, ( a line of the form #!...perl ), you must use the -x flag to tell perltidy not to parse and format any lines before the "hash-bang" line. This option also invokes perl with a -x flag when checking the syntax. This option was originally added to allow perltidy to parse interactive VMS scripts, but it should be used for any script which is normally invoked with perl -x.

+

Please note: do not use this flag unless you are sure your script needs it. Parsing errors can occur if it does not have a hash-bang, or, for example, if the actual first hash-bang is in a here-doc. In that case a parsing error will occur because the tokenization will begin in the middle of the here-doc.

+
Making a file unreadable
diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index 6623c2dc..e07daaba 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -111,6 +111,7 @@ BEGIN { } sub DESTROY { + # required to avoid call to AUTOLOAD in some versions of perl } @@ -120,7 +121,7 @@ sub AUTOLOAD { # some diagnostic information. This sub should never be called # except for a programming error. our $AUTOLOAD; - return if ($AUTOLOAD eq 'DESTROY'); + return if ( $AUTOLOAD eq 'DESTROY' ); my ( $pkg, $fname, $lno ) = caller(); print STDERR <print($msg); $Warn_count++; return } @@ -1745,8 +1746,8 @@ EOM ERROR_EXIT: return 1; -} ## end of main program perltidy -} ## end of closure for sub perltidy +} ## end of main program perltidy +} ## end of closure for sub perltidy sub line_diff { @@ -2282,6 +2283,9 @@ sub generate_options { $add_option->( 'keep-interior-semicolons', 'kis', '!' ); $add_option->( 'one-line-block-semicolons', 'olbs', '=i' ); $add_option->( 'one-line-block-nesting', 'olbn', '=i' ); + $add_option->( 'break-before-hash-brace', 'bbhb', '=i' ); + $add_option->( 'break-before-square-bracket', 'bbsb', '=i' ); + $add_option->( 'break-before-paren', 'bbp', '=i' ); ######################################## $category = 6; # Controlling list formatting @@ -2468,6 +2472,9 @@ sub generate_options { break-at-old-ternary-breakpoints break-at-old-attribute-breakpoints break-at-old-keyword-breakpoints + break-before-hash-brace=0 + break-before-square-bracket=0 + break-before-paren=0 comma-arrow-breakpoints=5 nocheck-syntax character-encoding=guess @@ -2551,10 +2558,10 @@ sub generate_options { #--------------------------------------------------------------- %expansion = ( %expansion, - 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], - 'fnl' => [qw(freeze-newlines)], - 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], - 'fws' => [qw(freeze-whitespace)], + 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], + 'fnl' => [qw(freeze-newlines)], + 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], + 'fws' => [qw(freeze-whitespace)], 'freeze-blank-lines' => [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], 'fbl' => [qw(freeze-blank-lines)], @@ -2562,16 +2569,16 @@ sub generate_options { 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], 'nooutdent-long-lines' => [qw(nooutdent-long-quotes nooutdent-long-comments)], - 'noll' => [qw(nooutdent-long-lines)], - 'io' => [qw(indent-only)], + 'noll' => [qw(nooutdent-long-lines)], + 'io' => [qw(indent-only)], 'delete-all-comments' => [qw(delete-block-comments delete-side-comments delete-pod)], 'nodelete-all-comments' => [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], - 'dac' => [qw(delete-all-comments)], - 'ndac' => [qw(nodelete-all-comments)], - 'gnu' => [qw(gnu-style)], - 'pbp' => [qw(perl-best-practices)], + 'dac' => [qw(delete-all-comments)], + 'ndac' => [qw(nodelete-all-comments)], + 'gnu' => [qw(gnu-style)], + 'pbp' => [qw(perl-best-practices)], 'tee-all-comments' => [qw(tee-block-comments tee-side-comments tee-pod)], 'notee-all-comments' => diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 40c8c403..96e5c115 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -103,6 +103,8 @@ my ( %want_break_before, + %break_before_container_types, + %space_after_keyword, %tightness, @@ -207,6 +209,9 @@ BEGIN { _K_closing_ternary_ => $i++, _rcontainer_map_ => $i++, _rK_phantom_semicolons_ => $i++, + _rtype_count_by_seqno_ => $i++, + _ris_broken_container_ => $i++, + _rhas_broken_container_ => $i++, _rpaired_to_inner_container_ => $i++, _rbreak_container_ => $i++, _rshort_nested_ => $i++, @@ -667,6 +672,9 @@ sub new { $self->[_rcontainer_map_] = {}; # hierarchical map of containers $self->[_rK_phantom_semicolons_] = undef; # for undoing phantom semicolons if iterating + $self->[_rtype_count_by_seqno_] = {}; + $self->[_ris_broken_container_] = {}; + $self->[_rhas_broken_container_] = {}; $self->[_rpaired_to_inner_container_] = {}; $self->[_rbreak_container_] = {}; # prevent one-line blocks $self->[_rshort_nested_] = {}; # blocks not forced open @@ -2361,6 +2369,13 @@ sub respace_tokens { # This will be needed if we want to undo them for iterations my $rK_phantom_semicolons = []; + my %seqno_stack; + my %KK_stack; + my $depth_next = 0; + my $rtype_count_by_seqno = {}; + my $ris_broken_container = {}; + my $rhas_broken_container = {}; + # a sub to link preceding nodes forward to a new node type my $link_back = sub { my ( $Ktop, $key ) = @_; @@ -2450,6 +2465,14 @@ sub respace_tokens { $last_nonblank_token = $item->[_TOKEN_]; $last_nonblank_block_type = $item->[_BLOCK_TYPE_]; $nonblank_token_count++; + + # count selected types + if ( $type =~ /^(=>|,)$/ ) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) ) { + $rtype_count_by_seqno->{$seqno}->{$type}++; + } + } } # and finally, add this item to the new array @@ -3082,10 +3105,32 @@ sub respace_tokens { elsif ($type_sequence) { - # if ( $is_opening_token{$token} ) { - # } - - if ( $is_closing_token{$token} ) { + if ( $is_opening_token{$token} ) { + $seqno_stack{$depth_next} = $type_sequence; + $KK_stack{$depth_next} = $KK; + $depth_next++; + } + elsif ( $is_closing_token{$token} ) { + $depth_next--; + + # keep track of broken lists for later formatting + my $seqno_test = $seqno_stack{$depth_next}; + my $KK_open = $KK_stack{$depth_next}; + my $seqno_outer = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno_test) + && defined($KK_open) + && $seqno_test == $type_sequence ) + { + my $lx_open = $rLL->[$KK_open]->[_LINE_INDEX_]; + my $lx_close = $rLL->[$KK]->[_LINE_INDEX_]; + if ( $lx_open < $lx_close ) { + $ris_broken_container->{$type_sequence} = + $lx_close - $lx_open; + if ( defined($seqno_outer) ) { + $rhas_broken_container->{$seqno_outer} = 1; + } + } + } # Insert a tentative missing semicolon if the next token is # a closing block brace @@ -3125,6 +3170,9 @@ sub respace_tokens { $self->[_K_opening_ternary_] = $K_opening_ternary; $self->[_K_closing_ternary_] = $K_closing_ternary; $self->[_rK_phantom_semicolons_] = $rK_phantom_semicolons; + $self->[_rtype_count_by_seqno_] = $rtype_count_by_seqno; + $self->[_ris_broken_container_] = $ris_broken_container; + $self->[_rhas_broken_container_] = $rhas_broken_container; # make sure the new array looks okay $self->check_token_array(); @@ -6144,6 +6192,18 @@ EOM $left_bond_strength{'?'} = NO_BREAK; } + # Only make a hash entry for the next parameters if values are defined. + # That allows a quick check to be made later. + for ( $rOpts->{'break-before-hash-brace'} ) { + $break_before_container_types{'{'} = $_ if $_ && $_ > 0; + } + for ( $rOpts->{'break-before-square-bracket'} ) { + $break_before_container_types{'['} = $_ if $_ && $_ > 0; + } + for ( $rOpts->{'break-before-paren'} ) { + $break_before_container_types{'('} = $_ if $_ && $_ > 0; + } + # Define here tokens which may follow the closing brace of a do statement # on the same line, as in: # } while ( $something); @@ -7472,21 +7532,21 @@ sub copy_token_as_type { # We had to wait until now for reasons explained in sub 'write_line'. if ( $level < 0 ) { $level = 0 } - # Check for emergency flush... - # The K indexes in the batch must always be a continuous sequence of - # the global token array. The batch process programming assumes this. - # If storing this token would cause this relation to fail we must dump - # the current batch before storing the new token. It is extremely rare - # for this to happen. One known example is the following two-line snippet - # when run with parameters - # --noadd-newlines --space-terminal-semicolon: - # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ; - # $yy=1; + # Check for emergency flush... + # The K indexes in the batch must always be a continuous sequence of + # the global token array. The batch process programming assumes this. + # If storing this token would cause this relation to fail we must dump + # the current batch before storing the new token. It is extremely rare + # for this to happen. One known example is the following two-line snippet + # when run with parameters + # --noadd-newlines --space-terminal-semicolon: + # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ; + # $yy=1; if ( defined($max_index_to_go) && $max_index_to_go >= 0 ) { my $Klast = $K_to_go[$max_index_to_go]; if ( $Ktoken_vars != $Klast + 1 ) { - $self->flush_batch_of_CODE(); + $self->flush_batch_of_CODE(); } } @@ -8710,10 +8770,13 @@ EOM $self->recombine_breakpoints( $ri_first, $ri_last ); } - $self->insert_final_breaks( $ri_first, $ri_last ) + $self->insert_final_ternary_breaks( $ri_first, $ri_last ) if $colon_count; } + $self->insert_breaks_before_list_opening_containers( $ri_first, + $ri_last ); + # do corrector step if -lp option is used my $do_not_pad = 0; if ($rOpts_line_up_parentheses) { @@ -11383,8 +11446,8 @@ sub send_lines_to_vertical_aligner { } } - # Convert a bareword within braces into a quote for matching. - # This will allow alignment of expressions like this: + # Convert a bareword within braces into a quote for matching. + # This will allow alignment of expressions like this: # local ( $SIG{'INT'} ) = IGNORE; # local ( $SIG{ALRM} ) = 'POSTMAN'; if ( $type eq 'w' @@ -11931,11 +11994,11 @@ sub lookup_opening_indentation { # But don't do special indentation to something like ')->pack(' if ( !$block_type_to_go[$ibeg] ) { - # Note that logical padding has already been applied, so we may - # need to remove some spaces to get a valid hash key. + # Note that logical padding has already been applied, so we may + # need to remove some spaces to get a valid hash key. my $tok = $tokens_to_go[$ibeg]; if ( length($tok) > 1 ) { $tok =~ s/\s//g } - my $cti = $closing_token_indentation{ $tok }; + my $cti = $closing_token_indentation{$tok}; if ( $cti == 1 ) { if ( $i_terminal <= $ibeg + 1 || $is_semicolon_terminated ) @@ -12642,7 +12705,7 @@ sub get_seqno { # Replaced =~ and // in the list. // had been removed in RT 119588 @q = qw# = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= - { ? : => && || ~~ !~~ =~ !~ // <=> + { ? : => && || ~~ !~~ =~ !~ // <=> -> #; @is_vertical_alignment_type{@q} = (1) x scalar(@q); @@ -16479,7 +16542,7 @@ sub set_nobreaks { my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_]; my $K_opening = $K_opening_container->{$type_sequence}; next unless ( defined($K_opening) ); - my $i_opening = $i_beg + ( $K_opening - $K_beg ); + my $i_opening = $i_beg + ( $K_opening - $K_beg ); next if ( $i_opening < $i_beg ); # ... and only one semicolon between these braces @@ -17815,7 +17878,118 @@ sub break_equals { return; } -sub insert_final_breaks { +sub insert_breaks_before_list_opening_containers { + + my ( $self, $ri_left, $ri_right ) = @_; + + return unless %break_before_container_types; + + my $nmax = @{$ri_right} - 1; + my $rLL = $self->[_rLL_]; + my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; + my $ris_broken_container = $self->[_ris_broken_container_]; + my $rhas_broken_container = $self->[_rhas_broken_container_]; + + # scan the ends of all lines + my @insert_list; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + next unless ( $ir > $il ); + my $Kl = $K_to_go[$il]; + my $Kr = $K_to_go[$ir]; + my $Kend = $Kr; + my $iend = $ir; + my $type_end = $rLL->[$Kr]->[_TYPE_]; + + # backup before a side comment + if ( $type_end eq '#' ) { + $Kend = $self->K_previous_nonblank($Kr); + next unless defined($Kend); + $type_end = $rLL->[$Kend]->[_TYPE_]; + $iend = $ir + ( $Kend - $Kr ); + } + + # This is only for line-ending tokens + next unless ( $Kl < $Kend - 1 ); + + # Only for selected types of tokens + my $token_end = $rLL->[$Kend]->[_TOKEN_]; + my $break_option = $break_before_container_types{$token_end}; + next unless ($break_option); + + # This is not for block braces + my $block_type = $rLL->[$Kend]->[_BLOCK_TYPE_]; + next if ($block_type); + + # Require a space before the line ending token + next unless ( $rLL->[ $Kend - 1 ]->[_TYPE_] eq 'b' ); + + # This is only for list containers. This is a little fuzzy, + # but we will require at least 2 commas or 1 fat comma in the + # immediate lower level + my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_]; + my $fat_comma_count = $rtype_count_by_seqno->{$seqno}->{'=>'}; + my $comma_count = $rtype_count_by_seqno->{$seqno}->{','}; + next unless ( $fat_comma_count || $comma_count && $comma_count > 1 ); + + # Do not break a weld + next if ( $self->weld_len_left( $seqno, $token_end ) ); + + # Parens cannot break after certain keywords + if ( $token_end eq '(' ) { + my $iend_m2 = $iend - 2; + if ( $iend_m2 >= $il ) { + if ( $types_to_go[$iend_m2] eq 'k' + && $is_if_unless{ $tokens_to_go[$iend_m2] } ) + { + next; + } + } + } + + # Final decision is based on selected option + # 1 = stable + my $ok_to_break; + if ( $break_option == 1 ) { + if ( $ir - 2 > $il ) { + $ok_to_break = $old_breakpoint_to_go[ $ir - 2 ]; + } + } + + # 2 = only if complex list + elsif ( $break_option == 2 ) { + $ok_to_break = $rhas_broken_container->{$seqno}; + } + + # 3 = always break + elsif ( $break_option == 3 ) { + $ok_to_break = 1; + } + + # Shouldn't happen! Bad flag, make same as 3 + else { + $ok_to_break = 1; + } + + next unless ($ok_to_break); + + # This meets the criteria, so install a break + my $Kbreak = $self->K_previous_nonblank($Kend); + my $ibreak = $Kbreak - $Kl + $il; + next if ( $ibreak < $il ); + next if ( $nobreak_to_go[$ibreak] ); + push @insert_list, $ibreak; + } + + # insert any new break points + if (@insert_list) { + $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + return; +} + +sub insert_final_ternary_breaks { my ( $self, $ri_left, $ri_right ) = @_; diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod index 509ce4ab..de9ba82a 100644 --- a/local-docs/BugLog.pod +++ b/local-docs/BugLog.pod @@ -1,14 +1,24 @@ =head1 Issues fixed after release 20200907 -This is a log of bugs found and fixed since the release 20200907. All were +This is a detailed log of changes since the release 20200907. All bugs were found with the help of automated random testing. =over +=item B + +The -> can now be vertically aligned if a space is placed before it with -wls='->'. + +=item B + +These flags give control over the opening token of a multiple-line list. They are +described in the man pages, perltidy.html. + =item B A change was made to allow a '=>' at the end of a line to align vertically, provided that it aligns with two or more other '=>' tokens. +This update was 14 Sep 2020, 'Allow line-ending '=>' to align vertically', ea96739. =item B @@ -16,7 +26,7 @@ The following message was generated when running perltidy on random text: Use of uninitialized value $K_semicolon in subtraction (-) at /home/steve/bin/Perl/Tidy/Formatter.pm line 16467. -This was fixed 14 Sep 2020. +This was fixed 14 Sep 2020, included in 'Allow line-ending '=>' to align vertically', ea96739. =item B @@ -25,7 +35,7 @@ A rule was added to prevent a file consisting of a single semicolon ; from becoming a zero length file. This could cause problems with other -software. +software. Fixed 13 Sep 2020, 'do not create a zero length file by deleting semicolons', b39195e. =item B @@ -67,6 +77,8 @@ To prevent this annoyance, the warning is not given unless the first nonblank character after the '#!' is a '/'. Note that this change is just for the warning message. The actual hash bang check does not require the slash. +Fixed 13 Sep 2020, 'prevent unnecessary hash-bang warning message' 4f7733e +and 'improved hash-bang warning filter', fa84904. =item B diff --git a/t/snippets/random_file_generator.pl b/t/snippets/random_file_generator.pl index ced0738e..b7abcffb 100755 --- a/t/snippets/random_file_generator.pl +++ b/t/snippets/random_file_generator.pl @@ -117,14 +117,90 @@ sub random_index { sub random_characters { my ($nchars) = @_; - my @qc = qw# { [ ( } ] ) , ; $x for #; - my $nqc = @qc; + my @qset1 = qw# { [ ( } ] ) , ; #; + my @qset2 = ( + qw{a b c f g m q r s t w x y z V W X 0 1 8 9}, + ';', '[', ']', '{', '}', '(', ')', '=', '?', '|', '+', '<', + '>', '.', '!', '~', '^', '*', '$', '@', '&', ':', '%', ',', + '\\', '/', '_', ' ', "\n", "\t", '-', + "'", '"', '`', '#', + ); + my @qset3 = ( + '!%:', '!%:', + '!%:', '!%:', + '!*:', '!@:', + '%:', '%:,', + '%:;', '*:', + '*:,', '*::', + '*:;', '+%:', + '+*:', '+@:', + '-%:', '-*:', + '-@:', ';%:', + ';*:', ';@:', + '@:', '@:,', + '@::', '@:;', + '\%:', '\&:', + '\*:', '\@:', + '~%:', '~*:', + '~@:', '(<', + '(<', '=<', + 'm(', 'm(', + 'm<', 'm[', + 'm{', 'q(', + 'q<', 'q[', + 'q{', 's(', + 's<', 's[', + 's{', 'y(', + 'y<', 'y[', + 'y{', '$\'0', + '009', '0bB', + '0xX', '009;', + '0bB;', '0xX;', + "<<'", '<<"', + '<<`', '&::', + '<s', + 's<>-', '*::0', + '*::1', '*:::', + '*::\'', '$::0', + '$:::', '$::\'', + '@::0', '@::1', + '@:::', '&::0', + '&::\'', '%:::', + '%::\'', '$:::z', + '*:::z', "\\\@::'9:!", + "} mz}~<\nV", "( {8", + ); my @lines; my $ncpl = 0; my $line = ""; for ( my $ich = 0 ; $ich < $nchars ; $ich++ ) { - my $ix = random_index( $nqc - 1 ); - my $ch = $qc[$ix]; + my $nset = random_index(2); + my $ch; + if ($nset==0) { + my $ix = random_index( @qset1 - 1 ); + $ch = $qset1[$ix]; + } + elsif ($nset==1) { + my $ix = random_index( @qset2 - 1 ); + $ch = $qset2[$ix]; + } + elsif ($nset==2) { + my $ix = random_index( @qset3 - 1 ); + $ch = $qset3[$ix]; + } $line .= " $ch "; $ncpl++; if ( $ncpl > 20 ) { -- 2.39.5