From 71812b20ebbc76cbe4d89cf262928047776bc776 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Wed, 29 Apr 2020 08:11:34 -0700
Subject: [PATCH] update docs

---
 bin/perltidy               |  4 +-
 docs/ChangeLog.html        | 41 +++++++++++++++++++
 docs/Tidy.html             |  2 +-
 docs/perltidy.html         | 84 ++++++++++++++++++++++++++++++++++----
 lib/Perl/Tidy/Formatter.pm | 12 +++---
 5 files changed, 126 insertions(+), 17 deletions(-)
diff --git a/bin/perltidy b/bin/perltidy
index 0c7983fd..89d9dcce 100755
--- a/bin/perltidy
+++ b/bin/perltidy
@@ -1094,9 +1094,9 @@ Suppose the user requests that / signs have a space to the left but not to the r
        print $x /$y;
    }
 
-If formatted in this way, the program will not run (at least with recent versions of perl) because the / is assumed to start a quote. In a complex program, Perl might find a terminating / to the multiline quote and run, but incorrectly.
+If formatted in this way, the program will not run (at least with recent versions of perl) because the / is assumed to start a quote. In a complex program, there might happen to be a / which terminates the multiline quote without a syntax error, allowing the program to run, but incorrectly.
 
-Similar issues arise with other binary operator symbols, such as + and -, and in older versions of perl there could be problems with ternary operators.  So to avoid changing program behavior, perltidy has the simple rule that whitespace around possible filehandles is left unchanged.  Likewise, whitespace around barewords is left unchanged. But the bigger picture is that it is risky write programs which do math operations involving a term which could be taken to be a filehandle.
+Related issues arise with other binary operator symbols, such as + and -, and in older versions of perl there could be problems with ternary operators.  So to avoid changing program behavior, perltidy has the simple rule that whitespace around possible filehandles is left unchanged.  Likewise, whitespace around barewords is left unchanged.
 
 =item Space between specific keywords and opening paren
 
diff --git a/docs/ChangeLog.html b/docs/ChangeLog.html
index 6457896b..54ae6383 100644
--- a/docs/ChangeLog.html
+++ b/docs/ChangeLog.html
@@ -1,5 +1,46 @@
 <h1>Perltidy Change Log</h1>
 
+<h2>2020 01 10.01</h2>
+
+<pre><code>- add option --break-at-old-semicolon-breakpoints, -bos, requested 
+  in RT#131644.  This flag will keep lines beginning with a semicolon.
+
+- Added --use-unicode-gcstring to control use of Unicode::GCString for
+  evaluating character widths of encoded data.  By default, for encoded files
+  perltidy will now look for Unicode::GCString and, if found, will use it 
+  to evaluate character display widths.  This can improve displayed
+  vertical alignment for files with wide characters. This flag is mainly intended
+  for testing, but can also prevent the use of this module in the event that some
+  unforeseen issue arises.  Perltidy installation does not require
+  Unicode::GCString, so users wanting to use this feature need to install it
+  separately.
+
+- Added --character-encoding=guess or -guess to have perltidy guess
+  if a file is encoded as -utf8 or some older single-byte encoding. This
+  is useful when processing a mixture of file types, such as utf8 and 
+  latin-1.  Also, specific encodings of input files other than utf8 may
+  now be given, for example --character-encoding=euc-jp.  For a
+  description of the guessing method see the man pages.
+
+  Please Note: The default encoding has been set to be 'guess'
+  instead of 'none'. I do not like to change defaults, but this seems like
+  the right choice, since it should make perltidy work properly with both
+  older latin-1 and newer utf8 files.  I have done extensive testing and
+  so far haven't found any problems.
+
+- Fix for git#22, Preserve function signature on a single line. An
+  unwanted line break was being introduced when a closing signature paren
+  followed a closing do brace.
+
+- Fix RT#132059, the -dac parameter was not working and caused an error exit
+
+- When -utf8 is used, any error output is encoded as utf8
+
+- Fix for git#19, adjust line break around an 'xor'
+
+- Fix for git#18, added warning for missing comma before unknown bare word.
+</code></pre>
+
 <h2>2020 01 10</h2>
 
 <pre><code>- This release adds a flag to control the feature RT#130394 (allow short nested blocks)
diff --git a/docs/Tidy.html b/docs/Tidy.html
index 9b493562..412c2bae 100644
--- a/docs/Tidy.html
+++ b/docs/Tidy.html
@@ -358,7 +358,7 @@
 
 <h1 id="VERSION">VERSION</h1>
 
-<p>This man page documents Perl::Tidy version 20200110</p>
+<p>This man page documents Perl::Tidy version 20200110.01</p>
 
 <h1 id="LICENSE">LICENSE</h1>
 
diff --git a/docs/perltidy.html b/docs/perltidy.html
index c9834dd2..44f05206 100644
--- a/docs/perltidy.html
+++ b/docs/perltidy.html
@@ -468,14 +468,40 @@
 <dt id="enc-s---character-encoding-s"><b>-enc=s</b>, <b>--character-encoding=s</b></dt>
 <dd>
 
-<p>where <b>s</b>=<b>none</b> or <b>utf8</b>. This flag tells perltidy the character encoding of both the input and output character streams. The value <b>utf8</b> causes the stream to be read and written as UTF-8. The value <b>none</b> causes the stream to be processed without special encoding assumptions. At present there is no automatic detection of character encoding (even if there is a <code>&#39;use utf8&#39;</code> statement in your code) so this flag must be set for streams encoded in UTF-8. Incorrectly setting this parameter can cause data corruption, so please carefully check the output.</p>
+<p>This flag indicates the character encoding, if any, of the input data stream. Perltidy does not look for the encoding directives in the source stream, such as <b>use utf8</b>, and instead relies on this flag to determine the encoding. (Note that perltidy often works on snippets of code rather than complete files so it cannot rely on <b>use utf8</b> directives).</p>
 
-<p>The default is <b>none</b>.</p>
+<p>The possible values for <b>s</b> are (1) the name of an encoding recognized by the Encode.pm module, (2) <b>none</b> if no encoding is used, or (3) <b>guess</b> if perltidy should guess.</p>
 
-<p>The abbreviations <b>-utf8</b> or <b>-UTF8</b> are equivalent to <b>-enc=utf8</b>. So to process a file named <b>file.pl</b> which is encoded in UTF-8 you can use:</p>
+<p>For example, the value <b>utf8</b> causes the stream to be read and written as UTF-8. If the input stream cannot be decoded with a specified encoding then processing is not done.</p>
+
+<p>The value <b>none</b> causes the stream to be processed without special encoding assumptions. This is appropriate for files which are written in single-byte character encodings such as latin-1.</p>
+
+<p>The value <b>guess</b> tells perltidy to guess between either utf8 encoding or no encoding (meaning one character per byte). The guess uses the Encode::Guess module and this restricted range of guesses covers the most common cases. Testing showed that considering any greater number of encodings as guess suspects is too risky.</p>
+
+<p>The current default is <b>guess</b>.</p>
+
+<p>The abbreviations <b>-utf8</b> or <b>-UTF8</b> are equivalent to <b>-enc=utf8</b>, and the abbreviation <b>-guess</b> is equivalent to <b>-enc=guess</b>. So to process a file named <b>file.pl</b> which is encoded in UTF-8 you can use:</p>
 
 <pre><code>   perltidy -utf8 file.pl</code></pre>
 
+<p>or <code>perltidy -guess file.pl</code></p>
+
+<p>To process a file in <b>euc-jp</b> you could use</p>
+
+<pre><code>   perltidy -enc=euc-jp file.pl</code></pre>
+
+<p>A perltidy output file is unencoded if the input file is unencoded, and otherwise it is encoded as <b>utf8</b>, even if the input encoding was not <b>utf8</b>.</p>
+
+</dd>
+<dt id="gcs---use-unicode-gcstring"><b>-gcs</b>, <b>--use-unicode-gcstring</b></dt>
+<dd>
+
+<p>This flag controls whether or not perltidy may use module Unicode::GCString. It has no effect if that module is not installed on a system. If text is encoded then perltidy will by default use the module Unicode::GCString to determine the horizontal widths of the text, provided Unicode::GCString is available. This is needed to produce formatted text with good vertical alignment when there are wide characters. If the module is not available on the system, or if the flag <b>--nouse-unicode-gcstring</b> or <b>-ngcs</b> has been set, then this module will not be used and characters will have an assumed display width of 1. In this case, vertical alignment may be poor if code contains a mixture of wide and normal characters. The default setting is <b>--use-unicode-gcstring</b>. This flag is mainly intended for testing, and as a workaround in case a problem arises with the Unicode::GCString module.</p>
+
+<p>Perltidy installation does not require Unicode::GCString, so users wanting to use this feature need to install it separately.</p>
+
+<p>Also note that actual vertical alignment depends upon the fonts used by the text display software, so vertical alignment may not be optimal even when Unicode::GCString is used.</p>
+
 </dd>
 <dt id="ole-s---output-line-ending-s"><b>-ole=s</b>, <b>--output-line-ending=s</b></dt>
 <dd>
@@ -810,6 +836,28 @@
 
 <p><b>WARNING</b> Be sure to put these tokens in quotes to avoid having them misinterpreted by your command shell.</p>
 
+</dd>
+<dt id="Note:-Perltidy-does-always-follow-whitespace-controls">Note: Perltidy does not always follow whitespace controls</dt>
+<dd>
+
+<p>The various parameters controlling whitespace within a program are requests which perltidy follows as well as possible, but there are a number of situations where changing whitespace could change program behavior and is not done. Examples are whitespace around bareword symbols and possible filehandles. For example, consider the problem of formatting the following subroutine:</p>
+
+<pre><code>   sub print_div {
+      my ($x,$y)=@_;
+      print $x/$y;
+   }</code></pre>
+
+<p>Suppose the user requests that / signs have a space to the left but not to the right. Perltidy will refuse to do this, but if this were done the result would be</p>
+
+<pre><code>   sub print_div {
+       my ($x,$y)=@_;
+       print $x /$y;
+   }</code></pre>
+
+<p>If formatted in this way, the program will not run (at least with recent versions of perl) because the / is assumed to start a quote. In a complex program, there might happen to be a / which terminates the multiline quote without a syntax error, allowing the program to run, but incorrectly.</p>
+
+<p>Related issues arise with other binary operator symbols, such as + and -, and in older versions of perl there could be problems with ternary operators. So to avoid changing program behavior, perltidy has the simple rule that whitespace around possible filehandles is left unchanged. Likewise, whitespace around barewords is left unchanged.</p>
+
 </dd>
 <dt id="Space-between-specific-keywords-and-opening-paren">Space between specific keywords and opening paren</dt>
 <dd>
@@ -904,7 +952,7 @@
     -sbq=1 means follow the example of the source code
     -sbq=2 means always put a space between the backslash and quote</code></pre>
 
-<p>The default is <b>-sbq=1</b>, meaning that a space will be used 0if there is one in the source code.</p>
+<p>The default is <b>-sbq=1</b>, meaning that a space will be used if there is one in the source code.</p>
 
 </dd>
 <dt id="Trimming-trailing-whitespace-from-lines-of-POD">Trimming trailing whitespace from lines of POD</dt>
@@ -1275,6 +1323,8 @@
 
 <p>The <b>-fsb=string</b> is the corresponding parameter used to change the ending marker for format skipping. The default is equivalent to -fse=&#39;#&lt;&lt;&lt;&#39;.</p>
 
+<p>The beginning and ending strings may be the same, but it is preferable to make them different for clarity.</p>
+
 </dd>
 </dl>
 
@@ -2003,6 +2053,26 @@
 
 <p>You may want to include the <b>-weld-nested-containers</b> flag in this case to keep nested braces and parens together, as in the last line.</p>
 
+</dd>
+<dt id="bos---break-at-old-semicolon-breakpoints"><b>-bos</b>, <b>--break-at-old-semicolon-breakpoints</b></dt>
+<dd>
+
+<p>Semicolons are normally placed at the end of a statement. This means that formatted lines do not normally begin with semicolons. If the input stream has some lines which begin with semicolons, these can be retained by setting this flag. For example, consider the following two-line input snippet:</p>
+
+<pre><code>  $z = sqrt($x**2 + $y**2)
+  ;</code></pre>
+
+<p>The default formatting will be:</p>
+
+<pre><code>  $z = sqrt( $x**2 + $y**2 );</code></pre>
+
+<p>The result using <b>perltidy -bos</b> keeps the isolated semicolon:</p>
+
+<pre><code>  $z = sqrt( $x**2 + $y**2 )
+    ;</code></pre>
+
+<p>The default is not to do this, <b>-nbos</b>.</p>
+
 </dd>
 <dt id="bok---break-at-old-keyword-breakpoints"><b>-bok</b>, <b>--break-at-old-keyword-breakpoints</b></dt>
 <dd>
@@ -2500,7 +2570,7 @@
 
 <p>The command <b>-dp</b> or <b>--delete-pod</b> will remove all pod documentation (but not comments).</p>
 
-<p>Two commands which remove comments (but not pod) are: <b>-dbc</b> or <b>--delete-block-comments</b> and <b>-dsc</b> or <b>--delete-side-comments</b>. (Hanging side comments will be deleted with block comments here.)</p>
+<p>Two commands which remove comments (but not pod) are: <b>-dbc</b> or <b>--delete-block-comments</b> and <b>-dsc</b> or <b>--delete-side-comments</b>. (Hanging side comments will be deleted with side comments here.)</p>
 
 <p>The negatives of these commands also work, and are the defaults. When block comments are deleted, any leading &#39;hash-bang&#39; will be retained. Also, if the <b>-x</b> flag is used, any system commands before a leading hash-bang will be retained (even if they are in the form of comments).</p>
 
@@ -2514,7 +2584,7 @@
 
 <p>The command <b>-tp</b> or <b>--tee-pod</b> will write all pod documentation (but not comments).</p>
 
-<p>The commands which write comments (but not pod) are: <b>-tbc</b> or <b>--tee-block-comments</b> and <b>-tsc</b> or <b>--tee-side-comments</b>. (Hanging side comments will be written with block comments here.)</p>
+<p>The commands which write comments (but not pod) are: <b>-tbc</b> or <b>--tee-block-comments</b> and <b>-tsc</b> or <b>--tee-side-comments</b>. (Hanging side comments will be written with side comments here.)</p>
 
 <p>The negatives of these commands also work, and are the defaults.</p>
 
@@ -2957,7 +3027,7 @@
 
 <h1 id="VERSION">VERSION</h1>
 
-<p>This man page documents perltidy version 20200110</p>
+<p>This man page documents perltidy version 20200110.01</p>
 
 <h1 id="BUG-REPORTS">BUG REPORTS</h1>
 
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 37ccd800..37ef42a7 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -6286,9 +6286,9 @@ sub make_sub_matching_pattern {
     #  'sub' is an anonymous sub
     #  'sub:' is a label, not a sub
     #  'substr' is a keyword
-    $SUB_PATTERN    = '^sub\s+(::|\w)';   # match normal sub
-    $ASUB_PATTERN   = '^sub$';            # match anonymous sub
-    $ANYSUB_PATTERN = '^sub\b';           # match either type of sub
+    $SUB_PATTERN    = '^sub\s+(::|\w)';    # match normal sub
+    $ASUB_PATTERN   = '^sub$';             # match anonymous sub
+    $ANYSUB_PATTERN = '^sub\b';            # match either type of sub
 
     if ( $rOpts->{'sub-alias-list'} ) {
 
@@ -6890,11 +6890,9 @@ sub tight_paren_follows {
     my $K_test = $self->K_next_nonblank($K_oc);
     if ( defined($K_test) ) {
         my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
-        if (
-               $block_type
+        if (   $block_type
             && $rLL->[$K_test]->[_TYPE_] eq '{'
-            && $block_type =~ /$ANYSUB_PATTERN/
-          )
+            && $block_type =~ /$ANYSUB_PATTERN/ )
         {
             return 1;
         }
-- 
2.47.3