From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Tue, 3 Nov 2020 16:33:57 +0000 (-0800)
Subject: added flags -maxle and -maxue
X-Git-Tag: 20201001.03~9
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=4299dc515ac0fa73ed489f76c75fc41cbc491667;p=perltidy.git

added flags -maxle and -maxue
---

diff --git a/CHANGES.md b/CHANGES.md
index 3630dc8f..db09ba65 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -6,12 +6,27 @@
       always 0 for --assert-tidy if the user had turned off all error messages with
       the -quiet.  This has been fixed.
 
-    - Add flag -mfs=n, --maximum-file-size-mb=n.  This parameter is provided to
+    - Add flag -maxfs=n, --maximum-file-size-mb=n.  This parameter is provided to
       avoid causing system problems by accidentally attempting to format an 
       extremely large data file. The default is n=10.  The command to increase 
-      the limit to 20 MB for example would be  -mfs=20.  This only applies to
+      the limit to 20 MB for example would be  -maxfs=20.  This only applies to
       files specified by filename on the command line.
 
+    - Skip formatting if too many indentation level errors.  This is controlled
+      with -maxle=n, --maximum-level-errors=n.  This means that if the ending 
+      indentation differs from the starting indentation by more than
+      n levels, the file will be output verbatim. The default is n=1. 
+      To skip this check, set n=0.
+
+    - Skip formatting if too many 'unexpected' tokenization errors.  This is controlled
+      with -maxue=n, --maximum-unexpected-errors=n.  This means that if the 
+      number of times the tokenizer found unexpected tokens is greater than n,
+      the file will be output verbatim. The intention is to avoid formatting
+      non-perl scripts. The default is n=3.  To skip this check, set n=0.
+      It is possible that some extended syntaxes will require setting
+      -maxue=0.  It would be better to try to work out a patch to perltidy to handle 
+      such cases.
+
     - Add flag -xci, --extended-continuation-indentation, regarding issue git #28
       This flag causes continuation indentation to "extend" deeper into structures.
       Since this is a fairly new flag, the default is -nxci to avoid disturbing 
@@ -34,6 +49,9 @@
 
     - Added 'state' as a keyword.
 
+    - This version is about 15% faster than previous versions due to some optimization work 
+      using Devel::NYTProf.
+
     - Numerous minor issues that the average user would not encounter were found
       and fixed. They can be seen in the more complete list of updates at 
 
diff --git a/bin/perltidy b/bin/perltidy
index aa2f1973..88c9d5ed 100755
--- a/bin/perltidy
+++ b/bin/perltidy
@@ -3931,7 +3931,7 @@ increasing size, when multiple files are being processed.  This is useful
 during program development, when large numbers of files with varying sizes are
 processed, because it can reduce virtual memory usage. 
 
-B<--maximum-file-size-mb=n> or B<-mfs=n> specifies the maximum file size in
+B<--maximum-file-size-mb=n> or B<-maxfs=n> specifies the maximum file size in
 megabytes that perltidy will attempt to format. This parameter is provided to
 avoid causing system problems by accidentally attempting to format an extremely
 large data file. Most perl scripts are less than about 2 MB in size. The
@@ -3939,10 +3939,39 @@ integer B<n> has a default value of 10, so perltidy will skip formatting files
 which have a size greater than 10 MB.  The command to increase the limit to 20
 MB for example would be
    
-  perltidy -mfs=20
+  perltidy -maxfs=20
 
 This only applies to files specified by filename on the command line. 
 
+B<--maximum-level-errors=n> or B<-maxle=n> specifies the maximum number of
+indentation level errors allowed before perltidy skips formatting and just
+outputs a file verbatim.  The default is n=1.  This means that if the final indentation
+of a script differs from the starting indentation by more than 1 level, the file
+will be output verbatim.  To skip this check completely, set n=0.
+
+For example, the following script has a level error of 3 and will be output verbatim:
+
+    Input and default output:
+    {{{
+
+
+    perltidy -maxle=0
+    {
+        {
+            {
+
+B<--maximum-unexpected-errors=n> or B<-maxue=n> specifies the maximum number of
+unexpected tokenization errors allowed before formatting is skipped and a
+script is output verbatim.  The intention of this flag is to avoid accidentally
+formatting something like an html file, for example.  The default is n=3.  This
+means that if the number of times the tokenizer found unexpected tokens is
+greater than 3, the file will be output verbatim.  To skip this check
+completely, set n=0.
+
+It is possible that some extended syntaxes will require setting -maxue=0.  It
+would be better to try to work out a patch to perltidy to handle such cases
+cleanly so that the tokenizer is not left in an uncertain state.
+
 B<-DEBUG>  will write a file with extension F<.DEBUG> for each input file 
 showing the tokenization of all lines of code.
 
diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm
index 452eb8d1..df0b9e4d 100644
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -906,9 +906,9 @@ EOM
                 next;
             }
 
-	    # And avoid formatting extremely large files. Since perltidy reads
-	    # files into memory, trying to process an extremely large file
-	    # could cause system problems.
+            # And avoid formatting extremely large files. Since perltidy reads
+            # files into memory, trying to process an extremely large file
+            # could cause system problems.
             my $size_in_mb = ( -s $input_file ) / ( 1024 * 1024 );
             if ( $size_in_mb > $rOpts->{'maximum-file-size-mb'} ) {
                 $size_in_mb = sprintf( "%0.1f", $size_in_mb );
@@ -1393,6 +1393,7 @@ EOM
                 debugger_object    => $debugger_object,
                 diagnostics_object => $diagnostics_object,
                 tabsize            => $tabsize,
+                rOpts              => $rOpts,
 
                 starting_level      => $rOpts->{'starting-indentation-level'},
                 indent_columns      => $rOpts->{'indent-columns'},
@@ -2365,25 +2366,27 @@ sub generate_options {
     $category = 13;    # Debugging
     ########################################
 ##  $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
-    $add_option->( 'DEBUG',                           'D',    '!' );
-    $add_option->( 'dump-cuddled-block-list',         'dcbl', '!' );
-    $add_option->( 'dump-defaults',                   'ddf',  '!' );
-    $add_option->( 'dump-long-names',                 'dln',  '!' );
-    $add_option->( 'dump-options',                    'dop',  '!' );
-    $add_option->( 'dump-profile',                    'dpro', '!' );
-    $add_option->( 'dump-short-names',                'dsn',  '!' );
-    $add_option->( 'dump-token-types',                'dtt',  '!' );
-    $add_option->( 'dump-want-left-space',            'dwls', '!' );
-    $add_option->( 'dump-want-right-space',           'dwrs', '!' );
-    $add_option->( 'fuzzy-line-length',               'fll',  '!' );
-    $add_option->( 'help',                            'h',    '' );
-    $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
-    $add_option->( 'show-options',                    'opt',  '!' );
-    $add_option->( 'timestamp',                       'ts',   '!' );
-    $add_option->( 'version',                         'v',    '' );
-    $add_option->( 'memoize',                         'mem',  '!' );
-    $add_option->( 'file-size-order',                 'fso',  '!' );
-    $add_option->( 'maximum-file-size-mb',            'mfs',  '=i' );
+    $add_option->( 'DEBUG',                           'D',     '!' );
+    $add_option->( 'dump-cuddled-block-list',         'dcbl',  '!' );
+    $add_option->( 'dump-defaults',                   'ddf',   '!' );
+    $add_option->( 'dump-long-names',                 'dln',   '!' );
+    $add_option->( 'dump-options',                    'dop',   '!' );
+    $add_option->( 'dump-profile',                    'dpro',  '!' );
+    $add_option->( 'dump-short-names',                'dsn',   '!' );
+    $add_option->( 'dump-token-types',                'dtt',   '!' );
+    $add_option->( 'dump-want-left-space',            'dwls',  '!' );
+    $add_option->( 'dump-want-right-space',           'dwrs',  '!' );
+    $add_option->( 'fuzzy-line-length',               'fll',   '!' );
+    $add_option->( 'help',                            'h',     '' );
+    $add_option->( 'short-concatenation-item-length', 'scl',   '=i' );
+    $add_option->( 'show-options',                    'opt',   '!' );
+    $add_option->( 'timestamp',                       'ts',    '!' );
+    $add_option->( 'version',                         'v',     '' );
+    $add_option->( 'memoize',                         'mem',   '!' );
+    $add_option->( 'file-size-order',                 'fso',   '!' );
+    $add_option->( 'maximum-file-size-mb',            'maxfs', '=i' );
+    $add_option->( 'maximum-level-errors',            'maxle', '=i' );
+    $add_option->( 'maximum-unexpected-errors',       'maxue', '=i' );
 
     #---------------------------------------------------------------------
 
@@ -2527,6 +2530,8 @@ sub generate_options {
       maximum-fields-per-table=0
       maximum-line-length=80
       maximum-file-size-mb=10
+      maximum-level-errors=1
+      maximum-unexpected-errors=3
       memoize
       minimum-space-to-comment=4
       nobrace-left-and-indent
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index d96be7b3..08fb4e69 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -166,6 +166,8 @@ BEGIN {
         _extended_syntax_                    => $i++,
         _maximum_level_                      => $i++,
         _true_brace_error_count_             => $i++,
+        _rOpts_maximum_level_errors_         => $i++,
+        _rOpts_maximum_unexpected_errors_    => $i++,
     };
 }
 
@@ -247,11 +249,13 @@ sub new {
         look_for_selfloader  => 1,
         starting_line_number => 1,
         extended_syntax      => 0,
+        rOpts                => {},
     );
     my %args = ( %defaults, @args );
 
     # we are given an object with a get_line() method to supply source lines
     my $source_object = $args{source_object};
+    my $rOpts         = $args{rOpts};
 
     # we create another object with a get_line() and peek_ahead() method
     my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
@@ -327,6 +331,9 @@ sub new {
     $self->[_extended_syntax_]                    = $args{extended_syntax};
     $self->[_maximum_level_]                      = 0;
     $self->[_true_brace_error_count_]             = 0;
+    $self->[_rOpts_maximum_level_errors_] = $rOpts->{'maximum-level-errors'};
+    $self->[_rOpts_maximum_unexpected_errors_] =
+      $rOpts->{'maximum-unexpected-errors'};
     bless $self, $class;
 
     $tokenizer_self = $self;
@@ -457,15 +464,26 @@ sub report_tokenization_errors {
     # (i.e. unexpected binary characters)
     my $severe_error = $self->[_in_error_];
 
+    my $maxle = $self->[_rOpts_maximum_level_errors_];
+    my $maxue = $self->[_rOpts_maximum_unexpected_errors_];
+    $maxle = 1 unless defined($maxle);
+    $maxue = 3 unless defined($maxue);
+
     my $level = get_indentation_level();
     if ( $level != $tokenizer_self->[_starting_level_] ) {
         warning("final indentation level: $level\n");
         my $level_diff = $tokenizer_self->[_starting_level_] - $level;
+        if ( $level_diff < 0 ) { $level_diff = -$level_diff }
 
-        # Set severe error flag if the level error is greater than 1.
+        # Set severe error flag if the level error exceeds -maxle (0 = no check).
         # The formatter can function for any level error but it is probably
         # best not to attempt formatting for a high level error.
-        $severe_error = 1 if ( $level_diff < -1 || $level_diff > 1 );
+        if ( $maxle > 0 && $level_diff > $maxle ) {
+            $severe_error = 1;
+            warning(<<EOM);
+Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=0 to force formatting
+EOM
+        }
     }
 
     check_final_nesting_depths();
@@ -554,7 +572,11 @@ sub report_tokenization_errors {
     # non-perl scripts, or that something is seriously wrong, so we should
     # avoid formatting them.  This can happen for example if we run perltidy on
     # a shell script or an html file.
-    if ( $tokenizer_self->[_unexpected_error_count_] > 3 ) {
+    my $ue_count = $tokenizer_self->[_unexpected_error_count_];
+    if ( $maxue > 0 && $ue_count > $maxue ) {
+        warning(<<EOM);
+Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting
+EOM
         $severe_error = 1;
     }
 
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod
index 9ab53985..ccd73534 100644
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -2,6 +2,11 @@
 
 =over 4
 
+=item B<added parameters -maxue=n and -maxle=n>
+
+These parameters had tentatively been hardwired in the tokenizer.
+Now the user can control them or turn the checks off altogether.
+
 =item B<Fix problem parsing '$$*'>
 
 In random testing, an error was encountered parsing the following line
@@ -12,7 +17,8 @@ In random testing, an error was encountered parsing the following line
 
 The line parsed correctly with a space between the '$$' and the '*'. The
 problem had to do with an error in some newer code for postfix dereferencing,
-and this was fixed on 2 Nov 2020.
+and this was fixed on 2 Nov 2020 in commit 49d993b,
+'fix problem scanning '$$'; revise call to operator_expected'.
 
 =item B<Update for git #44, fix exit status for assert-tidy/untidy>