From: Steve Hancock Date: Tue, 3 Nov 2020 16:33:57 +0000 (-0800) Subject: added flags -maxle and -maxue X-Git-Tag: 20201001.03~9 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=4299dc515ac0fa73ed489f76c75fc41cbc491667;p=perltidy.git added flags -maxle and -maxue --- diff --git a/CHANGES.md b/CHANGES.md index 3630dc8f..db09ba65 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,12 +6,27 @@ always 0 for --assert-tidy if the user had turned off all error messages with the -quiet. This has been fixed. - - Add flag -mfs=n, --maximum-file-size-mb=n. This parameter is provided to + - Add flag -maxfs=n, --maximum-file-size-mb=n. This parameter is provided to avoid causing system problems by accidentally attempting to format an extremely large data file. The default is n=10. The command to increase the limit to 20 MB for example would be -maxfs=20. This only applies to files specified by filename on the command line. + - Skip formatting if too many indentation level errors. This is controlled + with -maxle=n, --maximum-level-errors=n. This means that if the ending + indentation differs from the starting indentation by more than + n levels, the file will be output verbatim. The default is n=1. + To skip this check, set n=0. + + - Skip formatting if too many 'unexpected' tokenization errors. This is controlled + with -maxue=n, --maximum-unexpected-errors=n. This means that if the + number of times the tokenizer found unexpected tokens is greater than n, + the file will be output verbatim. The intention is to avoid formatting + non-perl scripts. The default is n=3. To skip this check, set n=0. + It is possible that some extended syntaxes will require setting + -maxue=0. It would be better to try to work out a patch to perltidy to handle + such cases. + - Add flag -xci, --extended-continuation-indentation, regarding issue git #28 This flag causes continuation indentation to "extend" deeper into structures. 
Since this is a fairly new flag, the default is -nxci to avoid disturbing @@ -34,6 +49,9 @@ - Added 'state' as a keyword. + - This version is about 15% faster than previous versions due to some optimization work + using Devel::NYTProf. + - Numerous minor issues that the average user would not encounter were found and fixed. They can be seen in the more complete list of updates at diff --git a/bin/perltidy b/bin/perltidy index aa2f1973..88c9d5ed 100755 --- a/bin/perltidy +++ b/bin/perltidy @@ -3931,7 +3931,7 @@ increasing size, when multiple files are being processed. This is useful during program development, when large numbers of files with varying sizes are processed, because it can reduce virtual memory usage. -B<--maximum-file-size-mb=n> or B<-mfs=n> specifies the maximum file size in +B<--maximum-file-size-mb=n> or B<-maxfs=n> specifies the maximum file size in megabytes that perltidy will attempt to format. This parameter is provided to avoid causing system problems by accidentally attempting to format an extremely large data file. Most perl scripts are less than about 2 MB in size. The @@ -3939,10 +3939,39 @@ integer B<n> has a default value of 10, so perltidy will skip formatting files which have a size greater than 10 MB. The command to increase the limit to 20 MB for example would be - perltidy -mfs=20 + perltidy -maxfs=20 This only applies to files specified by filename on the command line. +B<--maximum-level-errors=n> or B<-maxle=n> specifies the maximum number of +indentation level errors that are allowed before perltidy skips formatting and just +outputs a file verbatim. The default is n=1. This means that if the final indentation +of a script differs from the starting indentation by more than 1 level, the file +will be output verbatim. To skip this check completely, set n=0. 
+ +For example, the following script has a level error of 3 and will be output verbatim + + Input and default output: + {{{ + + + perltidy -maxle=0 + { + { + { + +B<--maximum-unexpected-errors=n> or B<-maxue=n> specifies the maximum number of +unexpected tokenization errors that are allowed before formatting is skipped and a +script is output verbatim. The intention of this flag is to avoid accidentally +formatting something like an HTML file, for example. The default is n=3. This +means that if the number of times the tokenizer found unexpected tokens is +greater than 3, the file will be output verbatim. To skip this check +completely, set n=0. + +It is possible that some extended syntaxes will require setting -maxue=0. It +would be better to try to work out a patch to perltidy to handle such cases +cleanly so that the tokenizer is not left in an uncertain state. + B<-DEBUG> will write a file with extension F<.DEBUG> for each input file showing the tokenization of all lines of code. diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index 452eb8d1..df0b9e4d 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -906,9 +906,9 @@ EOM next; } - # And avoid formatting extremely large files. Since perltidy reads - # files into memory, trying to process an extremely large file - # could cause system problems. + # And avoid formatting extremely large files. Since perltidy reads + # files into memory, trying to process an extremely large file + # could cause system problems. 
my $size_in_mb = ( -s $input_file ) / ( 1024 * 1024 ); if ( $size_in_mb > $rOpts->{'maximum-file-size-mb'} ) { $size_in_mb = sprintf( "%0.1f", $size_in_mb ); @@ -1393,6 +1393,7 @@ EOM debugger_object => $debugger_object, diagnostics_object => $diagnostics_object, tabsize => $tabsize, + rOpts => $rOpts, starting_level => $rOpts->{'starting-indentation-level'}, indent_columns => $rOpts->{'indent-columns'}, @@ -2365,25 +2366,27 @@ sub generate_options { $category = 13; # Debugging ######################################## ## $add_option->( 'DIAGNOSTICS', 'I', '!' ); - $add_option->( 'DEBUG', 'D', '!' ); - $add_option->( 'dump-cuddled-block-list', 'dcbl', '!' ); - $add_option->( 'dump-defaults', 'ddf', '!' ); - $add_option->( 'dump-long-names', 'dln', '!' ); - $add_option->( 'dump-options', 'dop', '!' ); - $add_option->( 'dump-profile', 'dpro', '!' ); - $add_option->( 'dump-short-names', 'dsn', '!' ); - $add_option->( 'dump-token-types', 'dtt', '!' ); - $add_option->( 'dump-want-left-space', 'dwls', '!' ); - $add_option->( 'dump-want-right-space', 'dwrs', '!' ); - $add_option->( 'fuzzy-line-length', 'fll', '!' ); - $add_option->( 'help', 'h', '' ); - $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); - $add_option->( 'show-options', 'opt', '!' ); - $add_option->( 'timestamp', 'ts', '!' ); - $add_option->( 'version', 'v', '' ); - $add_option->( 'memoize', 'mem', '!' ); - $add_option->( 'file-size-order', 'fso', '!' ); - $add_option->( 'maximum-file-size-mb', 'mfs', '=i' ); + $add_option->( 'DEBUG', 'D', '!' ); + $add_option->( 'dump-cuddled-block-list', 'dcbl', '!' ); + $add_option->( 'dump-defaults', 'ddf', '!' ); + $add_option->( 'dump-long-names', 'dln', '!' ); + $add_option->( 'dump-options', 'dop', '!' ); + $add_option->( 'dump-profile', 'dpro', '!' ); + $add_option->( 'dump-short-names', 'dsn', '!' ); + $add_option->( 'dump-token-types', 'dtt', '!' ); + $add_option->( 'dump-want-left-space', 'dwls', '!' 
); + $add_option->( 'dump-want-right-space', 'dwrs', '!' ); + $add_option->( 'fuzzy-line-length', 'fll', '!' ); + $add_option->( 'help', 'h', '' ); + $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); + $add_option->( 'show-options', 'opt', '!' ); + $add_option->( 'timestamp', 'ts', '!' ); + $add_option->( 'version', 'v', '' ); + $add_option->( 'memoize', 'mem', '!' ); + $add_option->( 'file-size-order', 'fso', '!' ); + $add_option->( 'maximum-file-size-mb', 'maxfs', '=i' ); + $add_option->( 'maximum-level-errors', 'maxle', '=i' ); + $add_option->( 'maximum-unexpected-errors', 'maxue', '=i' ); #--------------------------------------------------------------------- @@ -2527,6 +2530,8 @@ sub generate_options { maximum-fields-per-table=0 maximum-line-length=80 maximum-file-size-mb=10 + maximum-level-errors=1 + maximum-unexpected-errors=3 memoize minimum-space-to-comment=4 nobrace-left-and-indent diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index d96be7b3..08fb4e69 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -166,6 +166,8 @@ BEGIN { _extended_syntax_ => $i++, _maximum_level_ => $i++, _true_brace_error_count_ => $i++, + _rOpts_maximum_level_errors_ => $i++, + _rOpts_maximum_unexpected_errors_ => $i++, }; } @@ -247,11 +249,13 @@ sub new { look_for_selfloader => 1, starting_line_number => 1, extended_syntax => 0, + rOpts => {}, ); my %args = ( %defaults, @args ); # we are given an object with a get_line() method to supply source lines my $source_object = $args{source_object}; + my $rOpts = $args{rOpts}; # we create another object with a get_line() and peek_ahead() method my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object); @@ -327,6 +331,9 @@ sub new { $self->[_extended_syntax_] = $args{extended_syntax}; $self->[_maximum_level_] = 0; $self->[_true_brace_error_count_] = 0; + $self->[_rOpts_maximum_level_errors_] = $rOpts->{'maximum-level-errors'}; + 
$self->[_rOpts_maximum_unexpected_errors_] = + $rOpts->{'maximum-unexpected-errors'}; bless $self, $class; $tokenizer_self = $self; @@ -457,15 +464,26 @@ sub report_tokenization_errors { # (i.e. unexpected binary characters) my $severe_error = $self->[_in_error_]; + my $maxle = $self->[_rOpts_maximum_level_errors_]; + my $maxue = $self->[_rOpts_maximum_unexpected_errors_]; + $maxle = 1 unless defined($maxle); + $maxue = 3 unless defined($maxue); + my $level = get_indentation_level(); if ( $level != $tokenizer_self->[_starting_level_] ) { warning("final indentation level: $level\n"); my $level_diff = $tokenizer_self->[_starting_level_] - $level; + if ( $level_diff < 0 ) { $level_diff = -$level_diff } # Set severe error flag if the level error is greater than 1. # The formatter can function for any level error but it is probably # best not to attempt formatting for a high level error. - $severe_error = 1 if ( $level_diff < -1 || $level_diff > 1 ); + if ( $maxle > 0 && $level_diff > $maxle ) { + $severe_error = 1; + warning(<[_unexpected_error_count_] > 3 ) { + my $ue_count = $tokenizer_self->[_unexpected_error_count_]; + if ( $maxue > 0 && $ue_count > $maxue ) { + warning(< -maxue=$maxue; use -maxue=0 to force formatting +EOM $severe_error = 1; } diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod index 9ab53985..ccd73534 100644 --- a/local-docs/BugLog.pod +++ b/local-docs/BugLog.pod @@ -2,6 +2,11 @@ =over 4 +=item B + +These parameters had tentatively been hardwired in the tokenizer. +Now the user can control them or turn the checks off altogether. + =item B In random testing, an error was encountered parsing the following line @@ -12,7 +17,8 @@ In random testing, an error was encountered parsing the following line The line parsed correctly with a space between the '$$' and the '*'. The problem had to do with an error in some newer code for postfix dereferencing, -and this was fixed on 2 Nov 2020. 
+and this was fixed on 2 Nov 2020, +'fix problem scanning '$$'; revise call to operator_expected', 49d993b. =item B