From: Steve Hancock Date: Wed, 4 Sep 2019 22:58:39 +0000 (-0700) Subject: added rt#130425 feature, --assert-unchanged X-Git-Tag: 20190915~4 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=6b3e0bcbdc61e5d39a080132b1ca0b9786ccaa74;p=perltidy.git added rt#130425 feature, --assert-unchanged --- diff --git a/CHANGES.md b/CHANGES.md index f6b8750c..346d457e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,14 @@ ## 2019 06 01.01 + - implement issue RT#130425: check mode. A new flag '--assert-unchanged' + will cause an error message if the output script is not identical to + the input script. + + - iteration speedup for unchanged code. Previously, when iterations were + requested, at least two formatting passes were made. Now just a single pass + is made if the formatted code is identical to the input code. + - fixed issue RT#130344: false warning "operator in print statement" for "use lib". diff --git a/bin/perltidy b/bin/perltidy index c0fdd92b..1b3b1050 100755 --- a/bin/perltidy +++ b/bin/perltidy @@ -359,6 +359,12 @@ error messages, perltidy skips files identified by the system as non-text. However, valid perl scripts containing binary data may sometimes be identified as non-text, and this flag forces perltidy to process them. +=item B<-auc>, B<--assert-unchanged> + +This flag asserts that the input and output files are identical, and produces +an error message if they are not. It has no other effect on the functioning of +perltidy. This can be used to identify files which do not need to be updated. + =back =head1 FORMATTING OPTIONS diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm index 99937135..ffc13658 100644 --- a/lib/Perl/Tidy.pm +++ b/lib/Perl/Tidy.pm @@ -420,6 +420,25 @@ EOM croak "unexpected return to Die"; } + my $md5_hex = sub { + my ($buf) = @_; + + # Evaluate the MD5 sum for a string + # Patch for [rt.cpan.org #88020] + # Use utf8::encode since md5_hex() only operates on bytes. + # my $digest = md5_hex( utf8::encode($sink_buffer) ); + + # Note added 20180114: the above patch did not work correctly. I'm not + # sure why. But switching to the method recommended in the Perl 5 + # documentation for Encode worked. According to this we can either use + # $octets = encode_utf8($string) or equivalently + # $octets = encode("utf8",$string) + # and then calculate the checksum. So: + my $octets = Encode::encode( "utf8", $buf ); + my $digest = md5_hex($octets); + return $digest; + }; + # extract various dump parameters my $dump_options_type = $input_hash{'dump_options_type'}; my $dump_options = $get_hash_ref->('dump_options'); @@ -729,7 +748,7 @@ EOM while ( my $input_file = shift @ARGV ) { my $fileroot; my @input_file_stat; - my $display_name; + my $display_name; #--------------------------------------------------------------- # prepare this input stream @@ -849,6 +868,12 @@ EOM $rpending_logfile_message ); next unless ($source_object); + my $max_iterations = $rOpts->{'iterations'}; + my $do_convergence_test = $max_iterations > 1; + my $convergence_log_message; + my %saw_md5; + my $digest_input = 0; + # Prefilters and postfilters: The prefilter is a code reference # that will be applied to the source before tidying, and the # postfilter is a code reference to the result before outputting. @@ -856,6 +881,8 @@ EOM $prefilter || ( $rOpts_character_encoding && $rOpts_character_encoding eq 'utf8' ) + || $rOpts->{'assert-unchanged'} + || $do_convergence_test ) { my $buf = ''; @@ -863,8 +890,6 @@ EOM $buf .= $line; } - $buf = $prefilter->($buf) if $prefilter; - if ( $rOpts_character_encoding && $rOpts_character_encoding eq 'utf8' && !utf8::is_utf8($buf) ) @@ -881,6 +906,19 @@ EOM } } + # MD5 sum of input file is evaluated before any prefilter + if ( $rOpts->{'assert-unchanged'} ) { + $digest_input = $md5_hex->($buf); + } + + $buf = $prefilter->($buf) if $prefilter; + + # starting MD5 sum for convergence test is evaluated after any prefilter + if ($do_convergence_test) { + my $digest = $md5_hex->($buf); + $saw_md5{$digest} = 1; + } + $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, $rpending_logfile_message ); } @@ -982,7 +1020,7 @@ EOM $line_separator = "\n" unless defined($line_separator); my ( $sink_object, $postfilter_buffer ); - if ($postfilter) { + if ( $postfilter || $rOpts->{'assert-unchanged'} ) { $sink_object = Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file, $line_separator, $rOpts, $rpending_logfile_message, $binmode ); @@ -1028,26 +1066,6 @@ EOM # loop over iterations for one source stream #--------------------------------------------------------------- - # We will do a convergence test if 3 or more iterations are allowed. - # It would be pointless for fewer because we have to make at least - # two passes before we can see if we are converged, and the test - # would just slow things down. - my $max_iterations = $rOpts->{'iterations'}; - my $convergence_log_message; - my %saw_md5; - my $do_convergence_test = $max_iterations > 2; - - # Since Digest::MD5 qw(md5_hex) has been in the earliest version of Perl - # we are requiring (5.8), I have commented out this check -##? if ($do_convergence_test) { -##? eval "use Digest::MD5 qw(md5_hex)"; -##? $do_convergence_test = !$@; -##? -##? ### Trying to avoid problems with ancient versions of perl -##? ##eval { my $string = "perltidy"; utf8::encode($string) }; -##? ##$do_convergence_test = $do_convergence_test && !$@; -##? } - # save objects to allow redirecting output during iterations my $sink_object_final = $sink_object; my $debugger_object_final = $debugger_object; @@ -1163,19 +1181,7 @@ EOM } elsif ($do_convergence_test) { - # Patch for [rt.cpan.org #88020] - # Use utf8::encode since md5_hex() only operates on bytes. - # my $digest = md5_hex( utf8::encode($sink_buffer) ); - - # Note added 20180114: this patch did not work correctly. - # I'm not sure why. But switching to the method - # recommended in the Perl 5 documentation for Encode - # worked. According to this we can either use - # $octets = encode_utf8($string) or equivalently - # $octets = encode("utf8",$string) - # and then calculate the checksum. So: - my $octets = Encode::encode( "utf8", $sink_buffer ); - my $digest = md5_hex($octets); + my $digest = $md5_hex->($sink_buffer); if ( !$saw_md5{$digest} ) { $saw_md5{$digest} = $iter; } @@ -1232,12 +1238,27 @@ EOM #--------------------------------------------------------------- # Perform any postfilter operation #--------------------------------------------------------------- - if ($postfilter) { + if ( $postfilter || $rOpts->{'assert-unchanged'} ) { $sink_object->close_output_file(); $sink_object = Perl::Tidy::LineSink->new( $output_file, $tee_file, $line_separator, $rOpts, $rpending_logfile_message, $binmode ); - my $buf = $postfilter->($postfilter_buffer); + + my $buf = + $postfilter + ? $postfilter->($postfilter_buffer) + : $postfilter_buffer; + + # Check if file changed if requested, but only after any postfilter + if ( $rOpts->{'assert-unchanged'} ) { + my $digest_output = $md5_hex->($buf); + if ( $digest_output ne $digest_input ) { + $logger_object->warning( +"assertion failure: '--assert-unchanged' is set but output differs from input\n" + ); + } + } + $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts, $rpending_logfile_message ); @@ -1437,7 +1458,7 @@ EOM # Fix for RT #130297: return a true value if anything was written to the # standard error output, even non-fatal warning messages, otherwise return - # false. + # false. # To allow the caller to determine the error severity, these exit codes are # returned: @@ -1446,8 +1467,8 @@ EOM # 2 - successful run but with non-fatal warning messages # Note that if perltidy is run with multiple files, any single file with - # errors or warnings will write a line like - # '## Please see file testing.t.ERR' + # errors or warnings will write a line like + # '## Please see file testing.t.ERR' # to standard output for each file with errors, so the flag will be true, # even only some of the multiple files may have had errors. @@ -1729,6 +1750,7 @@ sub generate_options { $add_option->( 'tabs', 't', '!' ); $add_option->( 'default-tabsize', 'dt', '=i' ); $add_option->( 'extended-syntax', 'xs', '!' ); + $add_option->( 'assert-unchanged', 'auc', '!' ); ######################################## $category = 2; # Code indentation control