From cac802a1ab40a12b03701d80b04305dcb28a21c9 Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Wed, 7 Jul 2021 18:16:04 -0700 Subject: [PATCH] rewrote side_comment_test.pl to use masking --- dev-bin/side_comment_test.pl | 262 +++++++++++++++++++++++++++++++---- 1 file changed, 233 insertions(+), 29 deletions(-) diff --git a/dev-bin/side_comment_test.pl b/dev-bin/side_comment_test.pl index d6809a3d..5daeec4a 100755 --- a/dev-bin/side_comment_test.pl +++ b/dev-bin/side_comment_test.pl @@ -3,7 +3,7 @@ use strict; use warnings; # This is a utility to stress test perltidy by inserting as many side comments -# into a script as possible. +# into a script as possible. It has helped locate several bugs. # Usage: @@ -11,20 +11,35 @@ use warnings; # perl scripts. # 2. Then enter: -# side_comment_test.pl ../* +# +# side_comment_test.pl +# +# to operate on all regular files in the parent directory, i.e. '../*' + +# More generally: +# side_comment_test.pl file1 [ file2 ... +# where file1 .. are the files to operate on # 3. Look at any files named '*.2.ERR' and try to resolve the problem # 4. When done, remove the temporary directory -# NOTE: A little glitch is that unuaual here doc targets (i.e., lower case -# words) will get side comments. +my $cmd; +my @files = @ARGV; +if ( !@files ) { @files = glob('../*'); } +foreach my $file (@files) { -# If this happens, edit the '.2' file and remove the side comment and run -# perltidy on that file again. + unless ( -e $file && -f $file && -s $file ) { + print STDERR "skipping $file\n"; + next; + } + + # Best to skip files written by perltidy + if ( $file =~ /\.(tdy|ERR|LOG|DEBUG)$/ ) { + print STDERR "skipping $file\n"; + next; + } -my $cmd; -foreach my $file (@ARGV) { my $basename = $file; if ( $basename =~ /^(.*)\/([^\/]+)$/ ) { $basename = $2 } my $file1 = "$basename.1"; @@ -62,38 +77,48 @@ sub add_side_comments { # Given file named $ifile, # add as many side comments as possible and write result to $ofile - my $string1 = get_string($ifile); - my @lines = split /\n/, $string1; - foreach my $line (@lines) { + # create a mask for use in avoiding placing side comments in unsafe places + my ( @mask, @lines ); + PerlMask::perlmask( + _source => $ifile, + _rmasked_file => \@mask, + _roriginal_file => \@lines, + _compression => undef + ); - # Skip comments, including hash bang and #<< $>> lines - next if ( $line =~ /^\s*#/ ); + # Check: be sure the source and mask arrays are the same size + my $num_mask = @mask; + my $num_source = @lines; + if ( $num_mask != $num_source ) { + print STDERR + "num_source=$num_source != num_mask=$num_mask for file '$ifile'\n"; + exit 1; + } + + # Loop over lines to add side comments where safe to do so + my $ix = -1; + foreach my $line (@lines) { + $ix++; + chomp $line; - next if ( $line =~ /^(__END__|__DATA__)\s*$/ ); + # Do not put side comments on blank lines + next unless ($line); - # Skip end of format - next if ( $line eq '.' ); + # Do not put side comments on patterns, here targets, __END__, + # __DATA__, format end, comments, etc. This is essential to make this + # error free. + next if ( $mask[$ix] =~ /[Qq#]\s*$/ ); - # Optional: Avoid problems involving guessing if / starts a pattern - next if ( $line eq '/' ); + # Add a space if needed to avoid creating a punctuation variable + if ( $line =~ /[\@\%\$\*]$/ ) { $line .= " " } - # Try to skip here targets; see note above - next if ( $line =~ /^\s*[A-Z_0-9=\.\-]+\s*$/ ); + # Append the comment $line .= "#sc#"; } my $string2 = join "\n", @lines; write_file( $ofile, $string2 ); } -sub get_string { - my ($file) = @_; - open my $fh, '<', $file or die "cannot open $file: $!\n"; - local $/ = undef; - my $string = <$fh>; - close $fh; - return $string; -} - sub write_file { my ( $fname, $string, $msg ) = @_; open my $fh, '>', $fname or die "cannot open $fname: $!\n"; @@ -102,3 +127,182 @@ sub write_file { print STDERR "Wrote $fname\n" if ($msg); return; } + +##################################################################### +# +# The PerlMask package is an interface to perltidy which accepts a +# source filehandle and returns a 'masked' version of the source as +# a string or array. It can also optionally return the original file +# as a string or array. +# +# It works by making a callback object with a write_line() method to +# receive tokenized lines from perltidy. This write_line method +# selectively replaces tokens with either their original text or with a +# benign masking character (such as '#' or 'Q'). +# +# Usage: +# +# PerlMask::perlmask( +# _source => $fh, # required source +# _rmasked_file => \$masked_file, # required ref to ARRAY or SCALAR +# _roriginal_file => \$original_file, # optional ref to ARRAY or SCALAR +# _compression => $opt_c # optional +# ); +# +# _source is any source that perltidy will accept, including a +# filehandle or reference to SCALAR or ARRAY +# +# The compression flag may have these values: +# 0 all mask file line numbers and character positions agree with +# original file (DEFAULT) +# 1 line numbers agree and character positions agree within lines of code +# 2 line numbers agree but character positions do not +# 3 no correspondence between line numbers or character positions +# +##################################################################### + +package PerlMask; +use Carp; +use Perl::Tidy; + +sub perlmask { + + my %args = ( _compression => 0, @_ ); + my $rfile = $args{_rmasked_file}; + unless ( defined($rfile) ) { + croak + "Missing required parameter '_rmasked_file' in call to perlmask\n"; + } + my $ref = ref($rfile); + unless ( $ref =~ /^(SCALAR|ARRAY)$/ ) { + croak < $args{_source}, + 'formatter' => bless( \%args, __PACKAGE__ ), # callback object + 'argv' => "-npro -se", # -npro : ignore .perltidyrc, + # -se : errors to STDOUT + ); + if ($err) { + my $name = $args{_source}; + print STDERR "perltidy returns error flag for source=$name\n"; + return; + } +} + +sub print_line { + + # called from write_line to dispatch one line (either masked or original).. + # here we'll either append it to a string or array, as appropriate + my ( $rfile, $line ) = @_; + if ( defined($rfile) ) { + if ( ref($rfile) eq 'SCALAR' ) { + $$rfile .= $line . "\n"; + } + elsif ( ref($rfile) eq 'ARRAY' ) { + push @{$rfile}, $line . "\n"; + } + } +} + +sub write_line { + + # This is called from perltidy line-by-line + my ( $self, $line_of_tokens ) = @_; + my $rmasked_file = $self->{_rmasked_file}; + my $roriginal_file = $self->{_roriginal_file}; + my $opt_c = $self->{_compression}; + $opt_c = 0 unless defined($opt_c); + + my $line_type = $line_of_tokens->{_line_type}; + my $input_line_number = $line_of_tokens->{_line_number}; + my $input_line = $line_of_tokens->{_line_text}; + my $rtoken_type = $line_of_tokens->{_rtoken_type}; + my $rtokens = $line_of_tokens->{_rtokens}; + chomp $input_line; + + # mask non-CODE lines + if ( $line_type ne 'CODE' ) { + return if ( $opt_c == 3 ); + my $len = length($input_line); + if ( $opt_c == 0 && $len > 0 ) { + print_line( $roriginal_file, $input_line ) if $roriginal_file; + print_line( $rmasked_file, '#' x $len ); + } + else { + print_line( $roriginal_file, $input_line ) if $roriginal_file; + print_line( $rmasked_file, "" ); + } + return; + } + + # we'll build the masked line token by token + my $masked_line = ""; + + # add leading spaces if not in a higher compression mode + if ( $opt_c <= 1 ) { + + # Find leading whitespace. But be careful..we don't want the + # whitespace if it is part of quoted text, because it will + # already be contained in a token. + if ( $input_line =~ /^(\s+)/ && !$line_of_tokens->{_starting_in_quote} ) + { + $masked_line = $1; + } + } + + # loop over tokens to construct one masked line + for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) { + + # Mask certain token types by replacing them with their type code: + # type definition + # ---- ---------- + # Q quote or pattern + # q qw quote + # h << here doc operator + # # comment + # + # This choice will produce a mask file that has balanced + # container tokens and does not cause parsing problems. + if ( $$rtoken_type[$j] =~ /^[Qqh]$/ ) { + if ( $opt_c <= 1 ) { + $masked_line .= $$rtoken_type[$j] x length( $$rtokens[$j] ); + } + else { + $masked_line .= $$rtoken_type[$j]; + } + } + + # Mask a comment + elsif ( $$rtoken_type[$j] eq '#' ) { + if ( $opt_c == 0 ) { + $masked_line .= '#' x length( $$rtokens[$j] ); + } + } + + # All other tokens go out verbatim + else { + $masked_line .= $$rtokens[$j]; + } + } + print_line( $roriginal_file, $input_line ) if $roriginal_file; + print_line( $rmasked_file, $masked_line ); + + # self-check lengths; this error should never happen + if ( $opt_c == 0 && length($masked_line) != length($input_line) ) { + my $lmask = length($masked_line); + my $linput = length($input_line); + print STDERR +"$input_line_number: length ERROR, masked length=$lmask but input length=$linput\n"; + } +} + +# called once after the last line of a file +sub finish_formatting { + my $self = shift; + return; +} -- 2.39.5