From cac802a1ab40a12b03701d80b04305dcb28a21c9 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Wed, 7 Jul 2021 18:16:04 -0700
Subject: [PATCH] rewrote side_comment_test.pl to use masking

---
 dev-bin/side_comment_test.pl | 262 +++++++++++++++++++++++++++++++----
 1 file changed, 233 insertions(+), 29 deletions(-)

diff --git a/dev-bin/side_comment_test.pl b/dev-bin/side_comment_test.pl
index d6809a3d..5daeec4a 100755
--- a/dev-bin/side_comment_test.pl
+++ b/dev-bin/side_comment_test.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 
 # This is a utility to stress test perltidy by inserting as many side comments
-# into a script as possible.
+# into a script as possible.  It has helped locate several bugs.
 
 # Usage:
 
@@ -11,20 +11,35 @@ use warnings;
 # perl scripts.
 
 # 2. Then enter:
-#   side_comment_test.pl ../*
+#
+#      side_comment_test.pl
+#
+#   to operate on all regular files in the parent directory, i.e. '../*'
+
+#   More generally:
+#      side_comment_test.pl file1 [ file2 ...
+#   where file1 .. are the files to operate on
 
 # 3. Look at any files named '*.2.ERR' and try to resolve the problem
 
 # 4. When done, remove the temporary directory
 
-# NOTE: A little glitch is that unuaual here doc targets (i.e., lower case
-# words) will get side comments.
+my $cmd;
+my @files = @ARGV;
+if ( !@files ) { @files = glob('../*'); }
+foreach my $file (@files) {
 
-# If this happens, edit the '.2' file and remove the side comment and run
-# perltidy on that file again.
+    unless ( -e $file && -f $file && -s $file ) {
+        print STDERR "skipping $file\n";
+        next;
+    }
+
+    # Best to skip files written by perltidy
+    if ( $file =~ /\.(tdy|ERR|LOG|DEBUG)$/ ) {
+        print STDERR "skipping $file\n";
+        next;
+    }
 
-my $cmd;
-foreach my $file (@ARGV) {
     my $basename = $file;
     if ( $basename =~ /^(.*)\/([^\/]+)$/ ) { $basename = $2 }
     my $file1 = "$basename.1";
@@ -62,38 +77,48 @@ sub add_side_comments {
     # Given file named $ifile,
     # add as many side comments as possible and write result to $ofile
 
-    my $string1 = get_string($ifile);
-    my @lines   = split /\n/, $string1;
-    foreach my $line (@lines) {
+    # create a mask for use in avoiding placing side comments in unsafe places
+    my ( @mask, @lines );
+    PerlMask::perlmask(
+        _source         => $ifile,
+        _rmasked_file   => \@mask,
+        _roriginal_file => \@lines,
+        _compression    => undef
+    );
 
-        # Skip comments, including hash bang and #<< $>> lines
-        next if ( $line =~ /^\s*#/ );
+    # Check: be sure the source and mask arrays are the same size
+    my $num_mask   = @mask;
+    my $num_source = @lines;
+    if ( $num_mask != $num_source ) {
+        print STDERR
+          "num_source=$num_source != num_mask=$num_mask for file '$ifile'\n";
+        exit 1;
+    }
+
+    # Loop over lines to add side comments where safe to do so
+    my $ix = -1;
+    foreach my $line (@lines) {
+        $ix++;
+        chomp $line;
 
-        next if ( $line =~ /^(__END__|__DATA__)\s*$/ );
+        # Do not put side comments on blank lines
+        next unless ($line);
 
-        # Skip end of format
-        next if ( $line eq '.' );
+        # Do not put side comments on patterns, here targets, __END__,
+        # __DATA__, format end, comments, etc.  This is essential to make this
+        # error free.
+        next if ( $mask[$ix] =~ /[Qq#]\s*$/ );
 
-        # Optional: Avoid problems involving guessing if / starts a pattern
-        next if ( $line eq '/' );
+        # Add a space if needed to avoid creating a punctuation variable
+        if ( $line =~ /[\@\%\$\*]$/ ) { $line .= " " }
 
-        # Try to skip here targets; see note above
-        next if ( $line =~ /^\s*[A-Z_0-9=\.\-]+\s*$/ );
+        # Append the comment
         $line .= "#sc#";
     }
     my $string2 = join "\n", @lines;
     write_file( $ofile, $string2 );
 }
 
-sub get_string {
-    my ($file) = @_;
-    open my $fh, '<', $file or die "cannot open $file: $!\n";
-    local $/ = undef;
-    my $string = <$fh>;
-    close $fh;
-    return $string;
-}
-
 sub write_file {
     my ( $fname, $string, $msg ) = @_;
     open my $fh, '>', $fname or die "cannot open $fname: $!\n";
@@ -102,3 +127,182 @@ sub write_file {
     print STDERR "Wrote $fname\n" if ($msg);
     return;
 }
+
+#####################################################################
+#
+# The PerlMask package is an interface to perltidy which accepts a
+# source filehandle and returns a 'masked' version of the source as
+# a string or array.  It can also optionally return the original file
+# as a string or array.
+#
+# It works by making a callback object with a write_line() method to
+# receive tokenized lines from perltidy.  This write_line method
+# selectively replaces tokens with either their original text or with a
+# benign masking character (such as '#' or 'Q').
+#
+# Usage:
+#
+#   PerlMask::perlmask(
+#       _source         => $fh,             # required source
+#       _rmasked_file   => \$masked_file,   # required ref to ARRAY or SCALAR
+#       _roriginal_file => \$original_file, # optional ref to ARRAY or SCALAR
+#       _compression    => $opt_c           # optional
+#   );
+#
+# _source is any source that perltidy will accept, including a
+# filehandle or reference to SCALAR or ARRAY
+#
+# The compression flag may have these values:
+#  0 all mask file line numbers and character positions agree with
+#    original file (DEFAULT)
+#  1 line numbers agree and character positions agree within lines of code
+#  2 line numbers agree but character positions do not
+#  3 no correspondence between line numbers or character positions
+#
+#####################################################################
+
+package PerlMask;
+use Carp;
+use Perl::Tidy;
+
+sub perlmask {
+
+    my %args  = ( _compression => 0, @_ );
+    my $rfile = $args{_rmasked_file};
+    unless ( defined($rfile) ) {
+        croak
+          "Missing required parameter '_rmasked_file' in call to perlmask\n";
+    }
+    my $ref = ref($rfile);
+    unless ( $ref =~ /^(SCALAR|ARRAY)$/ ) {
+        croak <<EOM;
+Expecting _rmasked_file = ref to SCALAR or ARRAY in perlmask but got : ($ref)
+EOM
+    }
+
+    # run perltidy, which will call $formatter's write_line() for each line
+    my $err = perltidy(
+        'source'    => $args{_source},
+        'formatter' => bless( \%args, __PACKAGE__ ),    # callback object
+        'argv'      => "-npro -se",    # -npro : ignore .perltidyrc,
+                                       # -se   : errors to STDOUT
+    );
+    if ($err) {
+        my $name = $args{_source};
+        print STDERR "perltidy returns error flag for source=$name\n";
+        return;
+    }
+}
+
+sub print_line {
+
+    # called from write_line to dispatch one line (either masked or original)..
+    # here we'll either append it to a string or array, as appropriate
+    my ( $rfile, $line ) = @_;
+    if ( defined($rfile) ) {
+        if ( ref($rfile) eq 'SCALAR' ) {
+            $$rfile .= $line . "\n";
+        }
+        elsif ( ref($rfile) eq 'ARRAY' ) {
+            push @{$rfile}, $line . "\n";
+        }
+    }
+}
+
+sub write_line {
+
+    # This is called from perltidy line-by-line
+    my ( $self, $line_of_tokens ) = @_;
+    my $rmasked_file   = $self->{_rmasked_file};
+    my $roriginal_file = $self->{_roriginal_file};
+    my $opt_c          = $self->{_compression};
+    $opt_c = 0 unless defined($opt_c);
+
+    my $line_type         = $line_of_tokens->{_line_type};
+    my $input_line_number = $line_of_tokens->{_line_number};
+    my $input_line        = $line_of_tokens->{_line_text};
+    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
+    my $rtokens           = $line_of_tokens->{_rtokens};
+    chomp $input_line;
+
+    # mask non-CODE lines
+    if ( $line_type ne 'CODE' ) {
+        return if ( $opt_c == 3 );
+        my $len = length($input_line);
+        if ( $opt_c == 0 && $len > 0 ) {
+            print_line( $roriginal_file, $input_line ) if $roriginal_file;
+            print_line( $rmasked_file,   '#' x $len );
+        }
+        else {
+            print_line( $roriginal_file, $input_line ) if $roriginal_file;
+            print_line( $rmasked_file,   "" );
+        }
+        return;
+    }
+
+    # we'll build the masked line token by token
+    my $masked_line = "";
+
+    # add leading spaces if not in a higher compression mode
+    if ( $opt_c <= 1 ) {
+
+        # Find leading whitespace.  But be careful..we don't want the
+        # whitespace if it is part of quoted text, because it will
+        # already be contained in a token.
+        if ( $input_line =~ /^(\s+)/ && !$line_of_tokens->{_starting_in_quote} )
+        {
+            $masked_line = $1;
+        }
+    }
+
+    # loop over tokens to construct one masked line
+    for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
+
+        # Mask certain token types by replacing them with their type code:
+        # type  definition
+        # ----  ----------
+        # Q     quote or pattern
+        # q     qw quote
+        # h     << here doc operator
+        # #     comment
+        #
+        # This choice will produce a mask file that has balanced
+        # container tokens and does not cause parsing problems.
+        if ( $$rtoken_type[$j] =~ /^[Qqh]$/ ) {
+            if ( $opt_c <= 1 ) {
+                $masked_line .= $$rtoken_type[$j] x length( $$rtokens[$j] );
+            }
+            else {
+                $masked_line .= $$rtoken_type[$j];
+            }
+        }
+
+        # Mask a comment
+        elsif ( $$rtoken_type[$j] eq '#' ) {
+            if ( $opt_c == 0 ) {
+                $masked_line .= '#' x length( $$rtokens[$j] );
+            }
+        }
+
+        # All other tokens go out verbatim
+        else {
+            $masked_line .= $$rtokens[$j];
+        }
+    }
+    print_line( $roriginal_file, $input_line ) if $roriginal_file;
+    print_line( $rmasked_file,   $masked_line );
+
+    # self-check lengths; this error should never happen
+    if ( $opt_c == 0 && length($masked_line) != length($input_line) ) {
+        my $lmask  = length($masked_line);
+        my $linput = length($input_line);
+        print STDERR
+"$input_line_number: length ERROR, masked length=$lmask but input length=$linput\n";
+    }
+}
+
+# called once after the last line of a file
+sub finish_formatting {
+    my $self = shift;
+    return;
+}
-- 
2.39.5