From 66a082ab742d42fd14de8859016027dc61c58349 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Sat, 28 Dec 2024 16:09:31 -0800
Subject: [PATCH] -duk updates

---
 examples/dump_unique_keys.pl | 75 ++++++++++++++++++++++++++++++++++++
 lib/Perl/Tidy.pm             | 13 ++++++-
 lib/Perl/Tidy/Formatter.pm   | 26 ++++++++++---
 3 files changed, 107 insertions(+), 7 deletions(-)
 create mode 100755 examples/dump_unique_keys.pl

diff --git a/examples/dump_unique_keys.pl b/examples/dump_unique_keys.pl
new file mode 100755
index 00000000..badf9b9f
--- /dev/null
+++ b/examples/dump_unique_keys.pl
@@ -0,0 +1,75 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use File::Temp qw{ tempfile };
+
+# Run perltidy --dump-unique-keys on multiple files, and
+# just show keys which only appear in one file.
+# Requires Perl::Tidy version 20240903.08 or higher
+
+my @files;
+my %seen;
+foreach my $file (@ARGV) {
+    if ( !-e $file ) { die "file '$file' not found\n" }
+    next if ( $seen{$file}++ );
+    push @files, $file;
+}
+
+# TODO:
+# - if no files, look for a MANIFEST, or
+# - if 1 file, see if it is a MANIFEST, and
+#   - get files of the form lib/*.pm
+if ( !@files ) { exit 1 }
+
+my ( $fh_tmp, $tmpfile ) = tempfile();
+if ( !$fh_tmp ) {
+    die "unable to open temporary file $tmpfile\n";
+}
+
+# Loop to run perltidy -duk on each file
+foreach my $file (@files) {
+    my $cmd = "perltidy -npro -duk $file >>$tmpfile";
+    my $err = system($cmd);
+    if ($err) { die "perltidy return error $err\n" }
+}
+
+my $fh;
+if ( !open( $fh, '<', $tmpfile ) ) {
+    die "cannot open my temp file '$tmpfile'\n";
+}
+
+my %word_count;
+my @lines;
+foreach my $line (<$fh>) {
+    my $word;
+    if ( $line =~ /^(.*),(\d+)\s*$/ ) {
+        $word = $1;
+        if ( !defined( $word_count{$word} ) ) {
+            $word_count{$word} = 1;
+        }
+        else {
+            $word_count{$word}++;
+        }
+    }
+    push @lines, [ $line, $word ];
+}
+$fh->close();
+
+my @dups = grep { $word_count{$_} > 1 } keys %word_count;
+my %is_dup;
+@is_dup{@dups} = (1) x scalar(@dups);
+
+my $output_string = "";
+foreach my $item (@lines) {
+    my ( $line, $word ) = @{$item};
+    next if ( defined($word) && $is_dup{$word} );
+    $output_string .= $line;
+}
+
+print {*STDOUT} $output_string;
+
+END {
+    if ( defined($tmpfile) && -e $tmpfile ) {
+        unlink $tmpfile or warn "Could not unlink $tmpfile: $!";
+    }
+}
diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm
index 7628574a..9cb8dfe9 100644
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -535,6 +535,7 @@ BEGIN {
         _user_formatter_           => $i++,
         _input_copied_verbatim_    => $i++,
         _input_output_difference_  => $i++,
+        _dump_to_stdout_           => $i++,
     };
 } ## end BEGIN
 
@@ -935,10 +936,13 @@ EOM
         )
       )
     {
-        if ( $rOpts->{$opt_name} && $num_files != 1 ) {
-            Die(<<EOM);
+        if ( $rOpts->{$opt_name} ) {
+            $self->[_dump_to_stdout_] = 1;
+            if ( $num_files != 1 ) {
+                Die(<<EOM);
 --$opt_name expects 1 filename in the arg list but saw $num_files filenames
 EOM
+            }
         }
     }
 
@@ -2306,6 +2310,11 @@ EOM
             }
         }
 
+        # prepare standard output in case of a dump to stdout
+        if ( $is_encoded_data && $self->[_dump_to_stdout_] ) {
+            binmode *STDOUT, ':encoding(UTF-8)';
+        }
+
         $rstatus->{'file_count'} += 1;
         $rstatus->{'output_name'}     = $output_name;
         $rstatus->{'iteration_count'} = 0;
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index d1fb80b3..f93cabda 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -8831,7 +8831,21 @@ sub dump_unique_keys {
     my @K_start_qw_list;
     my $rwords = {};
 
-    my %is_known_key;
+    # Table of some known keys
+    my %is_known_key = (
+        ALRM     => { '$SIG' => 1 },
+        TERM     => { '$SIG' => 1 },
+        INT      => { '$SIG' => 1 },
+        __DIE__  => { '$SIG' => 1 },
+        __WARN__ => { '$SIG' => 1 },
+        HOME     => { '$ENV' => 1 },
+        USER     => { '$ENV' => 1 },
+        LOGNAME  => { '$ENV' => 1 },
+        PATH     => { '$ENV' => 1 },
+        SHELL    => { '$ENV' => 1 },
+        PERL5LIB => { '$ENV' => 1 },
+        PERLLIB  => { '$ENV' => 1 },
+    );
 
     my $add_known_keys = sub {
         my ( $rhash, $name ) = @_;
@@ -8845,6 +8859,7 @@ sub dump_unique_keys {
         }
     }; ## end $add_known_keys = sub
 
+    # Add keys which may be unique to this environment.
     $add_known_keys->( \%SIG, '$SIG' );
     $add_known_keys->( \%ENV, '$ENV' );
     $add_known_keys->( \%!,   '$!' );
@@ -8909,11 +8924,12 @@ sub dump_unique_keys {
         }
         return unless ($word);
 
-        # Skip known hash keys
-        if ( $is_known_key{$word} && $is_known_hash->($word) ) { return }
+        # Bump count of known keys by 1 so that they will not appear as unique
+        my $one = 1;
+        if ( $is_known_key{$word} && $is_known_hash->($word) ) { $one++ }
 
         if ( !defined( $rwords->{$word} ) ) {
-            $rwords->{$word} = [ 1, $KK_last_nb ];
+            $rwords->{$word} = [ $one, $KK_last_nb ];
         }
         else {
             $rwords->{$word}->[0]++;
@@ -9092,7 +9108,7 @@ EOM
         my $input_stream_name = get_input_stream_name();
         chomp $output_string;
         print {*STDOUT} <<EOM;
-$input_stream_name: output for --dump-unique-hash-keys
+==> $input_stream_name <==
 $output_string
 EOM
     }
-- 
2.39.5