From ebc595e86de657b570d5b151b74affd9c81b336d Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Mon, 23 Dec 2024 18:00:34 -0800 Subject: [PATCH] add preliminary code for --dump-unique-hash-key --- lib/Perl/Tidy/Formatter.pm | 217 +++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 645cffad..8df53377 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -8093,6 +8093,14 @@ EOM $self->find_multiline_qw($rqw_lines); } +## POSSIBLE FUTURE OPTION: +## # Dump unique hash keys +## $rOpts->{'dump-unique-hash_keys'}=1; +## if ( $rOpts->{'dump-unique-hash_keys'} ) { +## $self->dump_unique_hash_keys(); +## Exit(0); +## } + # Dump any requested block summary data if ( $rOpts->{'dump-block-summary'} ) { $self->dump_block_summary(); @@ -8797,6 +8805,215 @@ sub follow_if_chain { return $rchain; } ## end sub follow_if_chain +sub dump_unique_hash_keys { + my ($self) = @_; + + # NOTE: This is a possible future dump option, still under development + # Implement --dump-unique-hash-keys; -duk? + # Look through all tokens and dump list of hash keys used just one time + + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + my $K_closing_container = $self->[_K_closing_container_]; + + my $KK = -1; + my $KK_last_nb; + my $KK_this_nb = 0; + + my $K_end_constant = -1; + + #---------------------------------------------- + # Main loop to examine all hash keys and quotes + #---------------------------------------------- + my @Q_list; + my @q_list; + my $rwords = {}; + + my $push_KK_last_nb = sub { + + # if the previous nonblank token was a hash key of type + # 'Q' or 'w', then update its count + + # We are ignoring constant definitions + if ( $KK < $K_end_constant ) { return } + + my $type_last = $rLL->[$KK_last_nb]->[_TYPE_]; + my $token_last = $rLL->[$KK_last_nb]->[_TOKEN_]; + my $word; + if ( $type_last eq 'w' ) { + $word = $token_last; + } + elsif ( $type_last eq 'Q' ) { + $word = substr( $token_last, 1, -1 ); + + # Ignore text with interpolated values + my $ch0 = substr( $token_last, 0, 1 ); + if ( $ch0 eq '"' ) { + foreach my $sigil ( '$', '@' ) { + my $pos = index( $word, $sigil ); + next if ( $pos < 0 ); + return if ( $pos == 0 ); + my $ch_test = substr( $word, $pos - 1, 1 ); + return if ( $ch_test ne '\\' ); + } + } + + pop @Q_list; + } + else { + # not a quote - possibly identifier + return; + } + return unless ($word); + if ( !defined( $rwords->{$word} ) ) { + $rwords->{$word} = [ 1, $KK_last_nb ]; + } + else { + $rwords->{$word}->[0]++; + } + return; + }; ## end $push_KK_last_nb = sub + + #-------------------------- + # Main loop over all tokens + #-------------------------- + while ( ++$KK <= $Klimit ) { + + my $type = $rLL->[$KK]->[_TYPE_]; + next if ( $type eq 'b' ); + next if ( $type eq '#' ); + $KK_last_nb = $KK_this_nb; + $KK_this_nb = $KK; + my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + if ($seqno) { + ##my $token = $rLL->[$KK]->[_TOKEN_]; + if ( $is_opening_type{$type} ) { + + } + else { + + # closing hash brace, '}' + if ( $type eq 'R' ) { + $push_KK_last_nb->(); + } + } + } + else { + if ( $type eq '=>' ) { + my $parent_seqno = $self->parent_seqno_by_K($KK); + if ( $parent_seqno && $ris_list_by_seqno->{$parent_seqno} ) { + $push_KK_last_nb->(); + } + } + elsif ( $type eq 'Q' ) { + push @Q_list, $KK; + } + elsif ( $type eq 'q' ) { + push @q_list, $KK; + } + elsif ( $type eq 'k' ) { + + # Look for 'use constant' and define its ending token + if ( $rLL->[$KK]->[_TOKEN_] eq 'use' ) { + my $Kn = $self->K_next_code($KK); + next if ( !defined($Kn) ); + next if ( $rLL->[$Kn]->[_TOKEN_] ne 'constant' ); + $Kn = $self->K_next_code($Kn); + next if ( !defined($Kn) ); + my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_]; + if ($seqno_n) { + + # skip a block of constant definitions + my $token_n = $rLL->[$Kn]->[_TOKEN_]; + if ( $token_n eq '{' ) { + $K_end_constant = $K_closing_container->{$seqno_n}; + } + else { + ## unexpected format, skip + } + } + else { + + # skip a single constant definition + $K_end_constant = $Kn + 1; + } + } + } + else { + # continue search + } + } + } ## end while ( ++$KK <= $Klimit ) + + # find hash keys seen just one time + my %unique_words; + foreach my $key ( keys %{$rwords} ) { + my ( $count, $K ) = @{ $rwords->{$key} }; + next if ( $count != 1 ); + $unique_words{$key} = $K; + } + + return if ( !%unique_words ); + + # check each unique word against the list of type Q tokens + if (@Q_list) { + my $imax = $#Q_list; + foreach my $i ( 0 .. $imax ) { + + # Ignore multiline quotes + my $K = $Q_list[$i]; + if ( ( $i == 0 || $Q_list[ $i - 1 ] + 1 != $K ) + && ( $i == $imax || $Q_list[ $i + 1 ] != $K + 1 ) ) + { + + # remove quotes + my $word = substr( $rLL->[$K]->[_TOKEN_], 1, -1 ); + + if ( $unique_words{$word} ) { + delete $unique_words{$word}; + } + } + } + } + + return if ( !%unique_words ); + + # TODO: check each remaining word against the list of type q tokens + foreach my $K_uu (@q_list) { + + # TODO: may need to check for multiline + + } + + return if ( !%unique_words ); + + # report unique words + my $output_string = EMPTY_STRING; + my @list; + foreach my $word ( keys %unique_words ) { + my $K = $unique_words{$word}; + my $lno = $rLL->[$K]->[_LINE_INDEX_] + 1; + push @list, [ $word, $lno ]; + } + @list = sort { $a->[1] <=> $b->[1] || $a->[0] <=> $b->[0] } @list; + foreach my $item (@list) { + my ( $word, $lno ) = @{$item}; + $output_string .= "$word,$lno\n"; + } + if ($output_string) { + my $input_stream_name = get_input_stream_name(); + chomp $output_string; + print {*STDOUT} <