From: Don Armstrong Date: Sat, 17 Jan 2009 00:48:08 +0000 (+0000) Subject: * update anamang X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=ca8868f3f26e125fa44418d1faa76c2f0e5de00c;p=bin.git * update anamang --- diff --git a/anamang b/anamang new file mode 100755 index 0000000..7a497d1 --- /dev/null +++ b/anamang @@ -0,0 +1,352 @@ +#! /usr/bin/perl +# acromang tries to solve acrostics, and is released +# under the terms of the GPL version 2, or any later version, at your +# option. See the file README and COPYING for more information. +# Copyright 2008 by Don Armstrong . +# $Id: perl_script 1153 2008-04-08 00:04:20Z don $ + + +use warnings; +use strict; + +use Getopt::Long; +use Pod::Usage; + +=head1 NAME + +acromang - try to solve acrostics + +=head1 SYNOPSIS + +acromang m??s??c??o [options] + + Options: + --debug, -d debugging level (Default 0) + --help, -h display this help + --man, -m display manual + +=head1 OPTIONS + +=over + +=item B<--wordlist> + +Default wordlist, should be gigantic; defaults to +/usr/share/dict/words + +=item B<--database-dir> + +Directory for storing databases; defaults to ~/.acromang + +=item B<--debug, -d> + +Debug verbosity. (Default 0) + +=item B<--help, -h> + +Display brief useage information. + +=item B<--man, -m> + +Display this manual. + +=back + +=head1 EXAMPLES + + +=cut + + +use vars qw($DEBUG); + +use User; + + +use Params::Validate qw(:types validate_with); +use IO::File; +use DB_File; +use MLDBM qw(DB_File Storable); +use Fcntl qw/O_RDWR O_RDONLY O_CREAT O_TRUNC/; +# Use portable Storable images +$MLDBM::DumpMeth=q(portable); + + +use File::stat; +use List::Util qw(min max); + +my %options = (debug => 0, + help => 0, + man => 0, + wordlist => '/usr/share/dict/words', + database_dir => User->Home.'/.acromang/', + show_all => 0, + multiword => 0, + ); + +my @puzzle_types = qw(acrostic anagram substitution); + + + +GetOptions(\%options, + 'wordlist=s','database_dir|database-dir=s', + @puzzle_types, + 'debug|d+','help|h|?','man|m'); + +pod2usage() if $options{help}; +pod2usage({verbose=>2}) if $options{man}; + +$DEBUG = $options{debug}; + +my @USAGE_ERRORS; +if (not grep {exists $options{$_}} @puzzle_types) { + $options{acrostic} = 1 +} + +if (1 != grep {exists $options{$_}} @puzzle_types) { + push @USAGE_ERRORS, "You must pass exactly one of ".join(', ',map {"--$_ "} @puzzle_types); +} + +pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; + +my $text; +{ local $/; + $text = <>; +} +my @words = map {s/[^A-Za-z\?]//g; length $_?$_:();} split /[\s\n]+/, $text; +if (not @words) { + print STDERR "There were no words in the input\n"; + exit 1; +} + +my $database = update_and_load_database($options{database_dir},$options{wordlist}); + +# letter order +my @l_o = ('a'..'z'); +my %l_o; +@l_o{@l_o} = (0 .. 25); + + +# run through and use the database + +# for now, concentrate on finding single word solutions +if ($options{acrostic}) { + foreach my $word (@words) { + my $length = length($word); + my @let = split //, $word; + my %pos; + for my $p (0..$#let) { + next if $let[$p] eq '?'; + $pos{$p} = $let[$p]; + } + my @allowable_words = find_allowable_words($database,\%pos,$length); + if (@allowable_words > 200 and not $options{show_all}) { + print STDERR "Only showing 200 of the " . @allowable_words . " possible words\n"; + @allowable_words = @allowable_words[0..199]; + } + while ((@allowable_words == 0 or $options{multiword}) and + (@allowable_words < 200 or $options{show_all}) + ) { + # try for multiple word solutions, start with the longest + # words possible + + # try to split the number of known letters in half, and + # start increasing and decreasing in both directions + + # don't attempt to split each part into bits unless there + # are no solutions + + # avoid searching for words when we only have a length and + # no position information + } + print map {$_,qq(\n)} sort @allowable_words; + } +} +elsif ($options{anagram}) { + foreach my $word (@words) { + my $length = length($word); + my $sorted_word = join ('', sort split //, $word); + my @allowable_words = find_allowable_words(database => $database, + anagram => $sorted_word, + ); + print map {$_,qq(\n)} sort @allowable_words; + } +} + +sub find_allowable_words { + my %param = validate_with(params => \@_, + spec => {database => {type => HASHREF, + }, + position => {type => HASHREF, + optional => 1, + }, + length => {type => SCALAR, + optional => 1, + }, + substitution => {type => SCALAR, + optional => 1, + }, + rotation => {type => SCALAR, + optional => 1, + }, + anagram => {type => SCALAR, + optional => 1, + }, + }, + ); + + my $database = $param{database}; + my $must_match = 0; + if (exists $param{length} and defined $param{length} and $param{length} > 0) { + $must_match++; + } + if (exists $param{position} and defined $param{position}) { + $must_match += scalar keys %{$param{position}}; + } + if (exists $param{anagram} and defined $param{anagram}) { + $must_match++; + } + + if ($must_match <= 0) { + die "There must be something to try matching against"; + } + my %words; + if (exists $param{length} and defined $param{length} and $param{length} > 0) { + for my $word (@{$database->{length}{$param{length}}}) { + $words{$word}++; + } + } + if (exists $param{position} and defined $param{position}) { + for my $position (keys %{$param{position}}) { + for my $word (@{$database->{position}{$position . ' ' . $param{position}{$position}}}) { + $words{$word}++; + } + } + } + if (exists $param{anagram} and defined $param{anagram}) { + for my $word (@{$database->{anagram}{$param{anagram}}}) { + $words{$word}++; + } + } + return grep {$words{$_} >= $must_match} keys %words; +} + + +sub update_and_load_database { + my ($dir,$wordlist) = @_; + # check to see if the wordlist is newer than our database + my $update_required = 0; + my $wordlist_time = 1; + if (! -r $wordlist) { + die "Word list $wordlist doesn't exist or isn't readable"; + } + my $wordlist_stat = stat($wordlist); + $wordlist_time = max($wordlist_time,$wordlist_stat->mtime); + for my $db (qw(length position)) { + if (! -e "$dir/db_${db}") { + $update_required = 1; + } + elsif (stat($wordlist)->mtime < $wordlist_time) { + $update_required = 1; + } + } + my $database; + if ($update_required) { + my $disk_db = {}; + if (not -d "$dir") { + mkdir($dir) or die "Unable to create directory $dir: $!"; + } + for my $db (qw(length position substitution rotation anagram)) { + $database->{$db} = {}; + $disk_db->{$db} = {}; + tie %{$disk_db->{$db}}, MLDBM => "$dir/db_${db}", + O_RDWR|O_CREAT|O_TRUNC, 0666 + or die "Unable to open/create $dir/db_${db}: $!"; + } + my $wordlist_fh = IO::File->new($wordlist,'r') or + die "Unable to open $wordlist for reading: $!"; + my %seen_words; + while (<$wordlist_fh>) { + chomp; + next unless length $_; + my $word = lc($_); + next if exists $seen_words{$word}; + $seen_words{$word} = 1; + if ((keys %seen_words) % 100 == 0) { + print STDERR "Handled ".(keys %seen_words) . "words, on $word\n"; + } + my @l = split //, $word; + my $l = length($word); + for my $p (0..$#l) { + # position from the beginning + my $temp = $database->{position}{"$p $l[$p]"}; + $temp = [] if not defined $temp; + push @{$temp}, $word; + $database->{position}{"$p $l[$p]"} = $temp; + # this is the position from the end + $temp = $database->{position}{$p-$l . " " . $l[$p-$l]}; + $temp = [] if not defined $temp; + push @{$temp}, $word; + $database->{position}{$p-$l . " " . $l[$p-$l]} = $temp; + } + my $temp = $database->{length}{$l}; + $temp = [] if not defined $temp; + push @{$temp}, $word; + $database->{length}{$l} = $temp; + # this is the substitution database + # in it, hello and giddy both become abccd + my %uc = (); + my @uc_order = map { + if (exists $uc{$_}) { + (); + } else { + $uc{$_} = 1; + $_; + } + } @l; + my %s_map; + @s_map{@uc_order} = ('a'..'z')[0..$#uc_order] if @uc_order; + my $mapped_word = join('',map {$s_map{$_}} @l); + $temp = $database->{substitution}{$mapped_word}; + $temp = [] if not defined $temp; + push @{$temp}, $word; + $database->{substitution}{$mapped_word} = $temp; + + # this is the rotation database (for ceaser cyphers) + # in it, hello becomes axeeh, giddy acxxs + my $fl = $l[0]; + my $index = $l_o{$fl}; + $mapped_word = join('',map {$l_o[($l_o{$_} - $index) % 26]} @l); + $temp = $database->{rotation}{$mapped_word}; + $temp = [] if not defined $temp; + push @{$temp}, $word; + $database->{rotation}{$mapped_word} = $temp; + + # this is the anagram database + # in it, hello becomes ehllo, giddy ddgiy + $mapped_word = join('',sort @l); + $temp = $database->{anagram}{$mapped_word}; + $temp = [] if not defined $temp; + push @{$temp}, $word; + $database->{anagram}{$mapped_word} = $temp; + } + for my $key1 (keys %{$database}) { + for my $key2 (keys %{$database->{$key1}}) { + $disk_db->{$key1}{$key2} = $database->{$key1}{$key2}; + } + } + } + else { + for my $db (qw(length position)) { + $database->{$db} = {}; + tie %{$database->{$db}}, MLDBM => "$dir/db_${db}", + O_RDONLY, 0666 + or die "Unable to open $dir/db_${db}: $!"; + } + } + return $database; + +} + + +__END__