#! /usr/bin/perl # acromang tries to solve acrostics, and is released # under the terms of the GPL version 2, or any later version, at your # option. See the file README and COPYING for more information. # Copyright 2008 by Don Armstrong . # $Id: perl_script 1153 2008-04-08 00:04:20Z don $ use warnings; use strict; use Getopt::Long; use Pod::Usage; =head1 NAME acromang - try to solve acrostics =head1 SYNOPSIS acromang m??s??c??o [options] Options: --debug, -d debugging level (Default 0) --help, -h display this help --man, -m display manual =head1 OPTIONS =over =item B<--wordlist> Default wordlist, should be gigantic; defaults to /usr/share/dict/words =item B<--database-dir> Directory for storing databases; defaults to ~/.acromang =item B<--debug, -d> Debug verbosity. (Default 0) =item B<--help, -h> Display brief useage information. =item B<--man, -m> Display this manual. =back =head1 EXAMPLES =cut use vars qw($DEBUG); use User; use Params::Validate qw(:types validate_with); use IO::File; use DB_File; use MLDBM qw(DB_File Storable); use Fcntl qw/O_RDWR O_RDONLY O_CREAT O_TRUNC/; # Use portable Storable images $MLDBM::DumpMeth=q(portable); use File::stat; use List::Util qw(min max); use POSIX qw(floor); my %options = (debug => 0, help => 0, man => 0, wordlist => '/usr/share/dict/words', database_dir => User->Home.'/.acromang/', show_all => 0, multiword => 0, ); my @puzzle_types = qw(acrostic anagram substitution); GetOptions(\%options, 'wordlist=s','database_dir|database-dir=s', @puzzle_types, 'debug|d+','help|h|?','man|m'); pod2usage() if $options{help}; pod2usage({verbose=>2}) if $options{man}; $DEBUG = $options{debug}; my @USAGE_ERRORS; if (not grep {exists $options{$_}} @puzzle_types) { $options{acrostic} = 1 } if (1 != grep {exists $options{$_}} @puzzle_types) { push @USAGE_ERRORS, "You must pass exactly one of ".join(', ',map {"--$_ "} @puzzle_types); } pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; my $text; { local $/; $text = <>; } my @words = map {s/[^A-Za-z\?]//g; length $_?$_:();} split /[\s\n]+/, $text; if (not @words) { print STDERR "There were no words in the input\n"; exit 1; } # letter order my @l_o = ('a'..'z'); my %l_o; @l_o{@l_o} = (0 .. 25); sub word_sanitize{ return map {tr/A-Z/a-z/; tr/éüöáí/euoai/; $_ } @_ } my $database = update_and_load_database($options{database_dir},$options{wordlist}); # run through and use the database # for now, concentrate on finding single word solutions if ($options{acrostic}) { } elsif ($options{acrostic}) { foreach my $word (@words) { my $length = length($word); my @let = split //, $word; my %pos; for my $p (0..$#let) { next if $let[$p] eq '?'; $pos{$p} = $let[$p]; } my @allowable_words = find_allowable_words(database => $database,position => \%pos,length => $length); if (@allowable_words > 200 and not $options{show_all}) { print STDERR "Only showing 200 of the " . @allowable_words . " possible words\n"; @allowable_words = @allowable_words[0..199]; } # multiword currently not enabled # while ((@allowable_words == 0 or $options{multiword}) and # (@allowable_words < 200 or $options{show_all}) # ) { # # try for multiple word solutions, start with the longest # # words possible # # # try to split the number of known letters in half, and # # start increasing and decreasing in both directions # # # don't attempt to split each part into bits unless there # # are no solutions # # # avoid searching for words when we only have a length and # # no position information # } print map {$_,qq(\n)} sort @allowable_words; } } elsif ($options{anagram}) { foreach my $word (@words) { my $length = length($word); my $sorted_word = join ('', sort split //, $word); my @allowable_words = find_allowable_words(database => $database, anagram => $sorted_word, ); print map {$_,qq(\n)} sort @allowable_words; } } sub find_allowable_words { my %param = validate_with(params => \@_, spec => {database => {type => HASHREF, }, position => {type => HASHREF, optional => 1, }, length => {type => SCALAR, optional => 1, }, substitution => {type => SCALAR, optional => 1, }, rotation => {type => SCALAR, optional => 1, }, anagram => {type => SCALAR, optional => 1, }, }, ); my $database = $param{database}; my $must_match = 0; if (exists $param{length} and defined $param{length} and $param{length} > 0) { $must_match++; } if (exists $param{position} and defined $param{position}) { $must_match += scalar keys %{$param{position}}; } if (exists $param{anagram} and defined $param{anagram}) { $must_match++; } if ($must_match <= 0) { die "There must be something to try matching against"; } my %words; if (exists $param{length} and defined $param{length} and $param{length} > 0) { for my $word (@{$database->{length}{$param{length}}}) { $words{$word}++; } } if (exists $param{position} and defined $param{position}) { for my $position (keys %{$param{position}}) { for my $word (@{$database->{position}{$position . ' ' . $param{position}{$position}}}) { $words{$word}++; } } } if (exists $param{anagram} and defined $param{anagram}) { for my $word (@{$database->{anagram}{$param{anagram}}}) { $words{$word}++; } } return grep {$words{$_} >= $must_match} keys %words; } sub solve_acrostic { my ($acrostic) = @_; # need to find unsolved words # if there are multiple intersecting unsolved locations, try to # solve the hard ones first, then the easy ones. # given the current state of the board, find all possible words # that fit the fixed positions # find unsolved words # find positions with _ or ?, * or | are unsolved bits # scan the puzzle from left to right, top to bottom (across # answers), then top to bottom, left to right (down answers) my $in_word=0; my $current_answer=0; sub __add_to_answers { my ($acrostic,$current_answer,$in_word,$pos) = @_; if ($acrostic->{grid}[$pos] !~ /^\*\|$/) { if (not $$in_word) { $$current_answer++; } $$in_word = 1; $acrostic->{answers}[$$current_answer]{letters} .= $acrostic->{grid}[$pos]; $acrostic->{answers}[$$current_answer]{length}++; $acrostic->{answers}[$$current_answer]{unsolved} = 0; push @{$acrostic->{answers}[$current_answer]{positions}}, $pos; push @{$acrostic->{answer_grid}[$pos]}, $current_answer; if ($acrostic->{grid}[$pos] =~ /^\_\?$/) { $acrostic->{answers}[$$current_answer]{unsolved} = 1; } } else { $$in_word = 0; } } for my $width (0..($acrostic->{width}-1)) { $in_word = 0; for my $height (0..($acrostic->{height}-1)) { my $pos = $height*$acrostic->{height} + $width; __add_to_answers($acrostic,\$current_answer,\$in_word,$pos); } } for my $height (0..($acrostic->{height}-1)) { $in_word = 0; for my $width (0..($acrostic->{width}-1)) { my $pos = $height*$acrostic->{height} + $width; __add_to_answers($acrostic,\$current_answer,\$in_word,$pos); } } # for all possible answers which are unsolved, find allowable words for my $answer (0..$#{$acrostic->{answers}}) { } # for all crossing positions, find allowed letters and eliminate # words which do not share a letter in common with a word in the # opposite direction # for all crossing positions, eliminate words which do not share a # letter in common with a word in the opposite direction } sub update_and_load_database { my ($dir,$wordlist) = @_; # check to see if the wordlist is newer than our database my @dbs = qw(length position substitution rotation anagram); my $update_required = 0; my $wordlist_time = 1; if (! -r $wordlist) { die "Word list $wordlist doesn't exist or isn't readable"; } my $wordlist_stat = stat($wordlist); $wordlist_time = max($wordlist_time,$wordlist_stat->mtime); for my $db (@dbs) { if (! -e "$dir/db_${db}") { $update_required = 1; } elsif (stat("$dir/db_${db}")->mtime < $wordlist_time) { $update_required = 1; } } my $database; if ($update_required) { my $disk_db = {}; if (not -d "$dir") { mkdir($dir) or die "Unable to create directory $dir: $!"; } for my $db (@dbs) { $database->{$db} = {}; $disk_db->{$db} = {}; tie %{$disk_db->{$db}}, MLDBM => "$dir/db_${db}", O_RDWR|O_CREAT|O_TRUNC, 0666 or die "Unable to open/create $dir/db_${db}: $!"; } my $wordlist_fh = IO::File->new($wordlist,'r') or die "Unable to open $wordlist for reading: $!"; my %seen_words; while (<$wordlist_fh>) { chomp; next unless length $_; utf8::upgrade($_); my ($word) = word_sanitize($_); $word =~ s/[^a-z]//g; next unless length $_; next if exists $seen_words{$word}; $seen_words{$word} = 1; if ((keys %seen_words) % 100 == 0) { print STDERR "Handled ".(keys %seen_words) . " words, on $word\n"; } my @l = split //, $word; my $l = length($word); for my $p (0..$#l) { # position from the beginning my $temp = $database->{position}{"$p $l[$p]"}; $temp = [] if not defined $temp; push @{$temp}, $word; $database->{position}{"$p $l[$p]"} = $temp; # this is the position from the end $temp = $database->{position}{$p-$l . " " . $l[$p-$l]}; $temp = [] if not defined $temp; push @{$temp}, $word; $database->{position}{$p-$l . " " . $l[$p-$l]} = $temp; } my $temp = $database->{length}{$l}; $temp = [] if not defined $temp; push @{$temp}, $word; $database->{length}{$l} = $temp; # this is the substitution database # in it, hello and giddy both become abccd my %uc = (); my @uc_order = map { if (exists $uc{$_}) { (); } else { $uc{$_} = 1; $_; } } @l; my %s_map; @s_map{@uc_order} = ('a'..'z')[0..$#uc_order] if @uc_order; my $mapped_word = join('',map {$s_map{$_}} @l); $temp = $database->{substitution}{$mapped_word}; $temp = [] if not defined $temp; push @{$temp}, $word; $database->{substitution}{$mapped_word} = $temp; # this is the rotation database (for ceaser cyphers) # in it, hello becomes axeeh, giddy acxxs my $fl = $l[0]; my $index = $l_o{$fl}; if (not defined $index or grep {not exists $l_o{$_}} @l) { print STDERR "Problem with some letters in '$word'\n"; } $mapped_word = join('',map {$l_o[($l_o{$_} - $index + 26) % 26]} @l); $temp = $database->{rotation}{$mapped_word}; $temp = [] if not defined $temp; push @{$temp}, $word; $database->{rotation}{$mapped_word} = $temp; # this is the anagram database # in it, hello becomes ehllo, giddy ddgiy $mapped_word = join('',sort @l); $temp = $database->{anagram}{$mapped_word}; $temp = [] if not defined $temp; push @{$temp}, $word; $database->{anagram}{$mapped_word} = $temp; } for my $key1 (keys %{$database}) { for my $key2 (keys %{$database->{$key1}}) { $disk_db->{$key1}{$key2} = $database->{$key1}{$key2}; } } } else { for my $db (@dbs) { $database->{$db} = {}; tie %{$database->{$db}}, MLDBM => "$dir/db_${db}", O_RDONLY, 0666 or die "Unable to open $dir/db_${db}: $!"; } } return $database; } __END__