X-Git-Url: https://git.donarmstrong.com/?p=bin.git;a=blobdiff_plain;f=anamang;h=42467a8a5dba0ed2e3608b269ed70b3fb4c5f9df;hp=5ed37612fe3bf9b12671445adfa3b98e3c7217cf;hb=HEAD;hpb=3438a2f1682867b0244721ca9b1bf15e4da2496f diff --git a/anamang b/anamang index 5ed3761..42467a8 100755 --- a/anamang +++ b/anamang @@ -74,6 +74,7 @@ $MLDBM::DumpMeth=q(portable); use File::stat; use List::Util qw(min max); +use POSIX qw(floor); my %options = (debug => 0, help => 0, @@ -119,18 +120,22 @@ if (not @words) { exit 1; } -my $database = update_and_load_database($options{database_dir},$options{wordlist}); - # letter order my @l_o = ('a'..'z'); my %l_o; @l_o{@l_o} = (0 .. 25); +sub word_sanitize{ + return map {tr/A-Z/a-z/; tr/éüöáí/euoai/; $_ } @_ +} +my $database = update_and_load_database($options{database_dir},$options{wordlist}); # run through and use the database # for now, concentrate on finding single word solutions if ($options{acrostic}) { +} +elsif ($options{acrostic}) { foreach my $word (@words) { my $length = length($word); my @let = split //, $word; @@ -139,26 +144,27 @@ if ($options{acrostic}) { next if $let[$p] eq '?'; $pos{$p} = $let[$p]; } - my @allowable_words = find_allowable_words($database,\%pos,$length); + my @allowable_words = find_allowable_words(database => $database,position => \%pos,length => $length); if (@allowable_words > 200 and not $options{show_all}) { print STDERR "Only showing 200 of the " . @allowable_words . " possible words\n"; @allowable_words = @allowable_words[0..199]; } - while ((@allowable_words == 0 or $options{multiword}) and - (@allowable_words < 200 or $options{show_all}) - ) { - # try for multiple word solutions, start with the longest - # words possible - - # try to split the number of known letters in half, and - # start increasing and decreasing in both directions - - # don't attempt to split each part into bits unless there - # are no solutions - - # avoid searching for words when we only have a length and - # no position information - } + # multiword currently not enabled + # while ((@allowable_words == 0 or $options{multiword}) and + # (@allowable_words < 200 or $options{show_all}) + # ) { + # # try for multiple word solutions, start with the longest + # # words possible + # + # # try to split the number of known letters in half, and + # # start increasing and decreasing in both directions + # + # # don't attempt to split each part into bits unless there + # # are no solutions + # + # # avoid searching for words when we only have a length and + # # no position information + # } print map {$_,qq(\n)} sort @allowable_words; } } @@ -231,10 +237,78 @@ sub find_allowable_words { return grep {$words{$_} >= $must_match} keys %words; } +sub solve_acrostic { + my ($acrostic) = @_; + # need to find unsolved words + + # if there are multiple intersecting unsolved locations, try to + # solve the hard ones first, then the easy ones. + + # given the current state of the board, find all possible words + # that fit the fixed positions + + # find unsolved words + + # find positions with _ or ?, * or | are unsolved bits + + # scan the puzzle from left to right, top to bottom (across + # answers), then top to bottom, left to right (down answers) + my $in_word=0; + my $current_answer=0; + + sub __add_to_answers { + my ($acrostic,$current_answer,$in_word,$pos) = @_; + if ($acrostic->{grid}[$pos] !~ /^\*\|$/) { + if (not $$in_word) { + $$current_answer++; + } + $$in_word = 1; + $acrostic->{answers}[$$current_answer]{letters} .= $acrostic->{grid}[$pos]; + $acrostic->{answers}[$$current_answer]{length}++; + $acrostic->{answers}[$$current_answer]{unsolved} = 0; + push @{$acrostic->{answers}[$current_answer]{positions}}, $pos; + push @{$acrostic->{answer_grid}[$pos]}, $current_answer; + if ($acrostic->{grid}[$pos] =~ /^\_\?$/) { + $acrostic->{answers}[$$current_answer]{unsolved} = 1; + } + } + else { + $$in_word = 0; + } + } + + for my $width (0..($acrostic->{width}-1)) { + $in_word = 0; + for my $height (0..($acrostic->{height}-1)) { + my $pos = $height*$acrostic->{height} + $width; + __add_to_answers($acrostic,\$current_answer,\$in_word,$pos); + } + } + for my $height (0..($acrostic->{height}-1)) { + $in_word = 0; + for my $width (0..($acrostic->{width}-1)) { + my $pos = $height*$acrostic->{height} + $width; + __add_to_answers($acrostic,\$current_answer,\$in_word,$pos); + } + } + + # for all possible answers which are unsolved, find allowable words + for my $answer (0..$#{$acrostic->{answers}}) { + } + + # for all crossing positions, find allowed letters and eliminate + # words which do not share a letter in common with a word in the + # opposite direction + + # for all crossing positions, eliminate words which do not share a + # letter in common with a word in the opposite direction +} + sub update_and_load_database { my ($dir,$wordlist) = @_; # check to see if the wordlist is newer than our database + my @dbs = qw(length position substitution rotation anagram); my $update_required = 0; my $wordlist_time = 1; if (! -r $wordlist) { @@ -242,11 +316,11 @@ sub update_and_load_database { } my $wordlist_stat = stat($wordlist); $wordlist_time = max($wordlist_time,$wordlist_stat->mtime); - for my $db (qw(length position)) { + for my $db (@dbs) { if (! -e "$dir/db_${db}") { $update_required = 1; } - elsif (stat($wordlist)->mtime < $wordlist_time) { + elsif (stat("$dir/db_${db}")->mtime < $wordlist_time) { $update_required = 1; } } @@ -256,7 +330,7 @@ sub update_and_load_database { if (not -d "$dir") { mkdir($dir) or die "Unable to create directory $dir: $!"; } - for my $db (qw(length position substitution rotation anagram)) { + for my $db (@dbs) { $database->{$db} = {}; $disk_db->{$db} = {}; tie %{$disk_db->{$db}}, MLDBM => "$dir/db_${db}", @@ -269,11 +343,14 @@ sub update_and_load_database { while (<$wordlist_fh>) { chomp; next unless length $_; - my $word = lc($_); + utf8::upgrade($_); + my ($word) = word_sanitize($_); + $word =~ s/[^a-z]//g; + next unless length $_; next if exists $seen_words{$word}; $seen_words{$word} = 1; if ((keys %seen_words) % 100 == 0) { - print STDERR "Handled ".(keys %seen_words) . "words, on $word\n"; + print STDERR "Handled ".(keys %seen_words) . " words, on $word\n"; } my @l = split //, $word; my $l = length($word); @@ -316,6 +393,9 @@ sub update_and_load_database { # in it, hello becomes axeeh, giddy acxxs my $fl = $l[0]; my $index = $l_o{$fl}; + if (not defined $index or grep {not exists $l_o{$_}} @l) { + print STDERR "Problem with some letters in '$word'\n"; + } $mapped_word = join('',map {$l_o[($l_o{$_} - $index + 26) % 26]} @l); $temp = $database->{rotation}{$mapped_word}; $temp = [] if not defined $temp; @@ -337,7 +417,7 @@ sub update_and_load_database { } } else { - for my $db (qw(length position)) { + for my $db (@dbs) { $database->{$db} = {}; tie %{$database->{$db}}, MLDBM => "$dir/db_${db}", O_RDONLY, 0666