#! /usr/bin/perl
# acromang tries to solve acrostics, and is released
# under the terms of the GPL version 2, or any later version, at your
# option. See the file README and COPYING for more information.
# Copyright 2008 by Don Armstrong <don@donarmstrong.com>.
# $Id: perl_script 1153 2008-04-08 00:04:20Z don $


use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;

=head1 NAME

acromang - try to solve acrostics

=head1 SYNOPSIS

acromang m??s??c??o [options]

 Options:
  --debug, -d debugging level (Default 0)
  --help, -h display this help
  --man, -m display manual

=head1 OPTIONS

=over

=item B<--wordlist>

Default wordlist, should be gigantic; defaults to
/usr/share/dict/words

=item B<--database-dir>

Directory for storing databases; defaults to ~/.acromang

=item B<--debug, -d>

Debug verbosity. (Default 0)

=item B<--help, -h>

Display brief useage information.

=item B<--man, -m>

Display this manual.

=back

=head1 EXAMPLES


=cut


use vars qw($DEBUG);

use User;


use Params::Validate qw(:types validate_with);
use IO::File;
use DB_File;
use MLDBM qw(DB_File Storable);
use Fcntl qw/O_RDWR O_RDONLY O_CREAT O_TRUNC/;
# Use portable Storable images
$MLDBM::DumpMeth=q(portable);


use File::stat;
use List::Util qw(min max);
use POSIX qw(floor);

my %options = (debug           => 0,
	       help            => 0,
	       man             => 0,
	       wordlist        => '/usr/share/dict/words',
	       database_dir    => User->Home.'/.acromang/',
	       show_all        => 0,
	       multiword       => 0,
	       );

my @puzzle_types = qw(acrostic anagram substitution);


GetOptions(\%options,
	   'wordlist=s','database_dir|database-dir=s',
	   @puzzle_types,
	   'debug|d+','help|h|?','man|m');

pod2usage() if $options{help};
pod2usage({verbose=>2}) if $options{man};

$DEBUG = $options{debug};

my @USAGE_ERRORS;
if (not grep {exists $options{$_}} @puzzle_types) {
    $options{acrostic} = 1
}

if (1 != grep {exists $options{$_}} @puzzle_types) {
    push @USAGE_ERRORS, "You must pass exactly one of ".join(', ',map {"--$_ "} @puzzle_types);
}

pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;

my $text;
{ local $/;
  $text = <>;
}
my @words = map {s/[^A-Za-z\?]//g; length $_?$_:();} split /[\s\n]+/, $text;
if (not @words) {
    print STDERR "There were no words in the input\n";
    exit 1;
}

# letter order
my @l_o = ('a'..'z');
my %l_o;
@l_o{@l_o} = (0 .. 25);
sub word_sanitize{
    return map {tr/A-Z/a-z/; tr/éüöáí/euoai/; $_ } @_
}

my $database = update_and_load_database($options{database_dir},$options{wordlist});

# run through and use the database

# for now, concentrate on finding single word solutions
if ($options{acrostic}) {
}
elsif ($options{acrostic}) {
    foreach my $word (@words) {
	my $length = length($word);
	my @let = split //, $word;
	my %pos;
	for my $p (0..$#let) {
	    next if $let[$p] eq '?';
	    $pos{$p} = $let[$p];
	}
	my @allowable_words = find_allowable_words(database => $database,position => \%pos,length => $length);
	if (@allowable_words > 200 and not $options{show_all}) {
	    print STDERR "Only showing 200 of the " . @allowable_words . " possible words\n";
	    @allowable_words = @allowable_words[0..199];
	}
	# multiword currently not enabled
	# while ((@allowable_words == 0 or $options{multiword}) and
	#        (@allowable_words < 200 or $options{show_all})
	#       ) {
	#     # try for multiple word solutions, start with the longest
	#     # words possible
	# 
	#     # try to split the number of known letters in half, and
	#     # start increasing and decreasing in both directions
	# 
	#     # don't attempt to split each part into bits unless there
	#     # are no solutions
	# 
	#     # avoid searching for words when we only have a length and
	#     # no position information
	# }
	print map {$_,qq(\n)} sort @allowable_words;
    }
}
elsif ($options{anagram}) {
    foreach my $word (@words) {
	my $length = length($word);
	my $sorted_word = join ('', sort split //, $word);
	my @allowable_words = find_allowable_words(database => $database,
						   anagram => $sorted_word,
						  );
	print map {$_,qq(\n)} sort @allowable_words;
    }
}

sub find_allowable_words {
    my %param = validate_with(params => \@_,
			      spec   => {database => {type => HASHREF,
						     },
					 position => {type => HASHREF,
						      optional => 1,
						     },
					 length   => {type => SCALAR,
						      optional => 1,
						     },
					 substitution => {type => SCALAR,
							  optional => 1,
							 },
					 rotation     => {type => SCALAR,
							  optional => 1,
							 },
					 anagram      => {type => SCALAR,
							  optional => 1,
							 },
					},
			     );

    my $database = $param{database};
    my $must_match = 0;
    if (exists $param{length} and defined $param{length} and $param{length} > 0) {
	$must_match++;
    }
    if (exists $param{position} and defined $param{position}) {
	$must_match += scalar keys %{$param{position}};
    }
    if (exists $param{anagram} and defined $param{anagram}) {
	$must_match++;
    }
    
    if ($must_match <= 0) {
	die "There must be something to try matching against";
    }
    my %words;
    if (exists $param{length} and defined $param{length} and $param{length} > 0) {
	for my $word (@{$database->{length}{$param{length}}}) {
	    $words{$word}++;
	}
    }
    if (exists $param{position} and defined $param{position}) {
	for my $position (keys %{$param{position}}) {
	    for my $word (@{$database->{position}{$position . ' ' . $param{position}{$position}}}) {
		$words{$word}++;
	    }
	}
    }
    if (exists $param{anagram} and defined $param{anagram}) {
	for my $word (@{$database->{anagram}{$param{anagram}}}) {
	    $words{$word}++;
	}
    }
    return grep {$words{$_} >= $must_match} keys %words;
}

sub solve_acrostic {
    my ($acrostic) = @_;
    # need to find unsolved words

    # if there are multiple intersecting unsolved locations, try to
    # solve the hard ones first, then the easy ones.

    # given the current state of the board, find all possible words
    # that fit the fixed positions

    # find unsolved words

    # find positions with _ or ?, * or | are unsolved bits
    
    # scan the puzzle from left to right, top to bottom (across
    # answers), then top to bottom, left to right (down answers)
    my $in_word=0;
    my $current_answer=0;

    sub __add_to_answers {
	my ($acrostic,$current_answer,$in_word,$pos) = @_;
	if ($acrostic->{grid}[$pos] !~ /^\*\|$/) {
	    if (not $$in_word) {
		$$current_answer++;
	    }
	    $$in_word = 1;
	    $acrostic->{answers}[$$current_answer]{letters} .= $acrostic->{grid}[$pos];
	    $acrostic->{answers}[$$current_answer]{length}++;
	    $acrostic->{answers}[$$current_answer]{unsolved} = 0;
	    push @{$acrostic->{answers}[$current_answer]{positions}}, $pos;
	    push @{$acrostic->{answer_grid}[$pos]}, $current_answer;
	    if ($acrostic->{grid}[$pos] =~ /^\_\?$/) {
		$acrostic->{answers}[$$current_answer]{unsolved} = 1;
	    }
	}
	else {
	    $$in_word = 0;
	}
    }

    for my $width (0..($acrostic->{width}-1)) {
	$in_word = 0;
	for my $height (0..($acrostic->{height}-1)) {
	    my $pos = $height*$acrostic->{height} + $width;
	    __add_to_answers($acrostic,\$current_answer,\$in_word,$pos);
	}
    }
    for my $height (0..($acrostic->{height}-1)) {
	$in_word = 0;
	for my $width (0..($acrostic->{width}-1)) {
	    my $pos = $height*$acrostic->{height} + $width;
	    __add_to_answers($acrostic,\$current_answer,\$in_word,$pos);
	}
    }

    # for all possible answers which are unsolved, find allowable words
    for my $answer (0..$#{$acrostic->{answers}}) {
    }

    # for all crossing positions, find allowed letters and eliminate
    # words which do not share a letter in common with a word in the
    # opposite direction

    # for all crossing positions, eliminate words which do not share a
    # letter in common with a word in the opposite direction
}


sub update_and_load_database {
    my ($dir,$wordlist) = @_;
    # check to see if the wordlist is newer than our database
    my @dbs = qw(length position substitution rotation anagram);
    my $update_required = 0;
    my $wordlist_time = 1;
    if (! -r $wordlist) {
	die "Word list $wordlist doesn't exist or isn't readable";
    }
    my $wordlist_stat = stat($wordlist);
    $wordlist_time = max($wordlist_time,$wordlist_stat->mtime);
    for my $db (@dbs) {
	if (! -e "$dir/db_${db}") {
	    $update_required = 1;
	}
	elsif (stat("$dir/db_${db}")->mtime < $wordlist_time) {
	    $update_required = 1;
	}
    }
    my $database;
    if ($update_required) {
	my $disk_db = {};
	if (not -d "$dir") {
	    mkdir($dir) or die "Unable to create directory $dir: $!";
	}
	for my $db (@dbs) {
	    $database->{$db} = {};
	    $disk_db->{$db} = {};
	    tie %{$disk_db->{$db}}, MLDBM => "$dir/db_${db}",
		O_RDWR|O_CREAT|O_TRUNC, 0666
		    or die "Unable to open/create $dir/db_${db}: $!";
	}
	my $wordlist_fh = IO::File->new($wordlist,'r') or
	    die "Unable to open $wordlist for reading: $!";
	my %seen_words;
	while (<$wordlist_fh>) {
	    chomp;
	    next unless length $_;
	    utf8::upgrade($_);
	    my ($word) = word_sanitize($_);
	    $word =~ s/[^a-z]//g;
	    next unless length $_;
	    next if exists $seen_words{$word};
	    $seen_words{$word} = 1;
	    if ((keys %seen_words) % 100 == 0) {
		print STDERR "Handled ".(keys %seen_words) . " words, on $word\n";
	    }
	    my @l = split //, $word;
	    my $l = length($word);
	    for my $p (0..$#l) {
		# position from the beginning
		my $temp = $database->{position}{"$p $l[$p]"};
		$temp = [] if not defined $temp;
		push @{$temp}, $word;
		$database->{position}{"$p $l[$p]"} = $temp;
		# this is the position from the end
		$temp = $database->{position}{$p-$l . " " . $l[$p-$l]};
		$temp = [] if not defined $temp;
		push @{$temp}, $word;
		$database->{position}{$p-$l . " " . $l[$p-$l]} = $temp;
	    }
	    my $temp = $database->{length}{$l};
	    $temp = [] if not defined $temp;
	    push @{$temp}, $word;
	    $database->{length}{$l} = $temp;
	    # this is the substitution database
	    # in it, hello and giddy both become abccd
	    my %uc = ();
	    my @uc_order = map {
		if (exists $uc{$_}) {
		    ();
		} else {
		    $uc{$_} = 1;
		    $_;
		}
	    } @l;
	    my %s_map;
	    @s_map{@uc_order} = ('a'..'z')[0..$#uc_order] if @uc_order;
	    my $mapped_word = join('',map {$s_map{$_}} @l);
	    $temp = $database->{substitution}{$mapped_word};
	    $temp = [] if not defined $temp;
	    push @{$temp}, $word;
	    $database->{substitution}{$mapped_word} = $temp;

	    # this is the rotation database (for ceaser cyphers)
	    # in it, hello becomes axeeh, giddy acxxs
	    my $fl = $l[0];
	    my $index = $l_o{$fl};
	    if (not defined $index or grep {not exists $l_o{$_}} @l) {
		print STDERR "Problem with some letters in '$word'\n";
	    }
	    $mapped_word = join('',map {$l_o[($l_o{$_} - $index + 26) % 26]} @l);
	    $temp = $database->{rotation}{$mapped_word};
	    $temp = [] if not defined $temp;
	    push @{$temp}, $word;
	    $database->{rotation}{$mapped_word} = $temp;

	    # this is the anagram database
	    # in it, hello becomes ehllo, giddy ddgiy
	    $mapped_word = join('',sort @l);
	    $temp = $database->{anagram}{$mapped_word};
	    $temp = [] if not defined $temp;
	    push @{$temp}, $word;
	    $database->{anagram}{$mapped_word} = $temp;
	}
	for my $key1 (keys %{$database}) {
	    for my $key2 (keys %{$database->{$key1}}) {
		$disk_db->{$key1}{$key2} = $database->{$key1}{$key2};
	    }
	}
    }
    else {
	for my $db (@dbs) {
	    $database->{$db} = {};
	    tie %{$database->{$db}}, MLDBM => "$dir/db_${db}",
		O_RDONLY, 0666
		    or die "Unable to open $dir/db_${db}: $!";
	}
    }
    return $database;

}


__END__