git.donarmstrong.com Git - bin.git/commitdiff
* tweak utf8 issues
author Don Armstrong <don@donarmstrong.com>
Fri, 24 Jun 2011 17:59:33 +0000 (17:59 +0000)
committer Don Armstrong <don@donarmstrong.com>
Fri, 24 Jun 2011 17:59:33 +0000 (17:59 +0000)
anamang

diff --git a/anamang b/anamang
index e14f213b72cd1b8c02f44ae580f177e3ef70338d..81841ec89f123a71b63e5c17e6b0badd50d91bfd 100755 (executable)
--- a/anamang
+++ b/anamang
@@ -273,8 +273,9 @@ sub update_and_load_database {
        while (<$wordlist_fh>) {
            chomp;
            next unless length $_;
-           my $word = word_sanitize($_);
-           $word =~ s/[^a-z]//;
+           utf8::upgrade($_);
+           my ($word) = word_sanitize($_);
+           $word =~ s/[^a-z]//g;
            next unless length $_;
            next if exists $seen_words{$word};
            $seen_words{$word} = 1;