From: Don Armstrong
Date: Fri, 24 Jun 2011 17:59:33 +0000 (+0000)
Subject: * tweak utf8 issues
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=1ce8a201f7e91a14f5dcaa1616d2f2e94afa6efd;p=bin.git

* tweak utf8 issues
---

diff --git a/anamang b/anamang
index e14f213..81841ec 100755
--- a/anamang
+++ b/anamang
@@ -273,8 +273,9 @@ sub update_and_load_database {
     while (<$wordlist_fh>) {
         chomp;
         next unless length $_;
-        my $word = word_sanitize($_);
-        $word =~ s/[^a-z]//;
+        utf8::upgrade($_);
+        my ($word) = word_sanitize($_);
+        $word =~ s/[^a-z]//g;
         next unless length $_;
         next if exists $seen_words{$word};
         $seen_words{$word} = 1;