From 1ce8a201f7e91a14f5dcaa1616d2f2e94afa6efd Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Fri, 24 Jun 2011 17:59:33 +0000 Subject: [PATCH] * tweak utf8 issues --- anamang | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/anamang b/anamang index e14f213..81841ec 100755 --- a/anamang +++ b/anamang @@ -273,8 +273,9 @@ sub update_and_load_database { while (<$wordlist_fh>) { chomp; next unless length $_; - my $word = word_sanitize($_); - $word =~ s/[^a-z]//; + utf8::upgrade($_); + my ($word) = word_sanitize($_); + $word =~ s/[^a-z]//g; next unless length $_; next if exists $seen_words{$word}; $seen_words{$word} = 1; -- 2.39.5