From 12a8d052900da9adc9a598fd998a49a55873fb32 Mon Sep 17 00:00:00 2001 From: Patrick McCarty Date: Thu, 29 Oct 2009 19:00:58 -0700 Subject: [PATCH] Fix UTF-8 validation checks The bitwise comparisons are only for 8-bit characters, not 32-bit characters. The checks were mixing up binary and hex. :-) --- lily/text-interface.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lily/text-interface.cc b/lily/text-interface.cc index c7d425bcb5..8f2f7edc06 100644 --- a/lily/text-interface.cc +++ b/lily/text-interface.cc @@ -31,15 +31,15 @@ replace_whitespace (string *str) char cur = (*str)[i]; // U+10000 - U+10FFFF - if ((cur & 0x11110000) == 0x11110000) + if ((cur & 0xF0) == 0xF0) char_len = 4; // U+0800 - U+FFFF - else if ((cur & 0x11100000) == 0x11100000) + else if ((cur & 0xE0) == 0xE0) char_len = 3; // U+0080 - U+07FF - else if ((cur & 0x11000000) == 0x11000000) + else if ((cur & 0xC0) == 0xC0) char_len = 2; - else if (cur & 0x10000000) + else if (cur & 0x80) programming_error ("invalid utf-8 string"); else // avoid the locale-dependent isspace -- 2.39.2