From 471381c46d9e137f23112295f8a6b7723b95c16a Mon Sep 17 00:00:00 2001 From: Werner Lemberg Date: Wed, 28 Nov 2007 16:17:12 +0100 Subject: [PATCH] (replace_whitespace): UTF-8 characters are up to 4 bytes long. --- lily/text-interface.cc | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lily/text-interface.cc b/lily/text-interface.cc index 4b2f6c4445..b1ee537baa 100644 --- a/lily/text-interface.cc +++ b/lily/text-interface.cc @@ -30,16 +30,21 @@ replace_whitespace (string *str) vsize char_len = 1; char cur = (*str)[i]; - if ((cur & 0x11100000) == 0x11100000) + // U+10000 - U+10FFFF + if ((cur & 0x11110000) == 0x11110000) + char_len = 4; + // U+0800 - U+FFFF + else if ((cur & 0x11100000) == 0x11100000) char_len = 3; + // U+0080 - U+07FF else if ((cur & 0x11000000) == 0x11000000) char_len = 2; else if (cur & 0x10000000) programming_error ("invalid utf-8 string"); - - /* avoid the locale-dependent isspace */ - if (cur == '\n' || cur == '\t' || cur == '\v') - (*str)[i] = ' '; + else + // avoid the locale-dependent isspace + if (cur == '\n' || cur == '\t' || cur == '\v') + (*str)[i] = ' '; i += char_len; } -- 2.39.5