return result;
}
-
-vsize
-utf8_char_len (char current)
-{
- vsize char_len = 1;
-
- // U+10000 - U+10FFFF
- if ((current & 0xF0) == 0xF0)
- char_len = 4;
- // U+0800 - U+FFFF
- else if ((current & 0xE0) == 0xE0)
- char_len = 3;
- // U+0080 - U+07FF
- else if ((current & 0xC0) == 0xC0)
- char_len = 2;
- else if (current & 0x80)
- programming_error ("invalid UTF-8 string");
-
- return char_len;
-}
char const *line_start = (char const *)data + line[LEFT];
ssize left = (char const *) pos_str0 - line_start;
+ *byte_offset = left;
+
string line_begin (line_start, left);
char const *line_chars = line_begin.c_str ();
- while (left > 0)
+ for (; left > 0; --left, ++line_chars)
{
- size_t thislen = utf8_char_len (*line_chars);
-
- if (thislen == 1 && line_chars[0] == '\t')
+ // Skip UTF-8 continuation bytes. This is simplistic but
+ // robust, and we warn against non-UTF-8 input in the lexer
+ // already. In the case of non-UTF-8 or of this function being
+ // called in mid-character, the results are somewhat arbitrary,
+ // but there is no really sane definition anyway.
+ if ((*line_chars & 0xc0) == 0x80)
+ continue;
+
+ if (*line_chars == '\t')
(*column) = (*column / 8 + 1) * 8;
else
(*column)++;
(*line_char)++;
-
- /*
- To have decent output in UTF-8 aware terminals,
- we must keep track of the number of bytes from
- the left edge of the terminal.
- */
- *byte_offset += thislen;
-
- /* Advance past this character. */
- line_chars += thislen;
- left -= thislen;
}
}
#include "warn.hh"
static void
-replace_special_characters (string *str, SCM props)
+replace_special_characters (string &str, SCM props) /* STR is now taken by reference and rewritten in place. */
{
- vsize i = 0;
SCM replacement_alist = ly_chain_assoc_get (ly_symbol2scm ("replacement-alist"),
props,
SCM_EOL);
(scm_string_length (scm_caar (s))));
}
- while (i <= str->size ())
+ for (vsize i = 0; i < str.size (); i++) /* Byte-wise scan; the strict '<' replaces the old '<=' bound. */
{
+ /* Don't start a match in the middle of a UTF-8 sequence: bytes of the form 10xxxxxx are continuation bytes. */
+ if ((str[i] & 0xc0) == 0x80)
+ continue;
for (vsize j = max_length + 1; j--;)
{
- string dummy = str->substr (i, j);
- string ligature = robust_scm2string
- (ly_assoc_get (ly_string2scm (dummy),
- replacement_alist, SCM_BOOL_F), "");
- if (ligature != "")
- str->replace (i, j, ligature);
+ if (j > str.size () - i) /* j counts down from max_length to 0; skip lengths that would run past the end of str. */
+ continue;
+ string dummy = str.substr (i, j);
+ SCM ligature = ly_assoc_get (ly_string2scm (dummy), /* SCM_BOOL_F is the "no entry" default. */
+ replacement_alist, SCM_BOOL_F);
+ if (scm_is_true (ligature)) /* Any value other than #f counts as a hit; the old code tested for a non-empty string instead. */
+ str.replace (i, j, robust_scm2string (ligature, "")); /* NOTE(review): presumably yields "" for a non-string value -- confirm. No break follows, so shorter lengths are still tried at i. */
}
- i += utf8_char_len ((*str)[i]);
}
}
Output_def *layout = unsmob_output_def (layout_smob);
Font_metric *fm = select_encoded_font (layout, props);
- replace_special_characters (&str, props);
+ replace_special_characters (str, props);
/*
We want to filter strings with a music font that pass through