- size_t thislen = utf8_char_len (*line_chars);
-
- if (thislen == 1 && line_chars[0] == '\t')
- (*column) = (*column / 8 + 1) * 8;
+ // Skip UTF-8 continuation bytes. This is simplistic but
+ // robust, and we warn against non-UTF-8 input in the lexer
+ // already. In the case of non-UTF-8 or of this function being
+ // called in mid-character, the results are somewhat arbitrary,
+ // but there is no really sane definition anyway.
+ if ((*line_chars & 0xc0) == 0x80)
+ continue;
+
+ if (*line_chars == '\t')
+ (*column) = (*column / 8 + 1) * 8;