From 2a5c70110bba2014507091c353c3e80d5dc5f796 Mon Sep 17 00:00:00 2001 From: Patrick McCarty Date: Tue, 10 Nov 2009 21:22:25 -0800 Subject: [PATCH] Remove mbrtowc(), and use utf8_char_len() instead. - In get_counts(), revert the line_char variable to its original behavior: keeping track of the character count, not the byte offset. - Introduce a new parameter to get_counts() to keep track of the byte offset. - Use the byte offset data in Source_file::quote_input() to make sure the source line is split at the correct place. --- lily/include/input.hh | 2 +- lily/include/source-file.hh | 2 +- lily/input-scheme.cc | 6 ++-- lily/input.cc | 12 +++---- lily/source-file.cc | 62 +++++++++++-------------------------- 5 files changed, 28 insertions(+), 56 deletions(-) diff --git a/lily/include/input.hh b/lily/include/input.hh index 94e00527a2..c56b487dee 100644 --- a/lily/include/input.hh +++ b/lily/include/input.hh @@ -45,7 +45,7 @@ public: int end_line_number ()const; int end_column_number ()const; - void get_counts (int *line, int *char_count, int *col) const; + void get_counts (int *, int *, int *, int *) const; Input (Input const &i); Input (); diff --git a/lily/include/source-file.hh b/lily/include/source-file.hh index 2fd1bfc073..5c0993cf6c 100644 --- a/lily/include/source-file.hh +++ b/lily/include/source-file.hh @@ -53,7 +53,7 @@ public: public: Slice line_slice (char const *pos_str0) const; string line_string (char const *pos_str0) const; - void get_counts (char const *pos_str0, int *, int *, int *) const; + void get_counts (char const *pos_str0, int *, int *, int *, int *) const; SCM get_port () const; string name_; diff --git a/lily/input-scheme.cc b/lily/input-scheme.cc index f313a3c27e..996a4a59b2 100644 --- a/lily/input-scheme.cc +++ b/lily/input-scheme.cc @@ -46,10 +46,8 @@ LY_DEFINE (ly_input_file_line_char_column, LY_ASSERT_TYPE (unsmob_input, sip, 1); Input *ip = unsmob_input (sip); - int l = 0; - int ch = 0; - int col = 0; - ip->get_counts (&l, &ch, &col); + int l, ch, col, offset = 0; + ip->get_counts (&l, &ch, &col, &offset); return scm_list_4 (ly_string2scm (ip->file_string ()), scm_from_int (l), scm_from_int (ch), diff --git a/lily/input.cc b/lily/input.cc index f2ddedd4e8..e302795042 100644 --- a/lily/input.cc +++ b/lily/input.cc @@ -147,8 +147,8 @@ Input::line_number () const int Input::column_number () const { - int line, chr, col = 0; - source_file_->get_counts (start_, &line, &chr, &col); + int line, chr, col, offset = 0; + source_file_->get_counts (start_, &line, &chr, &col, &offset); return col; } @@ -164,16 +164,16 @@ Input::end_line_number () const int Input::end_column_number () const { - int line, chr, col = 0; - source_file_->get_counts (end_, &line, &chr, &col); + int line, chr, col, offset = 0; + source_file_->get_counts (end_, &line, &chr, &col, &offset); return col; } void -Input::get_counts (int *line, int *chr, int *col) const +Input::get_counts (int *line, int *chr, int *col, int *offset) const { - source_file_->get_counts (start_, line, chr, col); + source_file_->get_counts (start_, line, chr, col, offset); } void diff --git a/lily/source-file.cc b/lily/source-file.cc index 96264fb4fb..7687b4749a 100644 --- a/lily/source-file.cc +++ b/lily/source-file.cc @@ -16,12 +16,6 @@ #include "config.hh" -#if HAVE_UTF8_WCHAR_H -#include /* mbrtowc */ -#else /* !HAVE_UTF8_WCHAR_H */ -#include /* mbrtowc */ -#endif /* HAVE_UTF8_WCHAR_H */ - #include #if HAVE_SSTREAM @@ -34,6 +28,7 @@ using namespace std; #include "file-name-map.hh" #include "international.hh" +#include "misc.hh" #include "warn.hh" void @@ -173,8 +168,8 @@ Source_file::file_line_column_string (char const *context_str0) const return " (" + _ ("position unknown") + ")"; else { - int l, ch, col; - get_counts (context_str0, &l, &ch, &col); + int l, ch, col, offset; + get_counts (context_str0, &l, &ch, &col, &offset); return name_string () + ":" + to_string (l) + ":" + to_string (col); @@ -187,13 +182,13 @@ Source_file::quote_input (char const *pos_str0) const if (!contains (pos_str0)) return " (" + _ ("position unknown") + ")"; - int l, ch, col; - get_counts (pos_str0, &l, &ch, &col); + int l, ch, col, offset; + get_counts (pos_str0, &l, &ch, &col, &offset); string line = line_string (pos_str0); - string context = line.substr (0, ch) + string context = line.substr (0, offset) + to_string ('\n') + to_string (' ', col) - + line.substr (ch, line.length ()-ch); + + line.substr (offset, line.length () - offset); return context; } @@ -253,11 +248,10 @@ void Source_file::get_counts (char const *pos_str0, int *line_number, int *line_char, - int *column) const + int *column, + int *byte_offset) const { *line_number = 0; - *line_char = 0; - *column = 0; if (!contains (pos_str0)) return; @@ -272,47 +266,27 @@ Source_file::get_counts (char const *pos_str0, string line_begin (line_start, left); char const *line_chars = line_begin.c_str (); - *column = 0; *line_char = 0; - - mbstate_t state; - - /* Initialize the state. */ - memset (&state, '\0', sizeof (state)); + *column = 0; + *byte_offset = 0; while (left > 0) { - /* - FIXME, this is apparently locale dependent. - */ -#if HAVE_MBRTOWC - wchar_t multibyte[2]; - size_t thislen = mbrtowc (multibyte, line_chars, left, &state); -#else - size_t thislen = 1; -#endif /* !HAVE_MBRTOWC */ - - /* Stop converting at invalid character; - this can mean we have read just the first part - of a valid character. */ - if (thislen == (size_t) -1) - break; - - /* We want to handle embedded NUL bytes - but the return value is 0. Correct this. */ - if (thislen == 0) - thislen = 1; + size_t thislen = utf8_char_len (*line_chars); if (thislen == 1 && line_chars[0] == '\t') (*column) = (*column / 8 + 1) * 8; else (*column)++; + (*line_char)++; + /* - For accurate error output, consider multibyte - characters as a series of characters. + To have decent output in UTF-8 aware terminals, + we must keep track of the number of bytes from + the left edge of the terminal. */ - (*line_char) += thislen; + *byte_offset += thislen; /* Advance past this character. */ line_chars += thislen; -- 2.39.2