From 2a5c70110bba2014507091c353c3e80d5dc5f796 Mon Sep 17 00:00:00 2001
From: Patrick McCarty <pnorcks@gmail.com>
Date: Tue, 10 Nov 2009 21:22:25 -0800
Subject: [PATCH] Remove mbrtowc(), and use utf8_char_len() instead.

- In get_counts(), revert the line_char variable to its original
  behavior: keeping track of the character count, not the byte offset.

- Introduce a new parameter to get_counts() to keep track of the byte
  offset.

- Use the byte offset data in Source_file::quote_input() to make sure
  the source line is split at the correct place.
---
 lily/include/input.hh       |  2 +-
 lily/include/source-file.hh |  2 +-
 lily/input-scheme.cc        |  6 ++--
 lily/input.cc               | 12 +++----
 lily/source-file.cc         | 62 +++++++++++--------------------------
 5 files changed, 28 insertions(+), 56 deletions(-)
diff --git a/lily/include/input.hh b/lily/include/input.hh
index 94e00527a2..c56b487dee 100644
--- a/lily/include/input.hh
+++ b/lily/include/input.hh
@@ -45,7 +45,7 @@ public:
   int end_line_number ()const;
   int end_column_number ()const;
 
-  void get_counts (int *line, int *char_count, int *col) const;
+  void get_counts (int *, int *, int *, int *) const;
 
   Input (Input const &i);
   Input ();
diff --git a/lily/include/source-file.hh b/lily/include/source-file.hh
index 2fd1bfc073..5c0993cf6c 100644
--- a/lily/include/source-file.hh
+++ b/lily/include/source-file.hh
@@ -53,7 +53,7 @@ public:
 public:
   Slice line_slice (char const *pos_str0) const;
   string line_string (char const *pos_str0) const;
-  void get_counts (char const *pos_str0, int *, int *, int *) const;
+  void get_counts (char const *pos_str0, int *, int *, int *, int *) const;
   
   SCM get_port () const;
   string name_;
diff --git a/lily/input-scheme.cc b/lily/input-scheme.cc
index f313a3c27e..996a4a59b2 100644
--- a/lily/input-scheme.cc
+++ b/lily/input-scheme.cc
@@ -46,10 +46,8 @@ LY_DEFINE (ly_input_file_line_char_column,
   LY_ASSERT_TYPE (unsmob_input, sip, 1);
   Input *ip = unsmob_input (sip);
 
-  int l = 0;
-  int ch = 0;
-  int col = 0;
-  ip->get_counts (&l, &ch, &col);
+  int l = 0, ch = 0, col = 0, offset = 0; // get_counts () may return early
+  ip->get_counts (&l, &ch, &col, &offset);
   return scm_list_4 (ly_string2scm (ip->file_string ()),
 		     scm_from_int (l),
 		     scm_from_int (ch),
diff --git a/lily/input.cc b/lily/input.cc
index f2ddedd4e8..e302795042 100644
--- a/lily/input.cc
+++ b/lily/input.cc
@@ -147,8 +147,8 @@ Input::line_number () const
 int
 Input::column_number () const
 {
-  int line, chr, col = 0;
-  source_file_->get_counts (start_, &line, &chr, &col);
+  int line = 0, chr = 0, col = 0, offset = 0; // get_counts () may return early
+  source_file_->get_counts (start_, &line, &chr, &col, &offset);
 
   return col;
 }
@@ -164,16 +164,16 @@ Input::end_line_number () const
 int
 Input::end_column_number () const
 {
-  int line, chr, col = 0;
-  source_file_->get_counts (end_, &line, &chr, &col);
+  int line = 0, chr = 0, col = 0, offset = 0; // get_counts () may return early
+  source_file_->get_counts (end_, &line, &chr, &col, &offset);
 
   return col;
 }
 
 void
-Input::get_counts (int *line, int *chr, int *col) const
+Input::get_counts (int *line, int *chr, int *col, int *offset) const
 {
-  source_file_->get_counts (start_, line, chr, col);
+  source_file_->get_counts (start_, line, chr, col, offset);
 }
 
 void
diff --git a/lily/source-file.cc b/lily/source-file.cc
index 96264fb4fb..7687b4749a 100644
--- a/lily/source-file.cc
+++ b/lily/source-file.cc
@@ -16,12 +16,6 @@
 
 #include "config.hh"
 
-#if HAVE_UTF8_WCHAR_H
-#include <utf8/wchar.h>  /* mbrtowc */
-#else /* !HAVE_UTF8_WCHAR_H */
-#include <cwchar> /* mbrtowc */
-#endif /* HAVE_UTF8_WCHAR_H */
-
 #include <cstdio>
 
 #if HAVE_SSTREAM
@@ -34,6 +28,7 @@ using namespace std;
 
 #include "file-name-map.hh"
 #include "international.hh"
+#include "misc.hh"
 #include "warn.hh"
 
 void
@@ -173,8 +168,8 @@ Source_file::file_line_column_string (char const *context_str0) const
     return " (" + _ ("position unknown") + ")";
   else
     {
-      int l, ch, col;
-      get_counts (context_str0, &l, &ch, &col);
+      int l, ch, col, offset;
+      get_counts (context_str0, &l, &ch, &col, &offset);
 
       return name_string () + ":" + to_string (l)
 	+ ":" + to_string (col);
@@ -187,13 +182,13 @@ Source_file::quote_input (char const *pos_str0) const
   if (!contains (pos_str0))
     return " (" + _ ("position unknown") + ")";
 
-  int l, ch, col;
-  get_counts (pos_str0, &l, &ch, &col);
+  int l, ch, col, offset;
+  get_counts (pos_str0, &l, &ch, &col, &offset);
   string line = line_string (pos_str0);
-  string context = line.substr (0, ch)
+  string context = line.substr (0, offset)
     + to_string ('\n')
     + to_string (' ', col)
-    + line.substr (ch, line.length ()-ch);
+    + line.substr (offset, line.length () - offset);
   return context;
 }
 
@@ -253,11 +248,10 @@ void
 Source_file::get_counts (char const *pos_str0,
 			 int *line_number,
 			 int *line_char,
-			 int *column) const
+			 int *column,
+			 int *byte_offset) const
 {
   *line_number = 0;
-  *line_char = 0;
-  *column = 0;
     
   if (!contains (pos_str0))
     return;
@@ -272,47 +266,27 @@ Source_file::get_counts (char const *pos_str0,
   string line_begin (line_start, left);
   char const *line_chars = line_begin.c_str ();
 
-  *column = 0;
   *line_char = 0;
-
-  mbstate_t state;
-
-  /* Initialize the state.  */
-  memset (&state, '\0', sizeof (state));
+  *column = 0;
+  *byte_offset = 0;
 
   while (left > 0)
     {
-      /*
-	FIXME, this is apparently locale dependent.
-      */
-#if HAVE_MBRTOWC
-      wchar_t multibyte[2];
-      size_t thislen = mbrtowc (multibyte, line_chars, left, &state);
-#else
-      size_t thislen = 1;
-#endif /* !HAVE_MBRTOWC */
-
-      /* Stop converting at invalid character;
-	 this can mean we have read just the first part
-	 of a valid character.  */
-      if (thislen == (size_t) -1)
-	break;
-
-      /* We want to handle embedded NUL bytes
-	 but the return value is 0.  Correct this.  */
-      if (thislen == 0)
-	thislen = 1;
+      size_t thislen = utf8_char_len (*line_chars);
 
       if (thislen == 1 && line_chars[0] == '\t')
 	(*column) = (*column / 8 + 1) * 8;
       else
 	(*column)++;
 
+      (*line_char)++;
+
       /*
-	For accurate error output, consider multibyte
-	characters as a series of characters.
+	To have decent output in UTF-8 aware terminals,
+	we must keep track of the number of bytes from
+	the left edge of the terminal.
       */
-      (*line_char) += thislen;
+      *byte_offset += thislen;
 
       /* Advance past this character. */
       line_chars += thislen;
-- 
2.39.5