Remove mbrtowc(), and use utf8_char_len() instead.

author Patrick McCarty <pnorcks@gmail.com>

Wed, 11 Nov 2009 05:22:25 +0000 (21:22 -0800)

committer Patrick McCarty <pnorcks@gmail.com>

Fri, 13 Nov 2009 00:37:48 +0000 (16:37 -0800)
author Patrick McCarty <pnorcks@gmail.com>
Wed, 11 Nov 2009 05:22:25 +0000 (21:22 -0800)
committer Patrick McCarty <pnorcks@gmail.com>
Fri, 13 Nov 2009 00:37:48 +0000 (16:37 -0800)
diff --git a/lily/include/input.hh b/lily/include/input.hh

index 94e00527a22395a62d3fb3391f7982f29cf849f5..c56b487dee6e4364eeee831f1d07147d57964915 100644 (file)
--- a/lily/include/input.hh
+++ b/lily/include/input.hh
@@ -45,7 +45,7 @@ public:
    int end_line_number ()const;
    int end_column_number ()const;
  
-  void get_counts (int *line, int *char_count, int *col) const;
+  void get_counts (int *, int *, int *, int *) const;
  
    Input (Input const &i);
    Input ();
diff --git a/lily/include/source-file.hh b/lily/include/source-file.hh

index 2fd1bfc073b9959cb056cd6af6a6dfe08a31e131..5c0993cf6ca7696e6a9014f8d1febaf18eecbca9 100644 (file)
--- a/lily/include/source-file.hh
+++ b/lily/include/source-file.hh
@@ -53,7 +53,7 @@ public:
  public:
    Slice line_slice (char const *pos_str0) const;
    string line_string (char const *pos_str0) const;
-  void get_counts (char const *pos_str0, int *, int *, int *) const;
+  void get_counts (char const *pos_str0, int *, int *, int *, int *) const;
    
    SCM get_port () const;
    string name_;
diff --git a/lily/input-scheme.cc b/lily/input-scheme.cc

index f313a3c27e023e4986cdbd4e17a4aa3bf5434893..996a4a59b2c9ab14976a65554b95544f3ec0d38e 100644 (file)
--- a/lily/input-scheme.cc
+++ b/lily/input-scheme.cc
@@ -46,10 +46,8 @@ LY_DEFINE (ly_input_file_line_char_column,
    LY_ASSERT_TYPE (unsmob_input, sip, 1);
    Input *ip = unsmob_input (sip);
  
-  int l = 0;
-  int ch = 0;
-  int col = 0;
-  ip->get_counts (&l, &ch, &col);
+  int l, ch, col, offset = 0;
+  ip->get_counts (&l, &ch, &col, &offset);
    return scm_list_4 (ly_string2scm (ip->file_string ()),
                      scm_from_int (l),
                      scm_from_int (ch),
diff --git a/lily/input.cc b/lily/input.cc

index f2ddedd4e8b619e61e45c933f8a8c25aa8170445..e3027950424fdfad0b4d4891037a11a65122ea1c 100644 (file)
--- a/lily/input.cc
+++ b/lily/input.cc
@@ -147,8 +147,8 @@ Input::line_number () const
  int
  Input::column_number () const
  {
-  int line, chr, col = 0;
-  source_file_->get_counts (start_, &line, &chr, &col);
+  int line, chr, col, offset = 0;
+  source_file_->get_counts (start_, &line, &chr, &col, &offset);
  
    return col;
  }
@@ -164,16 +164,16 @@ Input::end_line_number () const
  int
  Input::end_column_number () const
  {
-  int line, chr, col = 0;
-  source_file_->get_counts (end_, &line, &chr, &col);
+  int line, chr, col, offset = 0;
+  source_file_->get_counts (end_, &line, &chr, &col, &offset);
  
    return col;
  }
  
  void
-Input::get_counts (int *line, int *chr, int *col) const
+Input::get_counts (int *line, int *chr, int *col, int *offset) const
  {
-  source_file_->get_counts (start_, line, chr, col);
+  source_file_->get_counts (start_, line, chr, col, offset);
  }
  
  void
diff --git a/lily/source-file.cc b/lily/source-file.cc

index 96264fb4fb476955f19eb5b37736ac469d3265cd..7687b4749a4ea12e941e675472ded1e07a386b3a 100644 (file)
--- a/lily/source-file.cc
+++ b/lily/source-file.cc
@@ -16,12 +16,6 @@
  
  #include "config.hh"
  
-#if HAVE_UTF8_WCHAR_H
-#include <utf8/wchar.h>  /* mbrtowc */
-#else /* !HAVE_UTF8_WCHAR_H */
-#include <cwchar> /* mbrtowc */
-#endif /* HAVE_UTF8_WCHAR_H */
-
  #include <cstdio>
  
  #if HAVE_SSTREAM
@@ -34,6 +28,7 @@ using namespace std;
  
  #include "file-name-map.hh"
  #include "international.hh"
+#include "misc.hh"
  #include "warn.hh"
  
  void
@@ -173,8 +168,8 @@ Source_file::file_line_column_string (char const *context_str0) const
      return " (" + _ ("position unknown") + ")";
    else
      {
-      int l, ch, col;
-      get_counts (context_str0, &l, &ch, &col);
+      int l, ch, col, offset;
+      get_counts (context_str0, &l, &ch, &col, &offset);
  
        return name_string () + ":" + to_string (l)
         + ":" + to_string (col);
@@ -187,13 +182,13 @@ Source_file::quote_input (char const *pos_str0) const
    if (!contains (pos_str0))
      return " (" + _ ("position unknown") + ")";
  
-  int l, ch, col;
-  get_counts (pos_str0, &l, &ch, &col);
+  int l, ch, col, offset;
+  get_counts (pos_str0, &l, &ch, &col, &offset);
    string line = line_string (pos_str0);
-  string context = line.substr (0, ch)
+  string context = line.substr (0, offset)
      + to_string ('\n')
      + to_string (' ', col)
-    + line.substr (ch, line.length ()-ch);
+    + line.substr (offset, line.length () - offset);
    return context;
  }
  
@@ -253,11 +248,10 @@ void
  Source_file::get_counts (char const *pos_str0,
                          int *line_number,
                          int *line_char,
-                        int *column) const
+                        int *column,
+                        int *byte_offset) const
  {
    *line_number = 0;
-  *line_char = 0;
-  *column = 0;
      
    if (!contains (pos_str0))
      return;
@@ -272,47 +266,27 @@ Source_file::get_counts (char const *pos_str0,
    string line_begin (line_start, left);
    char const *line_chars = line_begin.c_str ();
  
-  *column = 0;
    *line_char = 0;
-
-  mbstate_t state;
-
-  /* Initialize the state.  */
-  memset (&state, '\0', sizeof (state));
+  *column = 0;
+  *byte_offset = 0;
  
    while (left > 0)
      {
-      /*
-       FIXME, this is apparently locale dependent.
-      */
-#if HAVE_MBRTOWC
-      wchar_t multibyte[2];
-      size_t thislen = mbrtowc (multibyte, line_chars, left, &state);
-#else
-      size_t thislen = 1;
-#endif /* !HAVE_MBRTOWC */
-
-      /* Stop converting at invalid character;
-        this can mean we have read just the first part
-        of a valid character.  */
-      if (thislen == (size_t) -1)
-       break;
-
-      /* We want to handle embedded NUL bytes
-        but the return value is 0.  Correct this.  */
-      if (thislen == 0)
-       thislen = 1;
+      size_t thislen = utf8_char_len (*line_chars);
  
        if (thislen == 1 && line_chars[0] == '\t')
         (*column) = (*column / 8 + 1) * 8;
        else
         (*column)++;
  
+      (*line_char)++;
+
        /*
-       For accurate error output, consider multibyte
-       characters as a series of characters.
+       To have decent output in UTF-8 aware terminals,
+       we must keep track of the number of bytes from
+       the left edge of the terminal.
        */
-      (*line_char) += thislen;
+      *byte_offset += thislen;
  
        /* Advance past this character. */
        line_chars += thislen;
author	Patrick McCarty <pnorcks@gmail.com>
	Wed, 11 Nov 2009 05:22:25 +0000 (21:22 -0800)
committer	Patrick McCarty <pnorcks@gmail.com>
	Fri, 13 Nov 2009 00:37:48 +0000 (16:37 -0800)
lily/include/input.hh		patch \| blob \| history
lily/include/source-file.hh		patch \| blob \| history
lily/input-scheme.cc		patch \| blob \| history
lily/input.cc		patch \| blob \| history
lily/source-file.cc		patch \| blob \| history