misc.cc: remove utf8_char_len, change callers.

author David Kastrup <dak@gnu.org>

Mon, 2 Jan 2012 15:43:09 +0000 (16:43 +0100)

committer David Kastrup <dak@gnu.org>

Mon, 2 Jan 2012 15:58:12 +0000 (16:58 +0100)
author David Kastrup <dak@gnu.org>
Mon, 2 Jan 2012 15:43:09 +0000 (16:43 +0100)
committer David Kastrup <dak@gnu.org>
Mon, 2 Jan 2012 15:58:12 +0000 (16:58 +0100)
diff --git a/lily/include/misc.hh b/lily/include/misc.hh

index ce4f4c838b49674a656e985dbd632410e8f2524b..18f996ad4b8b07a0847e6ffc96da4dfedc9337c5 100644 (file)
--- a/lily/include/misc.hh
+++ b/lily/include/misc.hh
@@ -64,7 +64,6 @@ Real directed_round (Real f, Direction d);
  Real peak_around (Real epsilon, Real threshold, Real x);
  Real convex_amplifier (Real standard_x, Real increase_factor, Real x);
  string camel_case_to_lisp_identifier (string in);
-vsize utf8_char_len (char);
  
  #endif
  
diff --git a/lily/misc.cc b/lily/misc.cc

index 0fa4a9024006b4e24b0fe0f00e824fc65538dd86..cc673e954898d1c7ec7186f451a3d6a344b6a277 100644 (file)
--- a/lily/misc.cc
+++ b/lily/misc.cc
@@ -94,23 +94,3 @@ camel_case_to_lisp_identifier (string in)
  
    return result;
  }
-
-vsize
-utf8_char_len (char current)
-{
-  vsize char_len = 1;
-
-  // U+10000 - U+10FFFF
-  if ((current & 0xF0) == 0xF0)
-    char_len = 4;
-  // U+0800 - U+FFFF
-  else if ((current & 0xE0) == 0xE0)
-    char_len = 3;
-  // U+0080 - U+07FF
-  else if ((current & 0xC0) == 0xC0)
-    char_len = 2;
-  else if (current & 0x80)
-    programming_error ("invalid UTF-8 string");
-
-  return char_len;
-}
diff --git a/lily/source-file.cc b/lily/source-file.cc

index e6f7a4fb1951a0f66c401368faea76c3da517c39..69611709e1ee4266357ae6dcb752cf3ce8c1e497 100644 (file)
--- a/lily/source-file.cc
+++ b/lily/source-file.cc
@@ -277,30 +277,27 @@ Source_file::get_counts (char const *pos_str0,
    char const *line_start = (char const *)data + line[LEFT];
  
    ssize left = (char const *) pos_str0 - line_start;
+  *byte_offset = left;
+
    string line_begin (line_start, left);
    char const *line_chars = line_begin.c_str ();
  
-  while (left > 0)
+  for (; left > 0; --left, ++line_chars)
      {
-      size_t thislen = utf8_char_len (*line_chars);
-
-      if (thislen == 1 && line_chars[0] == '\t')
+      // Skip UTF-8 continuation bytes.  This is simplistic but
+      // robust, and we warn against non-UTF-8 input in the lexer
+      // already.  In the case of non-UTF-8 or of this function being
+      // called in mid-character, the results are somewhat arbitrary,
+      // but there is no really sane definition anyway.
+      if ((*line_chars & 0xc0) == 0x80)
+       continue;
+
+      if (*line_chars == '\t')
          (*column) = (*column / 8 + 1) * 8;
        else
          (*column)++;
  
        (*line_char)++;
-
-      /*
-        To have decent output in UTF-8 aware terminals,
-        we must keep track of the number of bytes from
-        the left edge of the terminal.
-      */
-      *byte_offset += thislen;
-
-      /* Advance past this character. */
-      line_chars += thislen;
-      left -= thislen;
      }
  }
  
diff --git a/lily/text-interface.cc b/lily/text-interface.cc

index 47171b0949c2fbc5ccd9ac35f92c426edb8e6af0..2bb4545a2067b4529567b419e78f8b585a5f1d8b 100644 (file)
--- a/lily/text-interface.cc
+++ b/lily/text-interface.cc
@@ -33,9 +33,8 @@
  #include "warn.hh"
  
  static void
-replace_special_characters (string *str, SCM props)
+replace_special_characters (string &str, SCM props)
  {
-  vsize i = 0;
    SCM replacement_alist = ly_chain_assoc_get (ly_symbol2scm ("replacement-alist"),
                                                props,
                                                SCM_EOL);
@@ -47,18 +46,21 @@ replace_special_characters (string *str, SCM props)
                          (scm_string_length (scm_caar (s))));
      }
  
-  while (i <= str->size ())
+  for (vsize i = 0; i < str.size (); i++)
      {
+      /* Don't match in mid-UTF-8 */
+      if ((str[i] & 0xc0) == 0x80)
+       continue;
        for (vsize j = max_length + 1; j--;)
          {
-          string dummy = str->substr (i, j);
-          string ligature = robust_scm2string
-                            (ly_assoc_get (ly_string2scm (dummy),
-                                           replacement_alist, SCM_BOOL_F), "");
-          if (ligature != "")
-            str->replace (i, j, ligature);
+         if (j > str.size () - i)
+           continue;
+          string dummy = str.substr (i, j);
+          SCM ligature = ly_assoc_get (ly_string2scm (dummy),
+                                      replacement_alist, SCM_BOOL_F);
+         if (scm_is_true (ligature))
+            str.replace (i, j, robust_scm2string (ligature, ""));
          }
-      i += utf8_char_len ((*str)[i]);
      }
  }
  
@@ -75,7 +77,7 @@ Text_interface::interpret_string (SCM layout_smob,
    Output_def *layout = unsmob_output_def (layout_smob);
    Font_metric *fm = select_encoded_font (layout, props);
  
-  replace_special_characters (&str, props);
+  replace_special_characters (str, props);
  
    /*
      We want to filter strings with a music font that pass through
author	David Kastrup <dak@gnu.org>
	Mon, 2 Jan 2012 15:43:09 +0000 (16:43 +0100)
committer	David Kastrup <dak@gnu.org>
	Mon, 2 Jan 2012 15:58:12 +0000 (16:58 +0100)
lily/include/misc.hh		patch | blob | history
lily/misc.cc		patch | blob | history
lily/source-file.cc		patch | blob | history
lily/text-interface.cc		patch | blob | history