From aa730216358403d7a94ff54195de9f4dfd27f402 Mon Sep 17 00:00:00 2001 From: Reinhold Kainhofer Date: Tue, 26 Apr 2011 12:11:33 +0200 Subject: [PATCH] Fix "make doc" error: Character set messup in pdf-scheme.cc In "make doc", somehow the LANG env variable is cleared, so glib returned charset=ANSI_X3.4-1968 for the input string. This would disallow any accented characters in the input string. However, we know that the input string is by definition always UTF-8, so the solution is not to use the current locale, but hardcode UTF-8. Once we switch to guile 2.0, we should use (sting->utf16 str 'big) instead of manually converting the string in C. --- lily/pdf-scheme.cc | 63 ++++++++++++++++++++++++++++++-------------- scm/framework-ps.scm | 2 ++ 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/lily/pdf-scheme.cc b/lily/pdf-scheme.cc index 6d717c55ad..d9b7aec601 100644 --- a/lily/pdf-scheme.cc +++ b/lily/pdf-scheme.cc @@ -20,41 +20,64 @@ #include using namespace std; +#include "international.hh" +#include "warn.hh" #include "lily-guile.hh" LY_DEFINE (ly_encode_string_for_pdf, "ly:encode-string-for-pdf", 1, 0, 0, (SCM str), - "Check whether the string needs to be encoded for PDF output (Latin1," - " PDFDocEncoding or in the most general case UTF-16BE).") + "Encode the given string to either Latin1 (which is a subset of" + " the PDFDocEncoding) or if that's not possible to full UTF-16BE" + " with Byte-Order-Mark (BOM).") { LY_ASSERT_TYPE (scm_is_string, str, 1); char *p = ly_scm2str0 (str); char *g = NULL; - const char *charset; + char const *charset="ISO-8859-1"; // Input is ALWAYS UTF-8! gsize bytes_written = 0; - g_get_charset (&charset); /* The current locale */ - /* First, try to convert to ISO-8859-1 (no encodings required) */ + /* First, try to convert to ISO-8859-1 (no encodings required). This will + * fail, if the string contains accented characters, so we do not check + * for errors. */ g = g_convert (p, -1, "ISO-8859-1", charset, 0, &bytes_written, 0); /* If that fails, we have to resolve to full UTF-16BE */ - if (!g) { - char *g_without_BOM = g_convert (p, -1, "UTF-16BE", charset, 0, &bytes_written, 0); - /* prepend the BOM manually, g_convert doesn't do it! */ - g = new char[bytes_written+3]; - g[0] = (char)254; - g[1] = (char)255; - memcpy (&g[2], g_without_BOM, bytes_written+1); // Copy string + \0 - free (g_without_BOM); - bytes_written += 2; - } + if (!g) + { + GError *e = NULL; + char *g_without_BOM = g_convert (p, -1, "UTF-16BE", charset, 0, &bytes_written, &e); + if (e != NULL) + { + warning (_f("Conversion of string `%s' to UTF-16be failed: %s", p, e->message)); + g_error_free (e); + } + /* UTF-16BE allows/recommends a byte-order-mark (BOM) of two bytes + * \xFE\xFF at the begin of the string. The pdfmark specification + * requires it and depends on it to distinguish PdfDocEncoding from + * UTF-16BE. As g_convert does not automatically prepend this BOM + * for UTF-16BE (only for UTF-16, which uses lower endian by default, + * though), we have to prepend it manually. */ + if (g_without_BOM) // conversion to UTF-16be might have failed (shouldn't!) + { + g = new char[bytes_written+3]; + char const *BOM = "\xFE\xFF"; + strcpy (g, BOM); + memcpy (&g[2], g_without_BOM, bytes_written+1); // Copy string + \0 + free (g_without_BOM); + bytes_written += 2; + } + } free (p); /* Convert back to SCM object and return it */ - if (g) { - return scm_from_locale_stringn (g, bytes_written); - } else { + /* FIXME guile-2.0: With guile 2.0 the internal representation of a string + * has changed (char vector rather than binary bytes in + * UTF-8). However, with guile 2.0, ly:encode-string-for-pdf + * is no longer needed and can be replaced by the new + * (string->utf16 str 'big) + */ + if (g) + return scm_take_str (g, bytes_written); // scm_take_str eventually frees g! + else return str; - } - } diff --git a/scm/framework-ps.scm b/scm/framework-ps.scm index b0359bcd88..7c44920d67 100644 --- a/scm/framework-ps.scm +++ b/scm/framework-ps.scm @@ -416,6 +416,8 @@ (define (metadata-encode val) ;; First, call ly:encode-string-for-pdf to encode the string (latin1 or ;; utf-16be), then escape all parentheses and backslashes + ;; FIXME guile-2.0: use (string->utf16 str 'big) instead + (ps-quote (ly:encode-string-for-pdf val))) (define (metadata-lookup-output overridevar fallbackvar field) (let* ((overrideval (ly:modules-lookup (list header) overridevar)) -- 2.39.2