Release: bump VERSION.

[lilypond.git] / lily / pdf-scheme.cc
diff --git a/lily/pdf-scheme.cc b/lily/pdf-scheme.cc

index 6d717c55ad70dfd732462ca750536d049edece76..61cf382e6b45e21955f87d34df57609fc39b48a7 100644 (file)
--- a/lily/pdf-scheme.cc
+++ b/lily/pdf-scheme.cc
@@ -1,7 +1,7 @@
  /*
    This file is part of LilyPond, the GNU music typesetter.
  
-  Copyright (C) 2011 Reinhold Kainhofer <reinhold@kainhofer.com>
+  Copyright (C) 2011--2015 Reinhold Kainhofer <reinhold@kainhofer.com>
  
    LilyPond is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -20,41 +20,80 @@
  #include <glib.h>
  using namespace std;
  
+#include "international.hh"
+#include "warn.hh"
  #include "lily-guile.hh"
  
-
  LY_DEFINE (ly_encode_string_for_pdf, "ly:encode-string-for-pdf",
-          1, 0, 0, (SCM str),
-          "Check whether the string needs to be encoded for PDF output (Latin1,"
-          " PDFDocEncoding or in the most general case UTF-16BE).")
+           1, 0, 0, (SCM str),
+           "Encode the given string to either Latin1 (which is a subset of"
+           " the PDFDocEncoding) or if that's not possible to full UTF-16BE"
+           " with Byte-Order-Mark (BOM).")
  {
    LY_ASSERT_TYPE (scm_is_string, str, 1);
    char *p = ly_scm2str0 (str);
    char *g = NULL;
-  const char *charset;
+  char const *charset = "UTF-8"; // Input is ALWAYS UTF-8!
    gsize bytes_written = 0;
-  g_get_charset (&charset); /* The current locale */
  
-  /* First, try to convert to ISO-8859-1 (no encodings required) */
+#if 0
+
+  /* First, try to convert to ISO-8859-1 (no encodings required). This will
+   * fail if the string contains characters outside ISO-8859-1 (Latin1), so
+   * we do not check for errors. */
    g = g_convert (p, -1, "ISO-8859-1", charset, 0, &bytes_written, 0);
+
+#else
+
+  /* In contrast to the above comment, we do _not_ try full ISO-8859-1
+   * since a number of Ghostscript versions fail to properly convert
+   * this into PDF.  UTF-16BE, in contrast, works better with recent
+   * versions of Ghostscript.
+   */
+
+  g = g_convert (p, -1, "ASCII", charset, 0, &bytes_written, 0);
+
+#endif
+
    /* If that fails, we have to resolve to full UTF-16BE */
-  if (!g) {
-    char *g_without_BOM = g_convert (p, -1,  "UTF-16BE", charset, 0, &bytes_written, 0);
-    /* prepend the BOM manually, g_convert doesn't do it! */
-    g = new char[bytes_written+3];
-    g[0] = (char)254;
-    g[1] = (char)255;
-    memcpy (&g[2], g_without_BOM, bytes_written+1); // Copy string + \0
-    free (g_without_BOM);
-    bytes_written += 2;
-  }
+  if (!g)
+    {
+      GError *e = NULL;
+      char *g_without_BOM = g_convert (p, -1, "UTF-16BE", charset, 0, &bytes_written, &e);
+      if (e != NULL)
+        {
+          warning (_f ("Conversion of string `%s' to UTF-16be failed: %s", p, e->message));
+          g_error_free (e);
+        }
+      /* UTF-16BE allows/recommends a byte-order-mark (BOM) of two bytes
+       * \xFE\xFF at the beginning of the string. The pdfmark specification
+       * requires it and depends on it to distinguish PDFDocEncoding from
+       * UTF-16BE. As g_convert does not automatically prepend this BOM
+       * for UTF-16BE (only for UTF-16, which uses little endian by default,
+       * though), we have to prepend it manually. */
+      if (g_without_BOM) // conversion to UTF-16be might have failed (shouldn't!)
+        {
+          g = (char *)malloc ( sizeof (char) * (bytes_written + 3));
+          char const *BOM = "\xFE\xFF";
+          strcpy (g, BOM);
+          memcpy (&g[2], g_without_BOM, bytes_written + 1); // Copy string + \0
+          g_free (g_without_BOM);
+          bytes_written += 2;
+        }
+    }
    free (p);
  
    /* Convert back to SCM object and return it */
-  if (g) {
-    return scm_from_locale_stringn (g, bytes_written);
-  } else {
+  if (g)
+    {
+      /*
+       * Return the raw byte representation of the encoded string (ASCII,
+       * or UTF-16BE with BOM) in a locale-independent way.
+       */
+      SCM string = scm_from_latin1_stringn (g, bytes_written);
+      free(g);
+      return string;
+    }
+  else
      return str;
-  }
-
  }