lily/pdf-scheme.cc

   1 /*
   2   This file is part of LilyPond, the GNU music typesetter.
   3
   4   Copyright (C) 2011--2015 Reinhold Kainhofer <reinhold@kainhofer.com>
   5
   6   LilyPond is free software: you can redistribute it and/or modify
   7   it under the terms of the GNU General Public License as published by
   8   the Free Software Foundation, either version 3 of the License, or
   9   (at your option) any later version.
  10
  11   LilyPond is distributed in the hope that it will be useful,
  12   but WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14   GNU General Public License for more details.
  15
  16   You should have received a copy of the GNU General Public License
  17   along with LilyPond.  If not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20 #include <glib.h>
  21 using namespace std;
  22
  23 #include "international.hh"
  24 #include "warn.hh"
  25 #include "lily-guile.hh"
  26
  27 LY_DEFINE (ly_encode_string_for_pdf, "ly:encode-string-for-pdf",
  28            1, 0, 0, (SCM str),
  29            "Encode the given string to either Latin1 (which is a subset of"
  30            " the PDFDocEncoding) or if that's not possible to full UTF-16BE"
  31            " with Byte-Order-Mark (BOM).")
  32 {
  33   LY_ASSERT_TYPE (scm_is_string, str, 1);
  34   char *p = ly_scm2str0 (str);
  35   char *g = NULL;
  36   char const *charset = "UTF-8"; // Input is ALWAYS UTF-8!
  37   gsize bytes_written = 0;
  38
  39 #if 0
  40
  41   /* First, try to convert to ISO-8859-1 (no encodings required). This will
  42    * fail, if the string contains accented characters, so we do not check
  43    * for errors. */
  44   g = g_convert (p, -1, "ISO-8859-1", charset, 0, &bytes_written, 0);
  45
  46 #else
  47
  48   /* In contrast to the above comment, we do _not_ try full ISO-8859-1
  49    * since a number of Ghostscript versions fail to properly convert
  50    * this into PDF.  UTF-16BE, in contrast, works better with recent
  51    * versions of Ghostscript.
  52    */
  53
  54   g = g_convert (p, -1, "ASCII", charset, 0, &bytes_written, 0);
  55
  56 #endif
  57
  58   /* If that fails, we have to resolve to full UTF-16BE */
  59   if (!g)
  60     {
  61       GError *e = NULL;
  62       char *g_without_BOM = g_convert (p, -1, "UTF-16BE", charset, 0, &bytes_written, &e);
  63       if (e != NULL)
  64         {
  65           warning (_f ("Conversion of string `%s' to UTF-16be failed: %s", p, e->message));
  66           g_error_free (e);
  67         }
  68       /* UTF-16BE allows/recommends a byte-order-mark (BOM) of two bytes
  69        * \xFE\xFF at the begin of the string. The pdfmark specification
  70        * requires it and depends on it to distinguish PdfDocEncoding from
  71        * UTF-16BE. As g_convert does not automatically prepend this BOM
  72        * for UTF-16BE (only for UTF-16, which uses lower endian by default,
  73        * though), we have to prepend it manually. */
  74       if (g_without_BOM) // conversion to UTF-16be might have failed (shouldn't!)
  75         {
  76           g = (char *)malloc ( sizeof (char) * (bytes_written + 3));
  77           char const *BOM = "\xFE\xFF";
  78           strcpy (g, BOM);
  79           memcpy (&g[2], g_without_BOM, bytes_written + 1); // Copy string + \0
  80           g_free (g_without_BOM);
  81           bytes_written += 2;
  82         }
  83     }
  84   free (p);
  85
  86   /* Convert back to SCM object and return it */
  87   if (g)
  88     {
  89       /*
  90        * Return the raw byte representation of the UTF-16BE encoded string,
  91        * in a locale independent way.
  92        */
  93       SCM string = scm_from_latin1_stringn (g, bytes_written);
  94       free(g);
  95       return string;
  96     }
  97   else
  98     return str;
  99 }