2 This file is part of LilyPond, the GNU music typesetter.
4 Copyright (C) 2011 Reinhold Kainhofer <reinhold@kainhofer.com>
6 LilyPond is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 LilyPond is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with LilyPond. If not, see <http://www.gnu.org/licenses/>.
23 #include "international.hh"
25 #include "lily-guile.hh"
27 LY_DEFINE (ly_encode_string_for_pdf, "ly:encode-string-for-pdf",
29 "Encode the given string to either Latin1 (which is a subset of"
30 " the PDFDocEncoding) or if that's not possible to full UTF-16BE"
31 " with Byte-Order-Mark (BOM).")
33 LY_ASSERT_TYPE (scm_is_string, str, 1);
34 char *p = ly_scm2str0 (str);
36 char const *charset = "UTF-8"; // Input is ALWAYS UTF-8!
37 gsize bytes_written = 0;
39 /* First, try to convert to ISO-8859-1 (no encodings required). This will
40 * fail if the string contains characters that Latin-1 cannot represent, so we do not check
42 g = g_convert (p, -1, "ISO-8859-1", charset, 0, &bytes_written, 0);
43 /* If that fails, we have to resort to full UTF-16BE */
47 char *g_without_BOM = g_convert (p, -1, "UTF-16BE", charset, 0, &bytes_written, &e);
50 warning (_f ("Conversion of string `%s' to UTF-16be failed: %s", p, e->message));
53 /* UTF-16BE allows/recommends a byte-order mark (BOM) of two bytes
54 * \xFE\xFF at the beginning of the string. The pdfmark specification
55 * requires it and depends on it to distinguish PDFDocEncoding from
56 * UTF-16BE. As g_convert does not automatically prepend this BOM
57 * for UTF-16BE (only for UTF-16, which uses little-endian by default,
58 * though), we have to prepend it manually. */
59 if (g_without_BOM) // conversion to UTF-16be might have failed (shouldn't!)
61 g = (char *)malloc ( sizeof (char) * (bytes_written + 3));
62 char const *BOM = "\xFE\xFF";
64 memcpy (&g[2], g_without_BOM, bytes_written + 1); // Copy string + \0
65 g_free (g_without_BOM);
71 /* Convert back to SCM object and return it */
72 /* FIXME guile-2.0: With guile 2.0 the internal representation of a string
73 * has changed (char vector rather than binary bytes in
74 * UTF-8). However, with guile 2.0, ly:encode-string-for-pdf
75 * is no longer needed and can be replaced by the new
76 * (string->utf16 str 'big)
79 return scm_take_str (g, bytes_written); // scm_take_str eventually frees g!