X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=scm%2Fencoding.scm;h=d500c0c270b20c9f6302bd9ee8e9579961f5d298;hb=5b2bdf8c532aa1b4aa8626f6847938f2ef4ba1be;hp=0bd6fc91466675beb6354505336ecc9626f3a265;hpb=9458fa215af0294e9c38d62cc3a237a85fe50509;p=lilypond.git diff --git a/scm/encoding.scm b/scm/encoding.scm index 0bd6fc9146..d500c0c270 100644 --- a/scm/encoding.scm +++ b/scm/encoding.scm @@ -1,123 +1,60 @@ ;;;; encoding.scm -- font encoding ;;;; ;;;; source file of the GNU LilyPond music typesetter -;;;; -;;;; (c) 2004 Jan Nieuwenhuizen - -;; WIP -;; encoding.ly: -;; #(display (reencode-string "adobe" "latin1" "hellö fóebär")) -;; - - -(define-public (read-encoding-file filename) - "Read .enc file, return (COMMAND-NAME . VECTOR-OF-SYMBOLS)." - (let* ((raw (ly:gulp-file (ly:kpathsea-expand-path filename))) - (string (regexp-substitute/global #f "%[^\n]*" raw 'pre "" 'post)) - (command (match:substring - (string-match "/([^ \t\n\r]*)[ \t\n\r]+[[]" string) 1)) - (encoding (match:substring (string-match "[[](.*)[]]" string) 1)) - (ps-lst (string-tokenize encoding)) - (lst (map (lambda (x) (string->symbol (substring x 1))) ps-lst)) - (vector (list->vector lst))) - (cons command vector))) - -(define (make-encoding-table encoding-vector) - "Return a hash table mapping names to chars. ENCODING-VECTOR is a -vector of symbols." - - (let* ((h (make-hash-table 256))) - - (for-each - (lambda (i) - (hash-set! h (vector-ref encoding-vector i) - (integer->char i))) - (iota 256)) - - h)) - -(define-public (reencode-string mapping str) - "Apply MAPPING, a vector of [0..256) -> char, to STR" - (string-map (lambda (chr) - (vector-ref mapping (char->integer chr))) - str)) - -(define-public (make-encoding-mapping input-encoding output-encoding) - "Contruct a mapping by applying output-encoding after input-encoding " - (list->vector - (map - (lambda (byte) - (let ((new-char (hash-ref - output-encoding (vector-ref input-encoding byte) #f))) - ;;input-encoding (vector-ref output-encoding byte) #f))) - - ;; substitute space for unknown characters. - (if (char? new-char) - new-char - #\ ))) - (iota 256)))) - - -(define (get-coding-from-file filename) - "Read FILENAME, return a list containing encoding vector and table" - (let* ((coding (read-encoding-file filename)) - (com (car coding)) - (vec (cdr coding)) - (tab (make-encoding-table vec))) - (list com vec tab))) - -;; coding-alist maps NAME -> (list FILENAME COMMAND VECTOR TAB) -(define coding-alist - - (map (lambda (x) - (cons (car x) - (cons (cdr x) (delay (get-coding-from-file (cdr x)))))) - - '( - ;; teTeX - ("TeX-typewriter-text" . "09fbbfac.enc") ;; cmtt10 - ("TeX-math-symbols" . "10037936.enc") ;; cmbsy - ("ASCII-caps-and-digits" . "1b6d048e.enc") ;; cminch - ("TeX-math-italic" . "aae443f0.enc") ;; cmmi10 - ("TeX-extended-ASCII" . "d9b29452.enc") - ("TeX-text" . "cmr.enc") - ("TeX-text-without-f-ligatures" . "0ef0afca.enc") - ("Extended-TeX-Font-Encoding---Latin" . "tex256.enc") - ("AdobeStandardEncoding" . "8a.enc") - - ;; aliases - ("T1" . "tex256.enc") - ("adobe" . "8a.enc") - ("latin1" . "latin1.enc") - ("ec" . "EC.enc") - - ;; LilyPond. - ("fetaBraces" . "feta-braces-a.enc") - ("fetaNumber" . "feta-nummer10.enc") - ("fetaMusic" . "feta20.enc") - ("parmesanMusic" . "parmesan20.enc")) - )) - -(define (get-coding coding-name) - (let ((entry (assoc-get coding-name coding-alist))) - (if entry (cons (car entry) (force (cdr entry))) - (if (equal? coding-name "feta-music") - (begin - (ly:warn "installation problem: deprecated encoding requested: ~S" coding-name) - (exit 1)) - (let ((fallback "latin1")) - (ly:warn "programming error: no such encoding: ~S" coding-name) - (ly:warn "programming error: cross thumbs, using: ~S:" fallback) - (get-coding fallback)))))) - -(define-public (get-coding-filename coding-name) - (car (get-coding coding-name))) - -(define-public (get-coding-command coding-name) - (cadr (get-coding coding-name))) - -(define-public (get-coding-vector coding-name) - (caddr (get-coding coding-name))) - -(define-public (get-coding-table coding-name) - (cadddr (get-coding coding-name))) +;;;; +;;;; (c) 2004--2008 Jan Nieuwenhuizen + +(define-public latin1-coding-vector + #(.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef + .notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef + .notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef + .notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef + %% 0x20 + space exclam quotedbl numbersign dollar percent ampersand quoteright + parenleft parenright asterisk plus comma hyphen period slash + zero one two three four five six seven + eight nine colon semicolon less equal greater question + %% 0x40 + at A B C D E F G + H I J K L M N O + P Q R S T U V W + X Y Z bracketleft backslash bracketright asciicircum underscore + %% 0x60 + `quoteleft a b c d e f g + h i j k l m n o + p q r s t u v w + x y z braceleft bar braceright asciitilde .notdef + %% 0x80 + .notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef + .notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef + dotlessi grave acute circumflex tilde macron breve dotaccent + dieresis .notdef ring cedilla .notdef hungarumlaut ogonek caron + %% 0xA0 + space exclamdown cent sterling currency yen brokenbar section + dieresis copyright ordfeminine guillemotleft logicalnot hyphen registered macron + degree plusminus twosuperior threesuperior acute mu paragraph periodcentered + cedilla onesuperior ordmasculine guillemotright onequarter onehalf threequarters questiondown + %% 0xC0 + Agrave Aacute Acircumflex Atilde Adieresis Aring AE Ccedilla + Egrave Eacute Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis + Eth Ntilde Ograve Oacute Ocircumflex Otilde Odieresis multiply + Oslash Ugrave Uacute Ucircumflex Udieresis Yacute Thorn germandbls + %% 0xE0 + agrave aacute acircumflex atilde adieresis aring ae ccedilla + egrave eacute ecircumflex edieresis igrave iacute icircumflex idieresis + eth ntilde ograve oacute ocircumflex otilde odieresis divide + oslash ugrave uacute ucircumflex udieresis yacute thorn ydieresis)) + + +(define-public (decode-byte-string str) + "Return vector of glyphname symbols that correspond to string, +assuming that STR is byte-coded using ENCODING-NAME." + + (let* ((len (string-length str)) + (output-vector (make-vector len '.notdef))) + (do + ((idx 0 (1+ idx))) + ((>= idx len) output-vector) + (vector-set! output-vector idx + (vector-ref latin1-coding-vector + (char->integer (string-ref str idx)))))))