1 %{ // -*- mode: c++; c-file-style: "linux" -*-
3 This file is part of LilyPond, the GNU music typesetter.
5 Copyright (C) 1996--2012 Han-Wen Nienhuys <hanwen@xs4all.nl>
6 Jan Nieuwenhuizen <janneke@gnu.org>
8 LilyPond is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 LilyPond is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with LilyPond. If not, see <http://www.gnu.org/licenses/>.
22 /* Mode and indentation are at best a rough approximation based on TAB
23 * formatting (reasonable for compatibility with unspecific editor
24 * modes as Flex modes are hard to find) and need manual correction
25 * frequently. Without a reasonably dependable way of formatting a
26 * Flex file sensibly, there is little point in trying to fix the
27 * inconsistent state of indentation.
33 after making a change to the lexer rules, run
34 flex -b <this lexer file>
37 contains no backup states, but only the reminder
38 Compressed tables always back up.
39 (don't forget to rm lex.yy.cc :-)
48 /* Flex >= 2.5.29 fix; FlexLexer.h's multiple include bracing breaks
49 when building the actual lexer. */
56 #include "context-def.hh"
57 #include "duration.hh"
58 #include "international.hh"
59 #include "interval.hh"
60 #include "lily-guile.hh"
61 #include "lily-lexer.hh"
62 #include "lily-parser.hh"
63 #include "lilypond-version.hh"
66 #include "music-function.hh"
67 #include "parse-scm.hh"
70 #include "source-file.hh"
71 #include "std-string.hh"
72 #include "string-convert.hh"
79 #define isatty HORRIBLEKLUDGE
81 void strip_trailing_white (string&);
82 void strip_leading_white (string&);
83 string lyric_fudge (string s);
84 SCM lookup_markup_command (string s);
85 SCM lookup_markup_list_command (string s);
86 bool is_valid_version (string s);
89 #define start_quote() do { \
90 yy_push_state (quote); \
94 #define start_lyric_quote() do { \
95 yy_push_state (lyric_quote); \
99 #define yylval (*lexval_)
101 #define yylloc (*lexloc_)
103 #define YY_USER_ACTION add_lexed_char (YYLeng ());
106 SCM scan_fraction (string);
107 SCM (* scm_parse_error_handler) (void *);
117 %option yyclass="Lily_lexer"
119 %option never-interactive
136 /* The strategy concerning multibyte characters is to accept them but
137 * call YYText_utf8 for patterns that might contain them, in order to
138 * get a single code path responsible for flagging non-UTF-8 input:
139 * Patterns for accepting only valid UTF-8 without backing up are
140 * really hard to do and complex, and if nice error messages are
141 * wanted, one would need patterns catching the invalid input as well.
143 * Since editors and operating environments don't necessarily behave
144 * reasonably in the presence of mixed encodings, we flag encoding
145 * errors also in identifiers, comments, and strings where it would be
146 * conceivable to just transparently work with the byte string. But
147 * the whole point of caring about UTF-8 in here at all is to avoid
148 * stranger errors later when input passes into backends or log files
149 * or console output or error messages.
158 NATIONAL [\001-\006\021-\027\031\036]
159 TEX {AA}|-|{PUNCT}|{NATIONAL}|{SPECIAL_CHAR}
160 WORD {A}([-_]{A}|{A})*
167 REAL ({INT}\.{N}*)|(-?\.{N}+)
169 HORIZONTALWHITE [ \t]
172 LYRICS ({AA}|{TEX})[^0-9 \t\n\r\f]*
176 BOM_UTF8 \357\273\277
182 // swallow and ignore carriage returns
185 <extratoken>{ANY_CHAR} {
// Delivers one queued token from extra_tokens_ instead of a token
// derived from the matched character.
186 /* Generate a token without swallowing anything */
188 /* First unswallow the eaten character */
// YY_USER_ACTION has already counted the matched text through
// add_lexed_char (YYLeng ()); the negative count undoes that bookkeeping.
189 add_lexed_char (-YYLeng ());
192 /* produce requested token */
// The head of extra_tokens_ is a (type . value) pair: the type is decoded
// to an int, the value goes to yylval, and the entry is removed.
193 int type = scm_to_int (scm_caar (extra_tokens_));
194 yylval = scm_cdar (extra_tokens_);
195 extra_tokens_ = scm_cdr (extra_tokens_);
// NOTE(review): presumably the extratoken state is left once the queue
// has run empty - confirm against the statement guarded by this test.
196 if (scm_is_null (extra_tokens_))
202 <extratoken><<EOF>> {
// Same delivery of a queued token as the ANY_CHAR variant of this rule,
// but at end of input there is no matched character to give back.
203 /* Generate a token without swallowing anything */
205 /* produce requested token */
// Pop the (type . value) pair at the head of extra_tokens_ and expose the
// value through yylval.
206 int type = scm_to_int (scm_caar (extra_tokens_));
207 yylval = scm_cdar (extra_tokens_);
208 extra_tokens_ = scm_cdr (extra_tokens_);
// NOTE(review): presumably the extratoken state is left once the queue
// has run empty - confirm against the statement guarded by this test.
209 if (scm_is_null (extra_tokens_))
215 /* Use the trailing context feature. Otherwise, the BOM will not be
216 found if the file starts with an identifier definition. */
217 <INITIAL,chords,lyrics,figures,notes>{BOM_UTF8}/.* {
// A byte order mark is only expected at line 1, column 0 of the input;
// anywhere else it is reported as stray.
218 if (this->lexloc_->line_number () != 1 || this->lexloc_->column_number () != 0)
220 LexerWarning (_ ("stray UTF-8 BOM encountered").c_str ());
// NOTE(review): presumably this message belongs to the alternative
// branch, i.e. a BOM at the very start is skipped silently apart from
// debug output - confirm.
223 debug_output (_ ("Skipping UTF-8 BOM"));
226 <INITIAL,chords,figures,incl,lyrics,markup,notes>{
228 yy_push_state (longcomment);
230 %[^{\n\r][^\n\r]*[\n\r] {
231 (void) YYText_utf8 ();
233 %[^{\n\r] { // backup rule
234 (void) YYText_utf8 ();
239 (void) YYText_utf8 ();
246 <INITIAL,notes,figures,chords,markup>{
252 <INITIAL,chords,lyrics,notes,figures>\\version{WHITE}* {
253 yy_push_state (version);
255 <INITIAL,chords,lyrics,notes,figures>\\sourcefilename{WHITE}* {
256 yy_push_state (sourcefilename);
258 <INITIAL,chords,lyrics,notes,figures>\\sourcefileline{WHITE}* {
259 yy_push_state (sourcefileline);
261 <version>\"[^"]*\" { /* got the version number */
262 string s (YYText_utf8 () + 1);
263 s = s.substr (0, s.rfind ('\"'));
267 SCM top_scope = scm_car (scm_last_pair (scopes_));
268 scm_module_define (top_scope, ly_symbol2scm ("version-seen"), SCM_BOOL_T);
270 if (!is_valid_version (s)) {
271 yylval = SCM_UNSPECIFIED;
275 <sourcefilename>\"[^""]*\" {
276 string s (YYText_utf8 () + 1);
277 s = s.substr (0, s.rfind ('\"'));
280 this->here_input().get_source_file ()->name_ = s;
281 message (_f ("Renaming input to: `%s'", s.c_str ()));
282 progress_indication ("\n");
283 scm_module_define (scm_car (scopes_),
284 ly_symbol2scm ("input-file-name"),
289 <sourcefileline>{INT} {
291 sscanf (YYText (), "%d", &i);
294 this->here_input ().get_source_file ()->set_line (here_input ().start (), i);
297 <version>{ANY_CHAR} {
298 LexerError (_ ("quoted string expected after \\version").c_str ());
301 <sourcefilename>{ANY_CHAR} {
302 LexerError (_ ("quoted string expected after \\sourcefilename").c_str ());
305 <sourcefileline>{ANY_CHAR} {
306 LexerError (_ ("integer expected after \\sourcefileline").c_str ());
311 (void) YYText_utf8 ();
314 (void) YYText_utf8 ();
322 <INITIAL,chords,lyrics,notes,figures>\\maininput {
326 is_main_input_ = true;
329 error (_ ("\\maininput not allowed outside init files"));
332 <INITIAL,chords,lyrics,figures,notes>\\include {
333 yy_push_state (incl);
335 <incl>\"[^""]*\" { /* got the include file name */
336 string s (YYText_utf8 ()+1);
337 s = s.substr (0, s.rfind ('"'));
339 new_input (s, sources_);
342 <incl>\\{BLACK}*{WHITE}? { /* got the include identifier */
// The file to include is named by an identifier: skip the leading
// backslash, drop trailing whitespace and one optional trailing
// semicolon, then look the name up.
343 string s = YYText_utf8 () + 1;
344 strip_trailing_white (s);
345 if (s.length () && (s[s.length () - 1] == ';'))
346 s = s.substr (0, s.length () - 1);
348 SCM sid = lookup_identifier (s);
// Only a string value is usable as a file name and is handed to
// new_input ().
349 if (scm_is_string (sid)) {
350 new_input (ly_scm2string (sid), sources_);
// Any other value is an error; the value found is printed on the
// Scheme error port to help diagnosis.
353 string msg (_f ("wrong or undefined identifier: `%s'", s ));
355 LexerError (msg.c_str ());
356 SCM err = scm_current_error_port ();
357 scm_puts ("This value was found in the table: ", err);
358 scm_display (sid, err);
361 <incl>(\$|#) { // scm for the filename
// The file name is given as a Scheme expression.  It is read starting at
// the current input position and evaluated right away.
363 Input hi = here_input();
// NOTE(review): n is presumably the number of input characters consumed
// by the Scheme reader - confirm against ly_parse_scm.
365 SCM sval = ly_parse_scm (hi.start (), &n, hi,
366 be_safe_global && is_main_input_, parser_);
367 sval = eval_scm (sval);
// Account for the text taken by the Scheme reader: the loop runs once per
// consumed character and the running character count grows by n.
369 for (int i = 0; i < n; i++)
373 char_count_stack_.back () += n;
// A string result names the file to include.
375 if (scm_is_string (sval)) {
376 new_input (ly_scm2string (sval), sources_);
// Anything else is an error; a defined value is shown on the Scheme error
// port.
379 LexerError (_ ("string expected after \\include").c_str ());
380 if (sval != SCM_UNDEFINED) {
381 SCM err = scm_current_error_port ();
382 scm_puts ("This value was found instead: ", err);
383 scm_display (sval, err);
388 <incl,version,sourcefilename>\"[^""]* { // backup rule
389 error (_ ("end quote missing"));
393 /* Flex picks the longest matching pattern including trailing
394 * contexts. Without the backup pattern, r-. does not trigger the
395 * {RESTNAME} rule but rather the {WORD}/[-_] rule coming later,
396 * needed for avoiding backup states.
399 <chords,notes,figures>{RESTNAME}/[-_] | // pseudo backup rule
400 <chords,notes,figures>{RESTNAME} {
401 char const *s = YYText ();
402 yylval = scm_from_locale_string (s);
405 <chords,notes,figures>q/[-_] | // pseudo backup rule
406 <chords,notes,figures>q {
407 yylval = SCM_UNSPECIFIED;
408 return CHORD_REPETITION;
411 <chords,notes,figures>R/[-_] | // pseudo backup rule
412 <chords,notes,figures>R {
413 yylval = SCM_UNSPECIFIED;
414 return MULTI_MEASURE_REST;
416 <INITIAL,chords,figures,lyrics,markup,notes># { //embedded scm
// A hash sign introduces a Scheme expression, which is read starting at
// the current input position.  This rule does not call eval_scm in the
// lines shown, unlike the rule for the dollar sign.
418 Input hi = here_input();
// NOTE(review): n is presumably the number of input characters consumed
// by the Scheme reader - confirm against ly_parse_scm.
420 SCM sval = ly_parse_scm (hi.start (), &n, hi,
421 be_safe_global && is_main_input_, parser_);
// SCM_UNDEFINED from the reader is treated as a special case.
423 if (sval == SCM_UNDEFINED)
// Account for the text taken by the Scheme reader.
426 for (int i = 0; i < n; i++)
430 char_count_stack_.back () += n;
436 <INITIAL,chords,figures,lyrics,markup,notes>\$ { //immediate scm
// A dollar sign introduces a Scheme expression that is read, evaluated
// immediately, and then classified like an identifier value.
438 Input hi = here_input();
// NOTE(review): n is presumably the number of input characters consumed
// by the Scheme reader - confirm against ly_parse_scm.
440 SCM sval = ly_parse_scm (hi.start (), &n, hi,
441 be_safe_global && is_main_input_, parser_);
// Account for the text taken by the Scheme reader.
443 for (int i = 0; i < n; i++)
447 char_count_stack_.back () += n;
// Passing the dollar character makes eval_scm turn additional values of
// a multiple-value result into extra tokens.
449 sval = eval_scm (sval, '$');
// scan_scm_id chooses the token type and may set yylval; the test below
// distinguishes the case where it left yylval unspecified.
451 int token = scan_scm_id (sval);
452 if (!scm_is_eq (yylval, SCM_UNSPECIFIED))
456 <INITIAL,notes,lyrics>{
458 yylval = SCM_UNSPECIFIED;
459 return DOUBLE_ANGLE_OPEN;
462 yylval = SCM_UNSPECIFIED;
463 return DOUBLE_ANGLE_CLOSE;
469 yylval = SCM_UNSPECIFIED;
473 yylval = SCM_UNSPECIFIED;
480 yylval = SCM_UNSPECIFIED;
484 yylval = SCM_UNSPECIFIED;
488 yylval = SCM_UNSPECIFIED;
494 {WORD}/[-_] | // backup rule
496 return scan_bare_word (YYText_utf8 ());
499 {COMMAND}/[-_] | // backup rule
501 return scan_escaped_word (YYText_utf8 () + 1);
504 yylval = scan_fraction (YYText ());
507 {UNSIGNED}/\/ | // backup rule
509 yylval = scm_c_read_string (YYText ());
513 yylval = scm_c_read_string (YYText () + 1);
520 char c = escaped_char (YYText ()[1]);
521 yylval = scm_cons (scm_from_locale_stringn (&c, 1),
525 yylval = scm_cons (scm_from_locale_string (YYText_utf8 ()),
532 /* yylval is union. Must remember STRING before setting SCM*/
534 yylval = scm_string_concatenate_reverse (yylval,
538 return is_lyric_state () ? LYRICS_STRING : STRING;
541 yylval = scm_cons (scm_from_locale_string (YYText ()),
548 start_lyric_quote ();
551 yylval = scan_fraction (YYText ());
554 {UNSIGNED}/\/ | // backup rule
556 yylval = scm_c_read_string (YYText ());
559 {COMMAND}/[-_] | // backup rule
561 return scan_escaped_word (YYText_utf8 () + 1);
565 string s (YYText_utf8 ());
566 yylval = SCM_UNSPECIFIED;
573 char c = s[s.length () - 1];
574 if (c == '{' || c == '}') // brace open is for not confusing dumb tools.
575 here_input ().warning (
576 _ ("Brace found at end of lyric. Did you forget a space?"));
577 yylval = ly_string2scm (s);
580 return LYRICS_STRING;
583 yylval = SCM_UNSPECIFIED;
584 return YYText ()[0]; // LYRICS already catches all multibytes.
588 {WORD}/[-_] | // backup rule
590 return scan_bare_word (YYText_utf8 ());
592 {COMMAND}/[-_] | // backup rule
594 return scan_escaped_word (YYText_utf8 () + 1);
597 yylval = scan_fraction (YYText ());
600 {UNSIGNED}/\/ | // backup rule
602 yylval = scm_c_read_string (YYText ());
606 yylval = SCM_UNSPECIFIED;
610 yylval = SCM_UNSPECIFIED;
614 yylval = SCM_UNSPECIFIED;
618 yylval = SCM_UNSPECIFIED;
622 yylval = SCM_UNSPECIFIED;
626 yylval = SCM_UNSPECIFIED;
627 return YYText ()[0]; // WORD catches all multibyte.
634 yylval = SCM_UNSPECIFIED;
637 {COMMAND}/[-_] | // backup rule
639 string str (YYText_utf8 () + 1);
641 int token_type = MARKUP_FUNCTION;
642 SCM s = lookup_markup_command (str);
644 // lookup-markup-command returns a pair with the car
645 // being the function to call, and the cdr being the
646 // call signature specified to define-markup-command,
647 // a list of predicates.
649 if (!scm_is_pair (s)) {
650 // If lookup-markup-command was not successful, we
651 // try lookup-markup-list-command instead.
652 // If this fails as well, we just scan and return
654 s = lookup_markup_list_command (str);
656 token_type = MARKUP_LIST_FUNCTION;
658 return scan_escaped_word (str);
661 // If the list of predicates is, say,
662 // (number? number? markup?), then tokens
663 // EXPECT_MARKUP EXPECT_SCM EXPECT_SCM EXPECT_NO_MORE_ARGS
664 // will be generated. Note that we have to push them
665 // in reverse order, so the first token pushed in the
666 // loop will be EXPECT_NO_MORE_ARGS.
670 // yylval now contains the function to call as token
671 // value (for token type MARKUP_FUNCTION or
672 // MARKUP_LIST_FUNCTION).
674 push_extra_token(EXPECT_NO_MORE_ARGS);
676 for (; scm_is_pair(s); s = scm_cdr(s)) {
677 SCM predicate = scm_car(s);
679 if (predicate == ly_lily_module_constant ("markup-list?"))
680 push_extra_token(EXPECT_MARKUP_LIST);
681 else if (predicate == ly_lily_module_constant ("markup?"))
682 push_extra_token(EXPECT_MARKUP);
684 push_extra_token(EXPECT_SCM, predicate);
689 yylval = SCM_UNSPECIFIED;
692 [^$#{}\"\\ \t\n\r\f]+ {
693 string s (YYText_utf8 ());
695 yylval = ly_string2scm (s);
699 yylval = SCM_UNSPECIFIED;
700 return YYText()[0]; // Above is catchall for multibyte
704 <longcomment><<EOF>> {
705 LexerError (_ ("EOF found inside a comment").c_str ());
706 is_main_input_ = false; // should be safe , can't have \include in --safe.
707 yylval = SCM_UNSPECIFIED;
709 yyterminate (); // can't move this, since it actually rets a YY_NULL
713 yylval = SCM_UNSPECIFIED;
716 /* 2 = init.ly + current file.
717 > because we're before closing, but is_main_input_ should
720 is_main_input_ = include_stack_.size () > 2;
721 if (!close_input () || !is_main_input_)
722 /* Returns YY_NULL */
725 else if (!close_input ())
726 /* Returns YY_NULL */
731 {WORD}/[-_] | // backup rule
733 return scan_bare_word (YYText_utf8 ());
735 {COMMAND}/[-_] | // backup rule
737 return scan_escaped_word (YYText_utf8 () + 1);
742 yylval = scan_fraction (YYText ());
746 -{UNSIGNED} | // backup rule
748 yylval = scm_c_read_string (YYText ());
752 {UNSIGNED}/\/ | // backup rule
754 yylval = scm_c_read_string (YYText ());
760 yylval = SCM_UNSPECIFIED;
764 -/\. | // backup rule
766 yylval = SCM_UNSPECIFIED;
770 <INITIAL,notes,figures>. {
771 yylval = SCM_UNSPECIFIED;
775 <INITIAL,lyrics,notes,figures>\\. {
776 yylval = SCM_UNSPECIFIED;
777 char c = YYText ()[1];
781 return E_ANGLE_CLOSE;
785 return E_EXCLAMATION;
791 return E_BRACKET_OPEN;
795 return E_BRACKET_CLOSE;
807 string msg = _f ("invalid character: `%s'", YYText_utf8 ());
808 LexerError (msg.c_str ());
809 yylval = SCM_UNSPECIFIED;
810 return '%'; // Better not return half a utf8 character.
815 /* Make the lexer generate a token of the given type as the next token.
816 The scm argument is queued together with the type and becomes the
   value of that token: the extratoken rules assign it to yylval. */
// Pending tokens are kept in the association list extra_tokens_; every
// call conses a new (type . value) entry onto its front, so the token
// pushed last is delivered first.
818 Lily_lexer::push_extra_token (int token_type, SCM scm)
// An empty queue means the extratoken start condition has to be entered.
// The start condition active before is kept in hidden_state_ so that
// get_state () can still report it.
820 if (scm_is_null (extra_tokens_))
822 if (YY_START != extratoken)
823 hidden_state_ = YY_START;
824 yy_push_state (extratoken);
826 extra_tokens_ = scm_acons (scm_from_int (token_type), scm, extra_tokens_);
// Enters the chords start condition with alist as the active table of
// pitch names.
830 Lily_lexer::push_chord_state (SCM alist)
// pitchname_tab_stack_ holds (alist . hash table) pairs.  A pair already
// present for this very alist (assq compares by identity) is reused so
// the hash table is not rebuilt.
832 SCM p = scm_assq (alist, pitchname_tab_stack_);
834 if (scm_is_false (p))
835 p = scm_cons (alist, alist_to_hashq (alist));
// The pair becomes the new top of the stack; pop_state () removes it
// again when a notes or chords state is left.
836 pitchname_tab_stack_ = scm_cons (p, pitchname_tab_stack_);
837 yy_push_state (chords);
// Enters the figures start condition on top of the current one.
841 Lily_lexer::push_figuredbass_state ()
843 yy_push_state (figures);
// Enters the INITIAL start condition on top of the current one.
847 Lily_lexer::push_initial_state ()
849 yy_push_state (INITIAL);
// Enters the lyrics start condition on top of the current one.
853 Lily_lexer::push_lyric_state ()
855 yy_push_state (lyrics);
// Enters the markup start condition on top of the current one.
859 Lily_lexer::push_markup_state ()
861 yy_push_state (markup);
// Enters the notes start condition with alist as the active table of
// pitch names.
865 Lily_lexer::push_note_state (SCM alist)
// Remember whether queued extra tokens are pending, i.e. whether the
// extratoken condition is on top of the state stack right now.
867 bool extra = (YYSTATE == extratoken);
// Reuse the (alist . hash table) pair of an alist that is already on
// pitchname_tab_stack_ (identity comparison), otherwise build a new one.
869 SCM p = scm_assq (alist, pitchname_tab_stack_);
874 if (scm_is_false (p))
875 p = scm_cons (alist, alist_to_hashq (alist));
876 pitchname_tab_stack_ = scm_cons (p, pitchname_tab_stack_);
877 yy_push_state (notes);
// NOTE(review): presumably executed only when extra is set, so that the
// extratoken condition ends up on top again and hidden_state_ names the
// state just entered - confirm against the surrounding conditionals.
880 hidden_state_ = YYSTATE;
881 yy_push_state (extratoken);
// Leaves the current lexer state, the counterpart of the push_*_state
// functions.
886 Lily_lexer::pop_state ()
// Remember whether queued extra tokens are pending, i.e. whether the
// extratoken condition is on top of the state stack right now.
888 bool extra = (YYSTATE == extratoken);
// notes and chords states each own one entry of pitchname_tab_stack_
// (pushed by push_note_state and push_chord_state); drop it with them.
893 if (YYSTATE == notes || YYSTATE == chords)
894 pitchname_tab_stack_ = scm_cdr (pitchname_tab_stack_);
// NOTE(review): presumably executed only when extra is set, restoring the
// extratoken condition on top of the state that is current after the
// pop - confirm against the surrounding conditionals.
899 hidden_state_ = YYSTATE;
900 yy_push_state (extratoken);
// Returns the token type to use for the identifier value sid.
905 Lily_lexer::identifier_type (SCM sid)
// try_special_identifiers receives the address of yylval and so can set
// the token value; a negative result means it did not pick a type.
907 int k = try_special_identifiers (&yylval , sid);
// Fall back to the generic SCM_IDENTIFIER token in that case.
908 return k >= 0 ? k : SCM_IDENTIFIER;
// Classifies a backslash word; str is the word without the backslash.
// Keywords are tried first, then identifiers; anything else is reported
// as an error and passed on with the word itself as token value.
913 Lily_lexer::scan_escaped_word (string str)
915 // use more SCM for this.
917 // SCM sym = ly_symbol2scm (str.c_str ());
919 yylval = SCM_UNSPECIFIED;
920 int i = lookup_keyword (str);
// The MARKUP keyword is treated specially while lexing lyrics.
921 if (i == MARKUP && is_lyric_state ())
// Not handled as a keyword: a defined identifier is classified by
// scan_scm_id.
926 SCM sid = lookup_identifier (str);
927 if (sid != SCM_UNDEFINED)
928 return scan_scm_id (sid);
// Unknown word: report it, then hand the text on as a string value.
930 string msg (_f ("unknown escaped string: `\\%s'", str));
931 LexerError (msg.c_str ());
933 yylval = ly_string2scm (str);
// Determines the token type for the Scheme value sid.  For music
// functions it also queues one extra token per declared parameter, so
// the parser is told in advance which kind of argument comes next.
939 Lily_lexer::scan_scm_id (SCM sid)
941 if (is_music_function (sid))
943 int funtype = SCM_FUNCTION;
// The signature is a list: its first element describes the result, the
// remaining elements describe the parameters.
947 SCM s = get_music_function_signature (sid);
948 SCM cs = scm_car (s);
// The result entry may be a pair rather than a bare predicate.
950 if (scm_is_pair (cs))
// The result predicate selects the kind of function token.
955 if (scm_is_eq (cs, ly_lily_module_constant ("ly:music?")))
956 funtype = MUSIC_FUNCTION;
957 else if (scm_is_eq (cs, ly_lily_module_constant ("ly:event?")))
958 funtype = EVENT_FUNCTION;
959 else if (ly_is_procedure (cs))
960 funtype = SCM_FUNCTION;
961 else programming_error ("Bad syntax function predicate");
// Extra tokens are delivered last-pushed first, so the end marker is
// queued before the parameter tokens.
963 push_extra_token (EXPECT_NO_MORE_ARGS);
964 for (s = scm_cdr (s); scm_is_pair (s); s = scm_cdr (s))
966 SCM optional = SCM_UNDEFINED;
// A parameter entry that is a pair marks an optional parameter; its cdr
// is kept as the value of the EXPECT_OPTIONAL token queued below.
969 if (scm_is_pair (cs))
971 optional = SCM_CDR (cs);
// Pitch and duration predicates get dedicated tokens; any other
// procedure is passed along as the value of an EXPECT_SCM token.
975 if (cs == Pitch_type_p_proc)
976 push_extra_token (EXPECT_PITCH);
977 else if (cs == Duration_type_p_proc)
978 push_extra_token (EXPECT_DURATION);
979 else if (ly_is_procedure (cs))
980 push_extra_token (EXPECT_SCM, cs);
983 programming_error ("Function parameter without type-checking predicate");
986 if (!scm_is_eq (optional, SCM_UNDEFINED))
987 push_extra_token (EXPECT_OPTIONAL, optional);
// Values that are not music functions are classified by identifier_type.
992 return identifier_type (sid);
// Classifies a word written without a backslash.  In notes and chords
// states the word is first looked up as a pitch name (and, in chords, as
// a chord modifier); otherwise it is passed on as a string value.
996 Lily_lexer::scan_bare_word (string str)
998 SCM sym = ly_symbol2scm (str.c_str ());
999 if ((YYSTATE == notes) || (YYSTATE == chords)) {
1000 SCM handle = SCM_BOOL_F;
// The top entry of pitchname_tab_stack_ is an (alist . hash table) pair;
// the lookup uses the hash table.
1001 if (scm_is_pair (pitchname_tab_stack_))
1002 handle = scm_hashq_get_handle (scm_cdar (pitchname_tab_stack_), sym);
1004 if (scm_is_pair (handle)) {
1005 yylval = scm_cdr (handle);
// A pitch value yields a pitch token whose type depends on the state; a
// symbol value is handled separately.
1006 if (unsmob_pitch (yylval))
1007 return (YYSTATE == notes) ? NOTENAME_PITCH : TONICNAME_PITCH;
1008 else if (scm_is_symbol (yylval))
// NOTE(review): the handle is compared against SCM_BOOL_F with a raw
// operator here, whereas other code in this file uses scm_is_false and
// scm_is_true - consider aligning.
1011 else if ((YYSTATE == chords)
1012 && (handle = scm_hashq_get_handle (chordmodifier_tab_, sym))!= SCM_BOOL_F)
1014 yylval = scm_cdr (handle);
1015 return CHORD_MODIFIER;
// No special meaning found: the word itself becomes the token value.
1018 yylval = ly_string2scm (str);
// Reports the effective lexer state.  While queued extra tokens are being
// delivered the extratoken condition is on top, and the state saved in
// hidden_state_ is reported instead.
// NOTE(review): presumably YY_START is returned otherwise - confirm.
1023 Lily_lexer::get_state () const
1025 if (YY_START == extratoken)
1026 return hidden_state_;
// True when the effective state reported by get_state () is notes.
1032 Lily_lexer::is_note_state () const
1034 return get_state () == notes;
// True when the effective state reported by get_state () is chords.
1038 Lily_lexer::is_chord_state () const
1040 return get_state () == chords;
// True when the effective state reported by get_state () is lyrics.
1044 Lily_lexer::is_lyric_state () const
1046 return get_state () == lyrics;
// True when the effective state reported by get_state () is figures.
1050 Lily_lexer::is_figure_state () const
1052 return get_state () == figures;
1055 // The extra_token parameter specifies how to convert multiple values
1056 // into additional tokens. For '#', additional values get pushed as
1057 // SCM_IDENTIFIER. For '$', they get checked for their type and get
1058 // pushed as a corresponding *_IDENTIFIER token. Since the latter
1059 // tampers with yylval, it can only be done from the lexer itself, so
1060 // this function is private.
// Evaluates data produced by the Scheme reader and returns the resulting
// value.  readerdata is a pair: its car is the expression, its cdr the
// input location.  A nonzero extra_token additionally spreads a
// multiple-value result over several tokens.  One caller in this file
// passes readerdata only, so extra_token presumably has a default in the
// class declaration.
1063 Lily_lexer::eval_scm (SCM readerdata, char extra_token)
1065 SCM sval = SCM_UNDEFINED;
// Unbound reader data (nothing was read) is not evaluated.
1067 if (!SCM_UNBNDP (readerdata))
1069 sval = ly_eval_scm (scm_car (readerdata),
1070 *unsmob_input (scm_cdr (readerdata)),
1071 be_safe_global && is_main_input_,
// No value at all is reported to the caller as SCM_UNSPECIFIED.
1075 if (SCM_UNBNDP (sval))
1078 return SCM_UNSPECIFIED;
// Multiple values: slot 0 of the values object holds the list of values.
1081 if (extra_token && SCM_VALUESP (sval))
1083 sval = scm_struct_ref (sval, SCM_INUM0);
1085 if (scm_is_pair (sval)) {
// All values after the first are queued as extra tokens.  They are
// pushed in reverse because the queue delivers the last push first.
1086 for (SCM v = scm_reverse (scm_cdr (sval));
1091 switch (extra_token) {
// One branch classifies each value with scan_scm_id and queues it only
// when a token value was set; the other queues it as SCM_IDENTIFIER.
1093 token = scan_scm_id (scm_car (v));
1094 if (!scm_is_eq (yylval, SCM_UNSPECIFIED))
1095 push_extra_token (token, yylval);
1098 push_extra_token (SCM_IDENTIFIER, scm_car (v));
// The first value is the result of the call.
1102 sval = scm_car (sval);
// NOTE(review): presumably the branch for an empty list of values.
1104 sval = SCM_UNSPECIFIED;
1110 /* Check for valid UTF-8 that has no overlong or surrogate codes and
1111 is in the range 0-0x10ffff */
// Validates the text of the current token (YYText ()).  A malformed
// sequence is reported as a warning located at the offending bytes.
// NOTE(review): callers use the result as the token text, so presumably
// the unchanged YYText () pointer is returned - confirm.
1114 Lily_lexer::YYText_utf8 ()
1116 const char * const p = YYText ();
// One character per iteration, up to the terminating NUL byte.  ASCII
// bytes (below 0x80) need no further checks.
1117 for (int i=0; p[i];) {
1118 if ((p[i] & 0xff) < 0x80) {
1122 int oldi = i; // start of character
1123 int more = 0; // # of followup bytes, 0 if bad
// Dispatch on the lead byte.  Each group of cases sets the number of
// continuation bytes expected; lead bytes with range restrictions first
// test the second byte.
1124 switch (p[i++] & 0xff) {
1125 // 0xc0 and 0xc1 are overlong prefixes for
1126 // 0x00-0x3f and 0x40-0x7f respectively, bad.
1127 case 0xc2: // 0x80-0xbf
1128 case 0xc3: // 0xc0-0xff
1129 case 0xc4: // 0x100-0x13f
1130 case 0xc5: // 0x140-0x17f
1131 case 0xc6: // 0x180-0x1bf
1132 case 0xc7: // 0x1c0-0x1ff
1133 case 0xc8: // 0x200-0x23f
1134 case 0xc9: // 0x240-0x27f
1135 case 0xca: // 0x280-0x2bf
1136 case 0xcb: // 0x2c0-0x2ff
1137 case 0xcc: // 0x300-0x33f
1138 case 0xcd: // 0x340-0x37f
1139 case 0xce: // 0x380-0x3bf
1140 case 0xcf: // 0x3c0-0x3ff
1141 case 0xd0: // 0x400-0x43f
1142 case 0xd1: // 0x440-0x47f
1143 case 0xd2: // 0x480-0x4bf
1144 case 0xd3: // 0x4c0-0x4ff
1145 case 0xd4: // 0x500-0x53f
1146 case 0xd5: // 0x540-0x57f
1147 case 0xd6: // 0x580-0x5bf
1148 case 0xd7: // 0x5c0-0x5ff
1149 case 0xd8: // 0x600-0x63f
1150 case 0xd9: // 0x640-0x67f
1151 case 0xda: // 0x680-0x6bf
1152 case 0xdb: // 0x6c0-0x6ff
1153 case 0xdc: // 0x700-0x73f
1154 case 0xdd: // 0x740-0x77f
1155 case 0xde: // 0x780-0x7bf
1156 case 0xdf: // 0x7c0-0x7ff
1157 more = 1; // 2-byte sequences, 0x80-0x7ff
1160 // don't allow overlong sequences for 0-0x7ff
1161 if ((p[i] & 0xff) < 0xa0)
1163 case 0xe1: // 0x1000-0x1fff
1164 case 0xe2: // 0x2000-0x2fff
1165 case 0xe3: // 0x3000-0x3fff
1166 case 0xe4: // 0x4000-0x4fff
1167 case 0xe5: // 0x5000-0x5fff
1168 case 0xe6: // 0x6000-0x6fff
1169 case 0xe7: // 0x7000-0x7fff
1170 case 0xe8: // 0x8000-0x8fff
1171 case 0xe9: // 0x9000-0x9fff
1172 case 0xea: // 0xa000-0xafff
1173 case 0xeb: // 0xb000-0xbfff
1174 case 0xec: // 0xc000-0xcfff
1175 more = 2; // 3-byte sequences, 0x800-0xcfff
1177 case 0xed: // 0xd000-0xdfff
1178 // Don't allow surrogate codes 0xd800-0xdfff
1179 if ((p[i] & 0xff) >= 0xa0)
1181 case 0xee: // 0xe000-0xefff
1182 case 0xef: // 0xf000-0xffff
1183 more = 2; // 3-byte sequences,
1184 // 0xd000-0xd7ff, 0xe000-0xffff
1187 // don't allow overlong sequences for 0-0xffff
1188 if ((p[i] & 0xff) < 0x90)
1190 case 0xf1: // 0x40000-0x7ffff
1191 case 0xf2: // 0x80000-0xbffff
1192 case 0xf3: // 0xc0000-0xfffff
1193 more = 3; // 4-byte sequences, 0x10000-0xfffff
1196 // don't allow more than 0x10ffff
1197 if ((p[i] & 0xff) >= 0x90)
1199 more = 3; // 4-byte sequence, 0x100000-0x10ffff
1203 // check that all continuation bytes are valid
// A continuation byte has the bit pattern 10xxxxxx.
1205 if ((p[i++] & 0xc0) != 0x80)
// Report the bad sequence.  The warning location is narrowed to the
// bytes from oldi up to i, counted from the start of the token.
1211 Input h = here_input ();
1212 h.set (h.get_source_file (), h.start () + oldi, h.start () + i);
1213 h.warning (_ ("non-UTF-8 input").c_str ());
1220 urg, belong to string (_convert)
1221 and should be generalised
// Works on s in place: scans from the front for the first character that
// is not whitespace according to isspace.
// NOTE(review): presumably everything before that character is removed
// afterwards - confirm.
// NOTE(review): isspace receives a plain char; for bytes of 0x80 and
// above on platforms with signed char the argument is negative, which
// the C library leaves undefined.  A cast to unsigned char would be safer.
1224 strip_leading_white (string&s)
1227 for (; i < s.length (); i++)
1228 if (!isspace (s[i]))
// Works on s in place: removes whitespace (as defined by isspace) from
// the end of s.
// NOTE(review): isspace receives a plain char; for bytes of 0x80 and
// above on platforms with signed char the argument is negative, which
// the C library leaves undefined.  A cast to unsigned char would be safer.
1235 strip_trailing_white (string&s)
// i starts one past the last character and is moved towards the front.
1237 ssize i = s.length ();
1239 if (!isspace (s[i]))
// Keep everything up to and including position i.
1242 s = s.substr (0, i + 1);
// Oldest input file version accepted by is_valid_version ().
1247 Lilypond_version oldest_version ("2.7.38");
// Checks the version string s against the supported range and reports a
// non-fatal error when it falls outside.
// NOTE(review): presumably returns false after either error and true
// otherwise - confirm.
1251 is_valid_version (string s)
// Version of the running program, assembled from the build-time macros.
1253 Lilypond_version current ( MAJOR_VERSION "." MINOR_VERSION "." PATCH_LEVEL );
1254 Lilypond_version ver (s);
// Input older than oldest_version: complain and point to convert-ly.
1255 if (int (ver) < oldest_version)
1257 non_fatal_error (_f ("file too old: %s (oldest supported: %s)", ver.to_string (), oldest_version.to_string ()));
1258 non_fatal_error (_ ("consider updating the input with the convert-ly script"));
// NOTE(review): presumably reported when ver is newer than current.
1264 non_fatal_error (_f ("program too old: %s (file requires: %s)", current.to_string (), ver.to_string ()));
// Takes the lyric text s by value and visits every underscore character
// in it.
1275 lyric_fudge (string s)
// Each search for the next underscore starts at index i; the loop ends
// when find reports string::npos.
1279 while ((i = s.find ('_', i)) != string::npos)
1287 Convert "NUM/DEN" into a '(NUM . DEN) cons.
// Splits frac at its first slash and returns the Scheme pair
// (numerator . denominator), both parts converted with dec2int.
1290 scan_fraction (string frac)
1292 ssize i = frac.find ('/');
1293 string left = frac.substr (0, i);
// NOTE(review): the count given here exceeds the text that remains after
// the slash (that would be length - i - 1).  substr clamps the count, so
// the result is still the rest of the string; frac.substr (i + 1) would
// express the same thing directly.
1294 string right = frac.substr (i + 1, (frac.length () - i + 1));
1296 int n = String_convert::dec2int (left);
1297 int d = String_convert::dec2int (right);
1298 return scm_cons (scm_from_int (n), scm_from_int (d));
// Calls the Scheme procedure lookup-markup-command from the lily module
// with s converted to a Scheme string, and returns its result unchanged.
1302 lookup_markup_command (string s)
1304 SCM proc = ly_lily_module_constant ("lookup-markup-command");
1305 return scm_call_1 (proc, ly_string2scm (s));
// Calls the Scheme procedure lookup-markup-list-command from the lily
// module with s converted to a Scheme string, and returns its result
// unchanged.
1309 lookup_markup_list_command (string s)
1311 SCM proc = ly_lily_module_constant ("lookup-markup-list-command");
1312 return scm_call_1 (proc, ly_string2scm (s));
1315 /* Shut up lexer warnings. */
// Each statement merely names one symbol of the generated scanner and
// discards it, which counts as a use.  The function names itself in the
// last statement for the same reason.
1335 silence_lexer_warnings ()
1337 (void) yy_start_stack_ptr;
1338 (void) yy_start_stack_depth;
1339 (void) yy_start_stack;
1340 (void) yy_push_state;
1341 (void) yy_pop_state;
1342 (void) yy_top_state;
1343 (void) silence_lexer_warnings;