%{ // -*-Fundamental-*- /* This file is part of LilyPond, the GNU music typesetter. Copyright (C) 1996--2011 Han-Wen Nienhuys Jan Nieuwenhuizen LilyPond is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. LilyPond is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with LilyPond. If not, see . */ /* backup rules after making a change to the lexer rules, run flex -b and make sure that lex.backup contains no backup states, but only the reminder Compressed tables always back up. (don-t forget to rm lex.yy.cc :-) */ #include #include #include /* Flex >= 2.5.29 fix; FlexLexer.h's multiple include bracing breaks when building the actual lexer. */ #define LEXER_CC #include using namespace std; #include "context-def.hh" #include "duration.hh" #include "identifier-smob.hh" #include "international.hh" #include "interval.hh" #include "lily-guile.hh" #include "lily-lexer.hh" #include "lily-parser.hh" #include "lilypond-version.hh" #include "main.hh" #include "music.hh" #include "music-function.hh" #include "parse-scm.hh" #include "parser.hh" #include "pitch.hh" #include "source-file.hh" #include "std-string.hh" #include "string-convert.hh" #include "version.hh" #include "warn.hh" /* RH 7 fix (?) */ #define isatty HORRIBLEKLUDGE void strip_trailing_white (string&); void strip_leading_white (string&); string lyric_fudge (string s); SCM lookup_markup_command (string s); SCM lookup_markup_list_command (string s); bool is_valid_version (string s); #define start_quote() \ yy_push_state (quote);\ yylval.string = new string #define start_lyric_quote() \ yy_push_state (lyric_quote);\ yylval.string = new string #define yylval \ (*(YYSTYPE*)lexval_) #define yylloc \ (*(YYLTYPE*)lexloc_) #define YY_USER_ACTION add_lexed_char (YYLeng ()); SCM scan_fraction (string); SCM (* scm_parse_error_handler) (void *); %} %option c++ %option noyywrap %option nodefault %option debug %option yyclass="Lily_lexer" %option stack %option never-interactive %option warn %x extratoken %x chords %x figures %x incl %x lyrics %x lyric_quote %x longcomment %x markup %x notes %x quote %x sourcefileline %x sourcefilename %x version A [a-zA-Z\200-\377] AA {A}|_ N [0-9] AN {AA}|{N} ANY_CHAR (.|\n) PUNCT [?!:'`] ACCENT \\[`'"^] SPECIAL_CHAR [&@] NATIONAL [\001-\006\021-\027\031\036] TEX {AA}|-|{PUNCT}|{ACCENT}|{NATIONAL}|{SPECIAL_CHAR} DASHED_WORD {A}({AN}|-)* DASHED_KEY_WORD \\{DASHED_WORD} ALPHAWORD {A}+ UNSIGNED {N}+ E_UNSIGNED \\{N}+ FRACTION {N}+\/{N}+ INT -?{UNSIGNED} REAL ({INT}\.{N}*)|(-?\.{N}+) WHITE [ \n\t\f\r] HORIZONTALWHITE [ \t] BLACK [^ \n\t\f\r] RESTNAME [rs] NOTECOMMAND \\{A}+ MARKUPCOMMAND \\({A}|[-_])+ LYRICS ({AA}|{TEX})[^0-9 \t\n\r\f]* ESCAPED [nt\\'"] EXTENDER __ HYPHEN -- BOM_UTF8 \357\273\277 %% <*>\r { // swallow and ignore carriage returns } {ANY_CHAR} { /* Generate a token without swallowing anything */ /* First unswallow the eaten character */ add_lexed_char (-YYLeng ()); yyless (0); /* produce requested token */ int type = scm_to_int (scm_caar (extra_tokens_)); yylval.scm = scm_cdar (extra_tokens_); extra_tokens_ = scm_cdr (extra_tokens_); if (scm_is_null (extra_tokens_)) yy_pop_state (); return type; } <> { /* Generate a token without swallowing anything */ /* produce requested token */ int type = scm_to_int (scm_caar (extra_tokens_)); yylval.scm = scm_cdar (extra_tokens_); extra_tokens_ = scm_cdr (extra_tokens_); if (scm_is_null (extra_tokens_)) yy_pop_state (); return type; } /* Use the trailing context feature. Otherwise, the BOM will not be found if the file starts with an identifier definition. */ {BOM_UTF8}/.* { if (this->lexloc_->line_number () != 1 || this->lexloc_->column_number () != 0) { LexerWarning (_ ("stray UTF-8 BOM encountered").c_str ()); // exit (1); } debug_output (_ ("Skipping UTF-8 BOM")); } { "%{" { yy_push_state (longcomment); } %[^{\n\r][^\n\r]*[\n\r] { } %[^{\n\r] { // backup rule } %[\n\r] { } %[^{\n\r][^\n\r]* { } {WHITE}+ { } } { \" { start_quote (); } } \\version{WHITE}* { yy_push_state (version); } \\sourcefilename{WHITE}* { yy_push_state (sourcefilename); } \\sourcefileline{WHITE}* { yy_push_state (sourcefileline); } \"[^"]*\" { /* got the version number */ string s (YYText () + 1); s = s.substr (0, s.rfind ('\"')); yy_pop_state (); SCM top_scope = scm_car (scm_last_pair (scopes_)); scm_module_define (top_scope, ly_symbol2scm ("version-seen"), SCM_BOOL_T); if (!is_valid_version (s)) return INVALID; } \"[^"]*\" { string s (YYText () + 1); s = s.substr (0, s.rfind ('\"')); yy_pop_state (); this->here_input().get_source_file ()->name_ = s; message (_f ("Renaming input to: `%s'", s.c_str ())); progress_indication ("\n"); scm_module_define (scm_car (scopes_), ly_symbol2scm ("input-file-name"), ly_string2scm (s)); } {INT} { int i; sscanf (YYText (), "%d", &i); yy_pop_state (); this->here_input ().get_source_file ()->set_line (here_input ().start (), i); } {ANY_CHAR} { LexerError (_ ("quoted string expected after \\version").c_str ()); yy_pop_state (); } {ANY_CHAR} { LexerError (_ ("quoted string expected after \\sourcefilename").c_str ()); yy_pop_state (); } {ANY_CHAR} { LexerError (_ ("integer expected after \\sourcefileline").c_str ()); yy_pop_state (); } { [^\%]* { } \%*[^}%]* { } "%"+"}" { yy_pop_state (); } } \\maininput { if (!is_main_input_) { start_main_input (); is_main_input_ = true; } else error (_ ("\\maininput not allowed outside init files")); } \\include { yy_push_state (incl); } \"[^"]*\" { /* got the include file name */ string s (YYText ()+1); s = s.substr (0, s.rfind ('"')); new_input (s, sources_); yy_pop_state (); } \\{BLACK}*{WHITE}? { /* got the include identifier */ string s = YYText () + 1; strip_trailing_white (s); if (s.length () && (s[s.length () - 1] == ';')) s = s.substr (0, s.length () - 1); SCM sid = lookup_identifier (s); if (scm_is_string (sid)) { new_input (ly_scm2string (sid), sources_); yy_pop_state (); } else { string msg (_f ("wrong or undefined identifier: `%s'", s )); LexerError (msg.c_str ()); SCM err = scm_current_error_port (); scm_puts ("This value was found in the table: ", err); scm_display (sid, err); } } \"[^"]* { // backup rule error (_ ("end quote missing")); exit (1); } {RESTNAME} { char const *s = YYText (); yylval.scm = scm_from_locale_string (s); return RESTNAME; } R { return MULTI_MEASURE_REST; } # { //embedded scm int n = 0; Input hi = here_input(); hi.step_forward (); SCM sval = ly_parse_scm (hi.start (), &n, hi, be_safe_global && is_main_input_, parser_); if (sval == SCM_UNDEFINED) { sval = SCM_UNSPECIFIED; error_level_ = 1; } for (int i = 0; i < n; i++) { yyinput (); } char_count_stack_.back () += n; if (unpack_identifier (sval) != SCM_UNDEFINED) { yylval.scm = unpack_identifier(sval); return identifier_type (yylval.scm); } for (size_t i = 0; i < pending_string_includes_.size (); i++) new_input ("", pending_string_includes_[i], parser_->sources_); pending_string_includes_.clear (); yylval.scm = sval; return SCM_TOKEN; } { \<\< { return DOUBLE_ANGLE_OPEN; } \>\> { return DOUBLE_ANGLE_CLOSE; } } { \< { return ANGLE_OPEN; } \> { return ANGLE_CLOSE; } } { _ { return FIGURE_SPACE; } \> { return FIGURE_CLOSE; } \< { return FIGURE_OPEN; } } { {ALPHAWORD} { return scan_bare_word (YYText ()); } {NOTECOMMAND} { return scan_escaped_word (YYText () + 1); } {FRACTION} { yylval.scm = scan_fraction (YYText ()); return FRACTION; } {UNSIGNED}/\/ | // backup rule {UNSIGNED} { yylval.i = String_convert::dec2int (string (YYText ())); return UNSIGNED; } {E_UNSIGNED} { yylval.i = String_convert::dec2int (string (YYText () +1)); return E_UNSIGNED; } } { \\{ESCAPED} { *yylval.string += to_string (escaped_char (YYText ()[1])); } [^\\""]+ { *yylval.string += YYText (); } \" { yy_pop_state (); /* yylval is union. Must remember STRING before setting SCM*/ string *sp = yylval.string; yylval.scm = ly_string2scm (*sp); delete sp; return is_lyric_state () ? LYRICS_STRING : STRING; } . { *yylval.string += YYText (); } } { \" { start_lyric_quote (); } {FRACTION} { yylval.scm = scan_fraction (YYText ()); return FRACTION; } {UNSIGNED}/\/[^0-9] { // backup rule yylval.i = String_convert::dec2int (string (YYText ())); return UNSIGNED; } {UNSIGNED}/\/ | // backup rule {UNSIGNED} { yylval.i = String_convert::dec2int (string (YYText ())); return UNSIGNED; } {NOTECOMMAND} { return scan_escaped_word (YYText () + 1); } {LYRICS} { /* ugr. This sux. */ string s (YYText ()); if (s == "__") return yylval.i = EXTENDER; if (s == "--") return yylval.i = HYPHEN; s = lyric_fudge (s); char c = s[s.length () - 1]; if (c == '{' || c == '}') // brace open is for not confusing dumb tools. here_input ().warning ( _ ("Brace found at end of lyric. Did you forget a space?")); yylval.scm = ly_string2scm (s); return LYRICS_STRING; } . { return YYText ()[0]; } } { {ALPHAWORD} { return scan_bare_word (YYText ()); } {NOTECOMMAND} { return scan_escaped_word (YYText () + 1); } {FRACTION} { yylval.scm = scan_fraction (YYText ()); return FRACTION; } {UNSIGNED}/\/[^0-9] { // backup rule yylval.i = String_convert::dec2int (string (YYText ())); return UNSIGNED; } {UNSIGNED}/\/ | // backup rule {UNSIGNED} { yylval.i = String_convert::dec2int (string (YYText ())); return UNSIGNED; } - { return CHORD_MINUS; } : { return CHORD_COLON; } \/\+ { return CHORD_BASS; } \/ { return CHORD_SLASH; } \^ { return CHORD_CARET; } . { return YYText ()[0]; } } { \\score { return SCORE; } {MARKUPCOMMAND} { string str (YYText () + 1); int token_type = MARKUP_FUNCTION; SCM s = lookup_markup_command (str); // lookup-markup-command returns a pair with the car // being the function to call, and the cdr being the // call signature specified to define-markup-command, // a list of predicates. if (!scm_is_pair (s)) { // If lookup-markup-command was not successful, we // try lookup-markup-list-command instead. // If this fails as well, we just scan and return // the escaped word. s = lookup_markup_list_command (str); if (scm_is_pair (s)) token_type = MARKUP_LIST_FUNCTION; else return scan_escaped_word (str); } // If the list of predicates is, say, // (number? number? markup?), then tokens // EXPECT_MARKUP EXPECT_SCM EXPECT_SCM EXPECT_NO_MORE_ARGS // will be generated. Note that we have to push them // in reverse order, so the first token pushed in the // loop will be EXPECT_NO_MORE_ARGS. yylval.scm = scm_car(s); // yylval now contains the function to call as token // value (for token type MARKUP_FUNCTION or // MARKUP_LIST_FUNCTION). push_extra_token(EXPECT_NO_MORE_ARGS); s = scm_cdr(s); for (; scm_is_pair(s); s = scm_cdr(s)) { SCM predicate = scm_car(s); if (predicate == ly_lily_module_constant ("markup-list?")) push_extra_token(EXPECT_MARKUP_LIST); else if (predicate == ly_lily_module_constant ("markup?")) push_extra_token(EXPECT_MARKUP); else push_extra_token(EXPECT_SCM, predicate); } return token_type; } [{}] { return YYText ()[0]; } [^#{}\"\\ \t\n\r\f]+ { string s (YYText ()); char c = s[s.length () - 1]; /* brace open is for not confusing dumb tools. */ if (c == '{' || c == '}') here_input ().warning ( _ ("Brace found at end of markup. Did you forget a space?")); yylval.scm = ly_string2scm (s); return STRING; } . { return YYText()[0]; } } <> { LexerError (_ ("EOF found inside a comment").c_str ()); is_main_input_ = false; // should be safe , can't have \include in --safe. if (!close_input ()) yyterminate (); // can't move this, since it actually rets a YY_NULL } <> { if (is_main_input_) { /* 2 = init.ly + current file. > because we're before closing, but is_main_input_ should reflect after. */ is_main_input_ = include_stack_.size () > 2; if (!close_input ()) /* Returns YY_NULL */ yyterminate (); } else if (!close_input ()) /* Returns YY_NULL */ yyterminate (); } { {DASHED_WORD} { return scan_bare_word (YYText ()); } {DASHED_KEY_WORD} { return scan_escaped_word (YYText () + 1); } } -{UNSIGNED} | // backup rule {REAL} { yylval.scm = scm_c_read_string (YYText ()); return REAL; } -\. { // backup rule yylval.scm = scm_from_double (0.0); return REAL; } {UNSIGNED} { yylval.i = String_convert::dec2int (string (YYText ())); return UNSIGNED; } [{}] { return YYText ()[0]; } [*:=] { char c = YYText ()[0]; return c; } . { return YYText ()[0]; } \\. { char c = YYText ()[1]; switch (c) { case '>': return E_ANGLE_CLOSE; case '<': return E_ANGLE_OPEN; case '!': return E_EXCLAMATION; case '(': return E_OPEN; case ')': return E_CLOSE; case '[': return E_BRACKET_OPEN; case '+': return E_PLUS; case ']': return E_BRACKET_CLOSE; case '~': return E_TILDE; case '\\': return E_BACKSLASH; default: return E_CHAR; } } <*>. { string msg = _f ("invalid character: `%c'", YYText ()[0]); LexerError (msg.c_str ()); return YYText ()[0]; } %% /* Make the lexer generate a token of the given type as the next token. TODO: make it possible to define a value for the token as well */ void Lily_lexer::push_extra_token (int token_type, SCM scm) { if (scm_is_null (extra_tokens_)) { if (YY_START != extratoken) hidden_state_ = YY_START; yy_push_state (extratoken); } extra_tokens_ = scm_acons (scm_from_int (token_type), scm, extra_tokens_); } void Lily_lexer::push_chord_state (SCM tab) { pitchname_tab_stack_ = scm_cons (tab, pitchname_tab_stack_); yy_push_state (chords); } void Lily_lexer::push_figuredbass_state () { yy_push_state (figures); } void Lily_lexer::push_initial_state () { yy_push_state (INITIAL); } void Lily_lexer::push_lyric_state () { yy_push_state (lyrics); } void Lily_lexer::push_markup_state () { yy_push_state (markup); } void Lily_lexer::push_note_state (SCM tab) { pitchname_tab_stack_ = scm_cons (tab, pitchname_tab_stack_); yy_push_state (notes); } void Lily_lexer::pop_state () { if (YYSTATE == notes || YYSTATE == chords) pitchname_tab_stack_ = scm_cdr (pitchname_tab_stack_); yy_pop_state (); } int Lily_lexer::identifier_type (SCM sid) { int k = try_special_identifiers (&yylval.scm , sid); return k >= 0 ? k : SCM_IDENTIFIER; } int Lily_lexer::scan_escaped_word (string str) { // use more SCM for this. // SCM sym = ly_symbol2scm (str.c_str ()); int i = lookup_keyword (str); if (i == MARKUP && is_lyric_state ()) return LYRIC_MARKUP; if (i != -1) return i; SCM sid = lookup_identifier (str); if (is_music_function (sid)) { int funtype = SCM_FUNCTION; yylval.scm = get_music_function_transform (sid); SCM s = scm_object_property (yylval.scm, ly_symbol2scm ("music-function-signature")); SCM cs = scm_car (s); if (scm_is_pair (cs)) { cs = SCM_CAR (cs); } if (scm_is_eq (cs, ly_lily_module_constant ("ly:music?"))) funtype = MUSIC_FUNCTION; else if (scm_is_eq (cs, ly_lily_module_constant ("ly:event?"))) funtype = EVENT_FUNCTION; else if (ly_is_procedure (cs)) funtype = SCM_FUNCTION; else programming_error ("Bad syntax function predicate"); push_extra_token (EXPECT_NO_MORE_ARGS); for (s = scm_cdr (s); scm_is_pair (s); s = scm_cdr (s)) { SCM optional = SCM_UNDEFINED; cs = scm_car (s); if (scm_is_pair (cs)) { optional = SCM_CDR (cs); cs = SCM_CAR (cs); } if (cs == ly_music_p_proc) push_extra_token (EXPECT_MUSIC); else if (cs == Pitch_type_p_proc) push_extra_token (EXPECT_PITCH); else if (cs == Duration_type_p_proc) push_extra_token (EXPECT_DURATION); else if (ly_is_procedure (cs)) push_extra_token (EXPECT_SCM, cs); else { programming_error ("Function parameter without type-checking predicate"); continue; } if (!scm_is_eq (optional, SCM_UNDEFINED)) push_extra_token (EXPECT_OPTIONAL, optional); } return funtype; } if (sid != SCM_UNDEFINED) { yylval.scm = sid; return identifier_type (sid); } string msg (_f ("unknown escaped string: `\\%s'", str)); LexerError (msg.c_str ()); yylval.scm = ly_string2scm (str); return STRING; } int Lily_lexer::scan_bare_word (string str) { SCM sym = ly_symbol2scm (str.c_str ()); if ((YYSTATE == notes) || (YYSTATE == chords)) { SCM handle = SCM_BOOL_F; if (scm_is_pair (pitchname_tab_stack_)) handle = scm_hashq_get_handle (scm_car (pitchname_tab_stack_), sym); if (scm_is_pair (handle)) { yylval.scm = scm_cdr (handle); if (unsmob_pitch (yylval.scm)) return (YYSTATE == notes) ? NOTENAME_PITCH : TONICNAME_PITCH; else if (scm_is_symbol (yylval.scm)) return DRUM_PITCH; } else if ((YYSTATE == chords) && (handle = scm_hashq_get_handle (chordmodifier_tab_, sym))!= SCM_BOOL_F) { yylval.scm = scm_cdr (handle); return CHORD_MODIFIER; } if ((chord_repetition_.repetition_symbol_ != SCM_EOL) && to_boolean (scm_equal_p (chord_repetition_.repetition_symbol_, sym))) return CHORD_REPETITION; } yylval.scm = ly_string2scm (str); return STRING; } int Lily_lexer::get_state () const { if (YY_START == extratoken) return hidden_state_; else return YY_START; } bool Lily_lexer::is_note_state () const { return get_state () == notes; } bool Lily_lexer::is_chord_state () const { return get_state () == chords; } bool Lily_lexer::is_lyric_state () const { return get_state () == lyrics; } bool Lily_lexer::is_figure_state () const { return get_state () == figures; } /* urg, belong to string (_convert) and should be generalised */ void strip_leading_white (string&s) { ssize i = 0; for (; i < s.length (); i++) if (!isspace (s[i])) break; s = s.substr (i); } void strip_trailing_white (string&s) { ssize i = s.length (); while (i--) if (!isspace (s[i])) break; s = s.substr (0, i + 1); } Lilypond_version oldest_version ("2.7.38"); bool is_valid_version (string s) { Lilypond_version current ( MAJOR_VERSION "." MINOR_VERSION "." PATCH_LEVEL ); Lilypond_version ver (s); if (int (ver) < oldest_version) { non_fatal_error (_f ("file too old: %s (oldest supported: %s)", ver.to_string (), oldest_version.to_string ())); non_fatal_error (_ ("consider updating the input with the convert-ly script")); return false; } if (ver > current) { non_fatal_error (_f ("program too old: %s (file requires: %s)", current.to_string (), ver.to_string ())); return false; } return true; } /* substitute _ and \, */ string lyric_fudge (string s) { char *chars = string_copy (s); for (char *p = chars; *p ; p++) { if (*p == '_' && (p == chars || *(p-1) != '\\')) *p = ' '; } s = string (chars); delete[] chars; ssize i = 0; if ((i = s.find ("\\,")) != NPOS) // change "\," to TeX's "\c " { * (((char*)s.c_str ()) + i + 1) = 'c'; s = s.substr (0, i + 2) + " " + s.substr (i - 2); } return s; } /* Convert "NUM/DEN" into a '(NUM . DEN) cons. */ SCM scan_fraction (string frac) { ssize i = frac.find ('/'); string left = frac.substr (0, i); string right = frac.substr (i + 1, (frac.length () - i + 1)); int n = String_convert::dec2int (left); int d = String_convert::dec2int (right); return scm_cons (scm_from_int (n), scm_from_int (d)); } SCM lookup_markup_command (string s) { SCM proc = ly_lily_module_constant ("lookup-markup-command"); return scm_call_1 (proc, ly_string2scm (s)); } SCM lookup_markup_list_command (string s) { SCM proc = ly_lily_module_constant ("lookup-markup-list-command"); return scm_call_1 (proc, ly_string2scm (s)); } /* Shut up lexer warnings. */ #if YY_STACK_USED static void yy_push_state (int) { } static void yy_pop_state () { } static int yy_top_state () { return 0; } static void silence_lexer_warnings () { (void) yy_start_stack_ptr; (void) yy_start_stack_depth; (void) yy_start_stack; (void) yy_push_state; (void) yy_pop_state; (void) yy_top_state; (void) silence_lexer_warnings; } #endif