From: David Kastrup Date: Sun, 29 Jul 2012 14:05:10 +0000 (+0200) Subject: Unify the lexer's idea of words and commands across all modes. X-Git-Tag: release/2.15.95-1~28 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=70a153b14ba32767e1ecbe680f435bdd533ae819;hp=7a1912b8a517cd565ce8ef0acf137029adceab77;p=lilypond.git Unify the lexer's idea of words and commands across all modes. A "word" (a string recognized even when not quoted) and a "command" (something starting with \ and followed by letters and other folderol, indicating a Scheme control sequence or similar) get the same syntax in all modes: A "word" is a sequence of alphabetic characters possibly containing single dashes or underlines inside (not at the beginning or end). A "command" is a "word" preceded by a backslash. --- diff --git a/lily/lexer.ll b/lily/lexer.ll index c5cfc15ba4..57b9a59967 100644 --- a/lily/lexer.ll +++ b/lily/lexer.ll @@ -150,18 +150,14 @@ SCM (* scm_parse_error_handler) (void *); A [a-zA-Z\200-\377] AA {A}|_ N [0-9] -AN {AA}|{N} ANY_CHAR (.|\n) PUNCT [][()?!:'`] SPECIAL_CHAR [&@] NATIONAL [\001-\006\021-\027\031\036] TEX {AA}|-|{PUNCT}|{NATIONAL}|{SPECIAL_CHAR} -DASHED_WORD {A}({AN}|-)* -DASHED_KEY_WORD \\{DASHED_WORD} +WORD {A}([-_]{A}|{A})* +COMMAND \\{WORD} - - -ALPHAWORD {A}+ UNSIGNED {N}+ E_UNSIGNED \\{N}+ FRACTION {N}+\/{N}+ @@ -171,8 +167,6 @@ WHITE [ \n\t\f\r] HORIZONTALWHITE [ \t] BLACK [^ \n\t\f\r] RESTNAME [rs] -NOTECOMMAND \\{A}+ -MARKUPCOMMAND \\({A}|[-_])+ LYRICS ({AA}|{TEX})[^0-9 \t\n\r\f]* ESCAPED [nt\\'"] EXTENDER __ @@ -393,15 +387,25 @@ BOM_UTF8 \357\273\277 error (_ ("end quote missing")); exit (1); } + + /* Flex picks the longest matching pattern including trailing + * contexts. Without the backup pattern, r-. does not trigger the + * {RESTNAME} rule but rather the {WORD}/[-_] rule coming later, + * needed for avoiding backup states. + */ + +{RESTNAME}/[-_] | // pseudo backup rule {RESTNAME} { char const *s = YYText (); yylval.scm = scm_from_locale_string (s); return RESTNAME; } +q/[-_] | // pseudo backup rule q { return CHORD_REPETITION; } +R/[-_] | // pseudo backup rule R { return MULTI_MEASURE_REST; } @@ -476,11 +480,13 @@ BOM_UTF8 \357\273\277 } { - {ALPHAWORD} { + {WORD}/[-_] | // backup rule + {WORD} { return scan_bare_word (YYText_utf8 ()); } - {NOTECOMMAND} { + {COMMAND}/[-_] | // backup rule + {COMMAND} { return scan_escaped_word (YYText_utf8 () + 1); } {FRACTION} { @@ -533,7 +539,8 @@ BOM_UTF8 \357\273\277 yylval.scm = scm_c_read_string (YYText ()); return UNSIGNED; } - {NOTECOMMAND} { + {COMMAND}/[-_] | // backup rule + {COMMAND} { return scan_escaped_word (YYText_utf8 () + 1); } {LYRICS} { @@ -559,10 +566,12 @@ BOM_UTF8 \357\273\277 } } { - {ALPHAWORD} { + {WORD}/[-_] | // backup rule + {WORD} { return scan_bare_word (YYText_utf8 ()); } - {NOTECOMMAND} { + {COMMAND}/[-_] | // backup rule + {COMMAND} { return scan_escaped_word (YYText_utf8 () + 1); } {FRACTION} { @@ -590,7 +599,7 @@ BOM_UTF8 \357\273\277 return CHORD_CARET; } . { - return YYText ()[0]; // ALPHAWORD catches all multibyte. + return YYText ()[0]; // WORD catches all multibyte. } } @@ -599,7 +608,8 @@ BOM_UTF8 \357\273\277 \\score { return SCORE; } - {MARKUPCOMMAND} { + {COMMAND}/[-_] | // backup rule + {COMMAND} { string str (YYText_utf8 () + 1); int token_type = MARKUP_FUNCTION; @@ -694,10 +704,12 @@ BOM_UTF8 \357\273\277 } { - {DASHED_WORD} { + {WORD}/[-_] | // backup rule + {WORD} { return scan_bare_word (YYText_utf8 ()); } - {DASHED_KEY_WORD} { + {COMMAND}/[-_] | // backup rule + {COMMAND} { return scan_escaped_word (YYText_utf8 () + 1); } }