]> git.donarmstrong.com Git - lilypond.git/blobdiff - lily/lexer.ll
Issue 3618/1: Eliminate EXPECT_PITCH token in parser
[lilypond.git] / lily / lexer.ll
index e74dced0885534a2bee9ad22115dec42d60b4ef5..7044f100c892f5ded67f204ff53f8b07eb0c8134 100644 (file)
@@ -91,6 +91,15 @@ bool is_valid_version (string s);
                 yylval = SCM_EOL;               \
         } while (0)
 
+/*
+  The inside of \"violin1" is marked by commandquote mode
+*/
+
+#define start_command_quote() do {             \
+                yy_push_state (commandquote);  \
+                yylval = SCM_EOL;               \
+        } while (0)
+
 #define yylval (*lexval_)
 
 #define yylloc (*lexloc_)
@@ -120,9 +129,11 @@ SCM (* scm_parse_error_handler) (void *);
 %x incl
 %x lyrics
 %x longcomment
+%x maininput
 %x markup
 %x notes
 %x quote
+%x commandquote
 %x sourcefileline
 %x sourcefilename
 %x version
@@ -149,12 +160,16 @@ N         [0-9]
 ANY_CHAR       (.|\n)
 WORD           {A}([-_]{A}|{A})*
 COMMAND                \\{WORD}
-
+/* SPECIAL category is for every letter that needs to get passed to
+ * the parser rather than being redefinable by the user */
+SPECIAL                [-+*/=<>{}!?_^'',.:]
+SHORTHAND      (.|\\.)
 UNSIGNED       {N}+
 E_UNSIGNED     \\{N}+
 FRACTION       {N}+\/{N}+
 INT            -?{UNSIGNED}
 REAL           ({INT}\.{N}*)|(-?\.{N}+)
+STRICTREAL      {UNSIGNED}\.{UNSIGNED}
 WHITE          [ \n\t\f\r]
 HORIZONTALWHITE                [ \t]
 BLACK          [^ \n\t\f\r]
@@ -216,16 +231,10 @@ BOM_UTF8  \357\273\277
   "%{" {
        yy_push_state (longcomment);
   }
-  %[^{\n\r][^\n\r]*[\n\r]      {
-         (void) YYText_utf8 ();
-  }
-  %[^{\n\r]    { // backup rule
+  %[^{\n\r][^\n\r]*[\n\r]?     {
          (void) YYText_utf8 ();
   }
-  %[\n\r]      {
-  }
-  %[^{\n\r][^\n\r]*    {
-         (void) YYText_utf8 ();
+  %[\n\r]?     {
   }
   {WHITE}+     {
 
@@ -314,9 +323,12 @@ BOM_UTF8   \357\273\277
                start_main_input ();
                main_input_level_ = include_stack_.size ();
                is_main_input_ = true;
+               int state = YYSTATE;
+               yy_push_state (maininput);
+               yy_push_state (state);
        }
        else
-               error (_ ("\\maininput not allowed outside init files"));
+               LexerError (_ ("\\maininput not allowed outside init files").c_str ());
 }
 
 <INITIAL,chords,lyrics,figures,notes>\\include           {
@@ -376,8 +388,8 @@ BOM_UTF8    \357\273\277
 }
 
 <incl,version,sourcefilename>\"[^""]*   { // backup rule
-       error (_ ("end quote missing"));
-       exit (1);
+       LexerError (_ ("end quote missing").c_str ());
+       yy_pop_state ();
 }
 
     /* Flex picks the longest matching pattern including trailing
@@ -478,6 +490,22 @@ BOM_UTF8   \357\273\277
                 yylval = SCM_UNSPECIFIED;
                return FIGURE_OPEN;
        }
+       \\\+    {
+               yylval = SCM_UNSPECIFIED;
+               return E_PLUS;
+       }
+       \\!     {
+               yylval = SCM_UNSPECIFIED;
+               return E_EXCLAMATION;
+       }
+       \\\\    {
+               yylval = SCM_UNSPECIFIED;
+               return E_BACKSLASH;
+       }
+       [][]    {
+               yylval = SCM_UNSPECIFIED;
+               return  YYText ()[0];
+       }
 }
 
 <notes,figures>{
@@ -485,7 +513,9 @@ BOM_UTF8    \357\273\277
        {WORD}  {
                return scan_bare_word (YYText_utf8 ());
        }
-
+       \\\"    {
+               start_command_quote ();
+       }
        {COMMAND}/[-_]  | // backup rule
        {COMMAND}       {
                return scan_escaped_word (YYText_utf8 () + 1); 
@@ -494,8 +524,12 @@ BOM_UTF8   \357\273\277
                yylval =  scan_fraction (YYText ());
                return FRACTION;
        }
-       {UNSIGNED}/\/   | // backup rule
-       {UNSIGNED}              {
+       {STRICTREAL}    {
+               yylval = scm_c_read_string (YYText ());
+               return REAL;
+       }
+       {UNSIGNED}/[/.] | // backup rule
+       {UNSIGNED}      {
                yylval = scm_c_read_string (YYText ());
                return UNSIGNED;
        }
@@ -505,7 +539,7 @@ BOM_UTF8    \357\273\277
        }
 }
 
-<quote>{
+<quote,commandquote>{
        \\{ESCAPED}     {
                 char c = escaped_char (YYText ()[1]);
                yylval = scm_cons (scm_from_locale_stringn (&c, 1),
@@ -517,15 +551,20 @@ BOM_UTF8  \357\273\277
        }
        \"      {
 
-               yy_pop_state ();
-
                /* yylval is union. Must remember STRING before setting SCM*/
 
                 yylval = scm_string_concatenate_reverse (yylval,
                                                          SCM_UNDEFINED,
                                                          SCM_UNDEFINED);
 
-               return is_lyric_state () ? LYRICS_STRING : STRING;
+               if (get_state () == commandquote) {
+                       yy_pop_state ();
+                       return scan_escaped_word (ly_scm2string (yylval));
+               }
+
+               yy_pop_state ();
+
+               return STRING;
        }
        \\      {
                 yylval = scm_cons (scm_from_locale_string (YYText ()),
@@ -541,21 +580,32 @@ BOM_UTF8  \357\273\277
                yylval =  scan_fraction (YYText ());
                return FRACTION;
        }
-       {UNSIGNED}/\/   | // backup rule
+       {STRICTREAL}    {
+               yylval = scm_c_read_string (YYText ());
+               return REAL;
+       }
+       {UNSIGNED}/[/.] | // backup rule
        {UNSIGNED}              {
                yylval = scm_c_read_string (YYText ());
                return UNSIGNED;
        }
+       \\\"    {
+               start_command_quote ();
+       }
        {COMMAND}/[-_]  | // backup rule
        {COMMAND}       {
                return scan_escaped_word (YYText_utf8 () + 1);
        }
-       /* Characters needed to express durations, assignments, barchecks */
-       [*.=|]  {
+       \\.|\|  {
+               // UTF-8 already covered by COMMAND
+               return scan_shorthand (YYText ());
+       }
+       /* Characters needed to express durations, assignments */
+       [*.=]   {
                 yylval = SCM_UNSPECIFIED;
                return YYText ()[0];
        }
-       [^$#{}\"\\ \t\n\r\f0-9]+ {
+       [^|*.=$#{}\"\\ \t\n\r\f0-9][^$#{}\"\\ \t\n\r\f0-9]* {
                /* ugr. This sux. */
                string s (YYText_utf8 ());
                 yylval = SCM_UNSPECIFIED;
@@ -566,12 +616,12 @@ BOM_UTF8  \357\273\277
                s = lyric_fudge (s);
                yylval = ly_string2scm (s);
 
-               return LYRICS_STRING;
+               return STRING;
        }
        /* This should really just cover {} */
-       . {
+       [{}] {
                 yylval = SCM_UNSPECIFIED;
-               return YYText ()[0]; // above catches all multibytes.
+               return YYText ()[0];
        }
 }
 <chords>{
@@ -579,6 +629,9 @@ BOM_UTF8    \357\273\277
        {WORD}  {
                return scan_bare_word (YYText_utf8 ());
        }
+       \\\"    {
+               start_command_quote ();
+       }
        {COMMAND}/[-_]  | // backup rule
        {COMMAND}       {
                return scan_escaped_word (YYText_utf8 () + 1);
@@ -612,10 +665,6 @@ BOM_UTF8   \357\273\277
                 yylval = SCM_UNSPECIFIED;
                return CHORD_CARET;
        }
-       . {
-                yylval = SCM_UNSPECIFIED;
-               return YYText ()[0]; // WORD catches all multibyte.
-       }
 }
 
 
@@ -624,6 +673,9 @@ BOM_UTF8    \357\273\277
                 yylval = SCM_UNSPECIFIED;
                return SCORE;
        }
+       \\\"    {
+               start_command_quote ();
+       }
        {COMMAND}/[-_]  | // backup rule
        {COMMAND} {
                string str (YYText_utf8 () + 1);
@@ -681,27 +733,41 @@ BOM_UTF8  \357\273\277
                yylval = ly_string2scm (s);
                return STRING;
        }
-       .  {
+       [{}]  {
                 yylval = SCM_UNSPECIFIED;
-               return YYText ()[0];  // Above is catchall for multibyte
+               return YYText ()[0];
        }
 }
 
 <longcomment><<EOF>> {
                LexerError (_ ("EOF found inside a comment").c_str ());
-               is_main_input_ = false; // should be safe , can't have \include in --safe.
-                yylval = SCM_UNSPECIFIED;
-               if (!close_input ())
-                 yyterminate (); // can't move this, since it actually rets a YY_NULL
+               yy_pop_state ();
        }
 
+<quote,commandquote><<EOF>> {
+       LexerError (_ ("EOF found inside string").c_str ());
+       yy_pop_state ();
+}
+
 <<EOF>> {
         yylval = SCM_UNSPECIFIED;
         if (is_main_input_)
        {
                is_main_input_ = include_stack_.size () > main_input_level_;
                if (!is_main_input_)
+               {
                        main_input_level_ = 0;
+                       pop_state ();
+                       if (YYSTATE != maininput)
+                       {
+                               LexerError (_ ("Unfinished main input").c_str ());
+                               do {
+                                       yy_pop_state ();
+                               } while (YYSTATE != maininput);
+                       }
+                       extra_tokens_ = SCM_EOL;
+                       yy_pop_state ();
+               }
                if (!close_input () || !is_main_input_)
                /* Returns YY_NULL */
                        yyterminate ();
@@ -711,11 +777,21 @@ BOM_UTF8  \357\273\277
                yyterminate ();
 }
 
+<maininput>{ANY_CHAR} {
+       while (include_stack_.size () > main_input_level_
+              && close_input ())
+               ;
+       yyterminate ();
+}
+
 <INITIAL>{
        {WORD}/[-_]     | // backup rule
        {WORD}  {
                return scan_bare_word (YYText_utf8 ());
        }
+       \\\"    {
+               start_command_quote ();
+       }
        {COMMAND}/[-_]  | // backup rule
        {COMMAND}       {
                return scan_escaped_word (YYText_utf8 () + 1);
@@ -740,51 +816,18 @@ BOM_UTF8  \357\273\277
 }
 
 
-[{}]   {
+-/\.   { // backup rule
         yylval = SCM_UNSPECIFIED;
        return YYText ()[0];
 }
 
--/\.   | // backup rule
-[*:=]          {
+<INITIAL,chords,lyrics,figures,notes>{SPECIAL} {
         yylval = SCM_UNSPECIFIED;
        return YYText ()[0];
 }
 
-<INITIAL,notes,figures>.       {
-        yylval = SCM_UNSPECIFIED;
-       return YYText ()[0];
-}
-
-<INITIAL,lyrics,notes,figures>\\. {
-    yylval = SCM_UNSPECIFIED;
-    char c = YYText ()[1];
-
-    switch (c) {
-    case '>':
-       return E_ANGLE_CLOSE;
-    case '<':
-       return E_ANGLE_OPEN;
-    case '!':
-       return E_EXCLAMATION;
-    case '(':
-       return E_OPEN;
-    case ')':
-       return E_CLOSE;
-    case '[':
-       return E_BRACKET_OPEN;
-    case '+':
-       return E_PLUS;
-    case ']':
-       return E_BRACKET_CLOSE;
-    case '~':
-       return E_TILDE;
-    case '\\':
-       return E_BACKSLASH;
-
-    default:
-       return E_CHAR;
-    }
+<INITIAL,chords,lyrics,figures,notes>{SHORTHAND}       {
+       return scan_shorthand (YYText_utf8 ()); // should not be utf-8
 }
 
 <*>.[\200-\277]*       {
@@ -877,7 +920,9 @@ Lily_lexer::pop_state ()
        if (YYSTATE == notes || YYSTATE == chords)
                pitchname_tab_stack_ = scm_cdr (pitchname_tab_stack_);
 
-       yy_pop_state ();
+       // don't cross the maininput threshold
+       if (YYSTATE != maininput)
+               yy_pop_state ();
 
        if (extra) {
                hidden_state_ = YYSTATE;
@@ -894,7 +939,7 @@ Lily_lexer::identifier_type (SCM sid)
 
 
 int
-Lily_lexer::scan_escaped_word (string str)
+Lily_lexer::scan_escaped_word (const string &str)
 {
        // use more SCM for this.
 
@@ -902,12 +947,16 @@ Lily_lexer::scan_escaped_word (string str)
 
         yylval = SCM_UNSPECIFIED;
        int i = lookup_keyword (str);
-       if (i == MARKUP && is_lyric_state ())
-               return LYRIC_MARKUP;
+
        if (i != -1)
                return i;
 
        SCM sid = lookup_identifier (str);
+       if (Music *m = unsmob_music (sid))
+       {
+               m->set_spot (override_input (last_input_));
+       }
+
        if (sid != SCM_UNDEFINED)
                return scan_scm_id (sid);
 
@@ -919,6 +968,26 @@ Lily_lexer::scan_escaped_word (string str)
        return STRING;
 }
 
+int
+Lily_lexer::scan_shorthand (const string &str)
+{
+       SCM sid = lookup_identifier (str);
+       if (Music *m = unsmob_music (sid))
+       {
+               m->set_spot (override_input (last_input_));
+       }
+
+       if (sid != SCM_UNDEFINED)
+               return scan_scm_id (sid);
+
+       string msg (_f ("undefined character or shorthand: %s", str));  
+       LexerError (msg.c_str ());
+
+       yylval = ly_string2scm (str);
+
+       return STRING;
+}
+
 int
 Lily_lexer::scan_scm_id (SCM sid)
 {
@@ -956,9 +1025,7 @@ Lily_lexer::scan_scm_id (SCM sid)
                                cs = SCM_CAR (cs);
                        }
                        
-                       if (cs == Pitch_type_p_proc)
-                               push_extra_token (EXPECT_PITCH);
-                       else if (cs == Duration_type_p_proc)
+                       if (cs == Duration_type_p_proc)
                                push_extra_token (EXPECT_DURATION);
                        else if (ly_is_procedure (cs))
                                push_extra_token (EXPECT_SCM, cs);
@@ -977,7 +1044,7 @@ Lily_lexer::scan_scm_id (SCM sid)
 }
 
 int
-Lily_lexer::scan_bare_word (string str)
+Lily_lexer::scan_bare_word (const string &str)
 {
        SCM sym = ly_symbol2scm (str.c_str ());
        if ((YYSTATE == notes) || (YYSTATE == chords)) {
@@ -1067,19 +1134,26 @@ Lily_lexer::eval_scm (SCM readerdata, char extra_token)
                sval = scm_struct_ref (sval, SCM_INUM0);
 
                if (scm_is_pair (sval)) {
-                       for (SCM v = scm_reverse (scm_cdr (sval));
-                            scm_is_pair (v);
-                            v = scm_cdr (v))
+                       for (SCM p = scm_reverse (scm_cdr (sval));
+                            scm_is_pair (p);
+                            p = scm_cdr (p))
                        {
+                               SCM v = scm_car (p);
+                               if (Music *m = unsmob_music (v))
+                               {
+                                       if (!unsmob_input (m->get_property ("origin")))
+                                               m->set_spot (override_input (last_input_));
+                               }
+                                       
                                int token;
                                switch (extra_token) {
                                case '$':
-                                       token = scan_scm_id (scm_car (v));
+                                       token = scan_scm_id (v);
                                        if (!scm_is_eq (yylval, SCM_UNSPECIFIED))
                                                push_extra_token (token, yylval);
                                        break;
                                case '#':
-                                       push_extra_token (SCM_IDENTIFIER, scm_car (v));
+                                       push_extra_token (SCM_IDENTIFIER, v);
                                        break;
                                }
                        }
@@ -1088,6 +1162,12 @@ Lily_lexer::eval_scm (SCM readerdata, char extra_token)
                        sval = SCM_UNSPECIFIED;
        }
 
+       if (Music *m = unsmob_music (sval))
+       {
+               if (!unsmob_input (m->get_property ("origin")))
+                       m->set_spot (override_input (last_input_));
+       }
+
        return sval;
 }