From 6b0ff053180c478af89a60e28fbe57d6aeabc2d3 Mon Sep 17 00:00:00 2001
From: David Kastrup
Date: Sun, 23 Dec 2012 22:13:08 +0100
Subject: [PATCH] Make lexer more robust against unexpected EOF

This helps against EOF within strings, multiline comments and other
constructs without losing track when terminating the main input parser
while retaining the lexer.
---
 lily/lexer.ll | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/lily/lexer.ll b/lily/lexer.ll
index 63d58527e8..df05b45c38 100644
--- a/lily/lexer.ll
+++ b/lily/lexer.ll
@@ -120,6 +120,7 @@ SCM (* scm_parse_error_handler) (void *);
 %x incl
 %x lyrics
 %x longcomment
+%x maininput
 %x markup
 %x notes
 %x quote
@@ -314,6 +315,9 @@ BOM_UTF8 \357\273\277
 		start_main_input ();
 		main_input_level_ = include_stack_.size ();
 		is_main_input_ = true;
+		int state = YYSTATE;
+		yy_push_state (maininput);
+		yy_push_state (state);
 	}
 	else
 		error (_ ("\\maininput not allowed outside init files"));
@@ -689,19 +693,32 @@ BOM_UTF8 \357\273\277
 
 <longcomment><<EOF>> {
 		LexerError (_ ("EOF found inside a comment").c_str ());
-		is_main_input_ = false; // should be safe , can't have \include in --safe.
-		yylval = SCM_UNSPECIFIED;
-		if (!close_input ())
-		  yyterminate (); // can't move this, since it actually rets a YY_NULL
+		yy_pop_state ();
 	}
 
+<quote><<EOF>> {
+	LexerError (_ ("EOF found inside string").c_str ());
+	yy_pop_state ();
+}
+
 <<EOF>> {
 		yylval = SCM_UNSPECIFIED;
 		if (is_main_input_)
 		{
 			is_main_input_ = include_stack_.size () > main_input_level_;
 			if (!is_main_input_)
+			{
 				main_input_level_ = 0;
+				pop_state ();
+				if (YYSTATE != maininput)
+				{
+					LexerError (_ ("Unfinished main input").c_str ());
+					do {
+						pop_state ();
+					} while (YYSTATE != maininput);
+				}
+				pop_state ();
+			}
 			if (!close_input () || !is_main_input_)
 			/* Returns YY_NULL */
 				yyterminate ();
@@ -711,6 +728,13 @@ BOM_UTF8 \357\273\277
 			yyterminate ();
 	}
 
+<maininput>. {
+	while (include_stack_.size () > main_input_level_
+	       && close_input ())
+		;
+	yyterminate ();
+}
+
 <INITIAL>{
 	{WORD}/[-_]	| // backup rule
 	{WORD}	{
-- 
2.39.5