From 5c58da93f8859c3f15760631db4e63590e073e1e Mon Sep 17 00:00:00 2001
From: David Kastrup <dak@gnu.org>
Date: Sun, 4 Aug 2013 11:32:09 +0200
Subject: [PATCH] Issue 3487: Make several special characters with or without
 backslash "shorthands"

Single non-alphanumeric ASCII characters not requiring special
treatment in lexer or parser can now be redefined like escaped
identifiers.  The same holds for escaped non-alphanumeric ASCII
characters.  The identifying name you use for redefining them is the
string corresponding to the full shorthand, in contrast to escaped
identifiers where the identifying name omits the initial backslash.

Notable shorthands not treated specially in the parser (some of them
newly so) can be seen in the following definitions from
ly/declarations-init.ly:

"|" = #(make-music 'BarCheck)
"[" = #(make-span-event 'BeamEvent START)
"]" = #(make-span-event 'BeamEvent STOP)
"~" = #(make-music 'TieEvent)
"(" = #(make-span-event 'SlurEvent START)
")" = #(make-span-event 'SlurEvent STOP)
"\\!" = #(make-span-event 'CrescendoEvent STOP)
"\\(" = #(make-span-event 'PhrasingSlurEvent START)
"\\)" = #(make-span-event 'PhrasingSlurEvent STOP)
"\\>" = #(make-span-event 'DecrescendoEvent START)
"\\<" = #(make-span-event 'CrescendoEvent START)
"\\[" = #(make-span-event 'LigatureEvent START)
"\\]" = #(make-span-event 'LigatureEvent STOP)
"\\~" = #(make-music 'PesOrFlexaEvent)
"\\\\" = #(make-music 'VoiceSeparator)
---
 Documentation/changes.tely     |  20 ++++++
 lily/include/lily-lexer.hh     |   1 +
 lily/lexer.ll                  | 102 +++++++++++++++--------------
 lily/parser.yy                 | 114 ++-------------------------------
 ly/declarations-init.ly        |   4 ++
 python/convertrules.py         |  33 +++++++++-
 scm/ly-syntax-constructors.scm |   6 --
 7 files changed, 118 insertions(+), 162 deletions(-)

diff --git a/Documentation/changes.tely b/Documentation/changes.tely
index f32e9fece8..2111c8d3a2 100644
--- a/Documentation/changes.tely
+++ b/Documentation/changes.tely
@@ -61,6 +61,26 @@ which scares away people.
 
 @end ignore
 
+@item
+A number of shorthands like @code{(}, @code{)}, @code{|},
+@code{[}, @code{]}, @code{~}, @code{\(}, @code{\)} and others can
+now freely be redefined like normal commands.  An example would be
+@lilypond[verbatim,quote]
+"\\{" = (
+"\\}" = )
+"(" = \melisma
+")" = \melismaEnd
+
+\new Staff <<
+  \relative c' {
+    c8 \{ d e f \} % slurred
+    g ( a b c ) % no slur, but with melisma
+    c,1 \bar "|."
+   }
+   \addlyrics { Li -- ly -- pond. }
+>>
+@end lilypond
+
 @item
 The articulation shorthand for @code{\staccatissimo} has been
 renamed from @code{-|} to@tie{}@code{-!}.
diff --git a/lily/include/lily-lexer.hh b/lily/include/lily-lexer.hh
index 08701c9827..19b01ca6ff 100644
--- a/lily/include/lily-lexer.hh
+++ b/lily/include/lily-lexer.hh
@@ -40,6 +40,7 @@ private:
   int scan_bare_word (string);
   SCM scan_markup_word (string);
   int scan_escaped_word (string);
+  int scan_shorthand (string);
   int scan_scm_id (SCM);
   int identifier_type (SCM);
   char escaped_char (char) const;
diff --git a/lily/lexer.ll b/lily/lexer.ll
index 9e25ed7887..50b79a8488 100644
--- a/lily/lexer.ll
+++ b/lily/lexer.ll
@@ -150,7 +150,10 @@ N		[0-9]
 ANY_CHAR	(.|\n)
 WORD		{A}([-_]{A}|{A})*
 COMMAND		\\{WORD}
-
+/* SPECIAL category is for every character that needs to get passed to
+ * the parser rather than being redefinable by the user */
+SPECIAL		[-+*/=<>{}!?_^'',.:]
+SHORTHAND	(.|\\.)
 UNSIGNED	{N}+
 E_UNSIGNED	\\{N}+
 FRACTION	{N}+\/{N}+
@@ -477,6 +480,22 @@ BOM_UTF8	\357\273\277
                 yylval = SCM_UNSPECIFIED;
 		return FIGURE_OPEN;
 	}
+	\\\+	{
+		yylval = SCM_UNSPECIFIED;
+		return E_PLUS;
+	}
+	\\!	{
+		yylval = SCM_UNSPECIFIED;
+		return E_EXCLAMATION;
+	}
+	\\\\	{
+		yylval = SCM_UNSPECIFIED;
+		return E_BACKSLASH;
+	}
+	[][]	{
+		yylval = SCM_UNSPECIFIED;
+		return	YYText ()[0];
+	}
 }
 
 <notes,figures>{
@@ -557,8 +576,12 @@ BOM_UTF8	\357\273\277
 	{COMMAND}	{
 		return scan_escaped_word (YYText_utf8 () + 1);
 	}
-	/* Characters needed to express durations, assignments, barchecks */
-	[*.=|]	{
+	\\.|\|	{
+		// UTF-8 already covered by COMMAND
+		return scan_shorthand (YYText ());
+	}
+	/* Characters needed to express durations, assignments */
+	[*.=]	{
                 yylval = SCM_UNSPECIFIED;
 		return YYText ()[0];
 	}
@@ -576,9 +599,9 @@ BOM_UTF8	\357\273\277
 		return STRING;
 	}
 	/* This should really just cover {} */
-	. {
+	[{}] {
                 yylval = SCM_UNSPECIFIED;
-		return YYText ()[0]; // above catches all multibytes.
+		return YYText ()[0];
 	}
 }
 <chords>{
@@ -619,10 +642,6 @@ BOM_UTF8	\357\273\277
                 yylval = SCM_UNSPECIFIED;
 		return CHORD_CARET;
 	}
-	. {
-                yylval = SCM_UNSPECIFIED;
-		return YYText ()[0]; // WORD catches all multibyte.
-	}
 }
 
 
@@ -688,9 +707,9 @@ BOM_UTF8	\357\273\277
 		yylval = ly_string2scm (s);
 		return STRING;
 	}
-	.  {
+	[{}]  {
                 yylval = SCM_UNSPECIFIED;
-		return YYText ()[0];  // Above is catchall for multibyte
+		return YYText ()[0];
 	}
 }
 
@@ -767,51 +786,18 @@ BOM_UTF8	\357\273\277
 }
 
 
-[{}]	{
-        yylval = SCM_UNSPECIFIED;
-	return YYText ()[0];
-}
-
--/\.	| // backup rule
-[*:=]		{
+-/\.	{ // backup rule
         yylval = SCM_UNSPECIFIED;
 	return YYText ()[0];
 }
 
-<INITIAL,notes,figures>.	{
+<INITIAL,chords,lyrics,figures,notes>{SPECIAL}	{
         yylval = SCM_UNSPECIFIED;
 	return YYText ()[0];
 }
 
-<INITIAL,lyrics,notes,figures>\\. {
-    yylval = SCM_UNSPECIFIED;
-    char c = YYText ()[1];
-
-    switch (c) {
-    case '>':
-	return E_ANGLE_CLOSE;
-    case '<':
-	return E_ANGLE_OPEN;
-    case '!':
-	return E_EXCLAMATION;
-    case '(':
-	return E_OPEN;
-    case ')':
-	return E_CLOSE;
-    case '[':
-	return E_BRACKET_OPEN;
-    case '+':
-	return E_PLUS;
-    case ']':
-	return E_BRACKET_CLOSE;
-    case '~':
-	return E_TILDE;
-    case '\\':
-	return E_BACKSLASH;
-
-    default:
-	return E_CHAR;
-    }
+<INITIAL,chords,lyrics,figures,notes>{SHORTHAND}	{
+	return scan_shorthand (YYText_utf8 ()); // should not be utf-8
 }
 
 <*>.[\200-\277]*	{
@@ -950,6 +936,26 @@ Lily_lexer::scan_escaped_word (string str)
 	return STRING;
 }
 
+int
+Lily_lexer::scan_shorthand (string str)
+{ // STR is the full shorthand text as matched (e.g. "|" or "\("), used as the identifier name.
+	SCM sid = lookup_identifier (str);
+	if (Music *m = unsmob_music (sid))
+	{
+		m->set_spot (override_input (last_input_)); // NOTE(review): sets the spot on the looked-up object itself, no clone here -- confirm this is intended
+	}
+
+	if (sid != SCM_UNDEFINED)
+		return scan_scm_id (sid); // defined: token type and value are decided by scan_scm_id
+
+	string msg (_f ("undefined character or shorthand: %s", str));	
+	LexerError (msg.c_str ()); // undefined: report the error, then fall through
+
+	yylval = ly_string2scm (str); // hand the raw shorthand text to the parser as a string value
+
+	return STRING;
+}
+
 int
 Lily_lexer::scan_scm_id (SCM sid)
 {
diff --git a/lily/parser.yy b/lily/parser.yy
index ed9c90f907..89123a5e5b 100644
--- a/lily/parser.yy
+++ b/lily/parser.yy
@@ -309,16 +309,8 @@ int yylex (YYSTYPE *s, YYLTYPE *loc, Lily_parser *parser);
 %token DOUBLE_ANGLE_OPEN "<<"
 %token DOUBLE_ANGLE_CLOSE ">>"
 %token E_BACKSLASH "\\"
-%token E_ANGLE_CLOSE "\\>"
-%token E_CHAR "\\C[haracter]"
-%token E_CLOSE "\\)"
 %token E_EXCLAMATION "\\!"
-%token E_BRACKET_OPEN "\\["
-%token E_OPEN "\\("
-%token E_BRACKET_CLOSE "\\]"
-%token E_ANGLE_OPEN "\\<"
 %token E_PLUS "\\+"
-%token E_TILDE "\\~"
 %token EXTENDER "__"
 
 /*
@@ -2484,40 +2476,10 @@ command_element:
 	command_event {
 		$$ = $1;
 	}
-	| E_BRACKET_OPEN {
-		Music *m = MY_MAKE_MUSIC ("LigatureEvent", @$);
-		m->set_property ("span-direction", scm_from_int (START));
-		$$ = m->unprotect();
-	}
-	| E_BRACKET_CLOSE {
-		Music *m = MY_MAKE_MUSIC ("LigatureEvent", @$);
-		m->set_property ("span-direction", scm_from_int (STOP));
-		$$ = m->unprotect ();
-	}
-	| E_BACKSLASH {
-		$$ = MAKE_SYNTAX ("voice-separator", @$);
-	}
-	| '|'      {
-		SCM pipe = parser->lexer_->lookup_identifier ("pipeSymbol");
-
-		Music *m = unsmob_music (pipe);
-		if (m)
-		{
-			m = m->clone ();
-			m->set_spot (@$);
-			$$ = m->unprotect ();
-		}
-		else
-			$$ = MAKE_SYNTAX ("bar-check", @$);
-
-	}
 	;
 
 command_event:
-	E_TILDE {
-		$$ = MY_MAKE_MUSIC ("PesOrFlexaEvent", @$)->unprotect ();
-	}
-	| tempo_event {
+	tempo_event {
 		$$ = $1;
 	}
 	;
@@ -2614,55 +2576,8 @@ string_number_event:
 	}
 	;
 
-direction_less_char:
-	'['  {
-		$$ = ly_symbol2scm ("bracketOpenSymbol");
-	}
-	| ']'  {
-		$$ = ly_symbol2scm ("bracketCloseSymbol");
-	}
-	| '~'  {
-		$$ = ly_symbol2scm ("tildeSymbol");
-	}
-	| '('  {
-		$$ = ly_symbol2scm ("parenthesisOpenSymbol");
-	}
-	| ')'  {
-		$$ = ly_symbol2scm ("parenthesisCloseSymbol");
-	}
-	| E_EXCLAMATION  {
-		$$ = ly_symbol2scm ("escapedExclamationSymbol");
-	}
-	| E_OPEN  {
-		$$ = ly_symbol2scm ("escapedParenthesisOpenSymbol");
-	}
-	| E_CLOSE  {
-		$$ = ly_symbol2scm ("escapedParenthesisCloseSymbol");
-	}
-	| E_ANGLE_CLOSE  {
-		$$ = ly_symbol2scm ("escapedBiggerSymbol");
-	}
-	| E_ANGLE_OPEN  {
-		$$ = ly_symbol2scm ("escapedSmallerSymbol");
-	}
-	;
-
 direction_less_event:
-	direction_less_char {
-		SCM predefd = parser->lexer_->lookup_identifier_symbol ($1);
-		Music *m = 0;
-		if (unsmob_music (predefd))
-		{
-			m = unsmob_music (predefd)->clone ();
-			m->set_spot (@$);
-		}
-		else
-		{
-			m = MY_MAKE_MUSIC ("Music", @$);
-		}
-		$$ = m->unprotect ();
-	}
-	| string_number_event
+	string_number_event
 	| EVENT_IDENTIFIER	{
 		$$ = $1;
 	}
@@ -2958,38 +2873,23 @@ bass_figure:
 	}
 	| bass_figure figured_bass_modification  {
 		Music *m = unsmob_music ($1);
-		if ($2 == ly_symbol2scm ("plus"))
-			{
-			m->set_property ("augmented", SCM_BOOL_T);
-			}
-		else if ($2 == ly_symbol2scm ("slash"))
-			{
-			m->set_property ("diminished", SCM_BOOL_T);
-			}
-		else if ($2 == ly_symbol2scm ("exclamation"))
-			{
-			m->set_property ("no-continuation", SCM_BOOL_T);
-			}
-		else if ($2 == ly_symbol2scm ("backslash"))
-			{
-			m->set_property ("augmented-slash", SCM_BOOL_T);
-			}
+		m->set_property ($2, SCM_BOOL_T);
 	}
 	;
 
 
 figured_bass_modification:
 	E_PLUS		{
-		$$ = ly_symbol2scm ("plus");
+		$$ = ly_symbol2scm ("augmented");
 	}
 	| E_EXCLAMATION {
-		$$ = ly_symbol2scm ("exclamation");
+		$$ = ly_symbol2scm ("no-continuation");
 	}
 	| '/'		{
-		$$ = ly_symbol2scm ("slash");
+		$$ = ly_symbol2scm ("diminished");
 	}
 	| E_BACKSLASH {
-		$$ = ly_symbol2scm ("backslash");
+		$$ = ly_symbol2scm ("augmented-slash");
 	}
 	;
 
diff --git a/ly/declarations-init.ly b/ly/declarations-init.ly
index 0ffc57a578..0b8ab2c95c 100644
--- a/ly/declarations-init.ly
+++ b/ly/declarations-init.ly
@@ -85,6 +85,10 @@ escapedParenthesisOpenSymbol = #(make-span-event 'PhrasingSlurEvent START)
 escapedParenthesisCloseSymbol = #(make-span-event 'PhrasingSlurEvent STOP)
 escapedBiggerSymbol = #(make-span-event 'DecrescendoEvent START)
 escapedSmallerSymbol = #(make-span-event 'CrescendoEvent START)
+"\\[" = #(make-span-event 'LigatureEvent START)
+"\\]" = #(make-span-event 'LigatureEvent STOP)
+"\\~" = #(make-music 'PesOrFlexaEvent)
+"\\\\" = #(make-music 'VoiceSeparator)
 
 
 \include "scale-definitions-init.ly"
diff --git a/python/convertrules.py b/python/convertrules.py
index e2acf4e266..ba65d9683d 100644
--- a/python/convertrules.py
+++ b/python/convertrules.py
@@ -3592,7 +3592,8 @@ def conv(str):
     return str
 
 @rule((2, 17, 25), r'''\tempo 4. = 50~60 -> \tempo 4. = 50-60
--| -> -!''')
+-| -> -!
+pipeSymbol, escapedParenthesisOpenSymbol ... -> "|", "\\(" ...''')
 def conv(str):
 #  This goes for \tempo commands ending with a range, like
 #  = 50 ~ 60
@@ -3608,6 +3609,36 @@ def conv(str):
         return m.group (0)
     str = re.sub (r"([-^_])\||" + matchstring + r"|[-^_][-^_]", subnonstring, str)
     str = re.sub (r"\bdashBar\b", "dashBang", str)
+    orig = [ "pipeSymbol",
+             "bracketOpenSymbol",
+             "bracketCloseSymbol",
+             "tildeSymbol",
+             "parenthesisOpenSymbol",
+             "parenthesisCloseSymbol",
+             "escapedExclamationSymbol",
+             "escapedParenthesisOpenSymbol",
+             "escapedParenthesisCloseSymbol",
+             "escapedBiggerSymbol",
+             "escapedSmallerSymbol" ]
+    repl = [ r'"|"',
+             r'"["',
+             r'"]"',
+             r'"~"',
+             r'"("',
+             r'")"',
+             r'"\\!"',
+             r'"\\("',
+             r'"\\)"',
+             r'"\\>"',
+             r'"\\<"']
+    words = r"\b(?:(" + ")|(".join (orig) + r"))\b"
+    def wordreplace(m):
+        def instring(m):
+            return re.sub (r'["\\]',r'\\\g<0>',repl[m.lastindex-1])
+        if m.lastindex:
+            return repl[m.lastindex-1]
+        return '"' + re.sub (words, instring, m.group(0)[1:-1]) + '"'
+    str = re.sub (words + "|" + matchstring, wordreplace, str)
     return str
 
 # Guidelines to write rules (please keep this at the end of this file)
diff --git a/scm/ly-syntax-constructors.scm b/scm/ly-syntax-constructors.scm
index 5f1583589b..01fc9bcef8 100644
--- a/scm/ly-syntax-constructors.scm
+++ b/scm/ly-syntax-constructors.scm
@@ -98,12 +98,6 @@
               'change-to-type type
               'change-to-id id))
 
-(define-ly-syntax-simple (voice-separator)
-  (make-music 'VoiceSeparator))
-
-(define-ly-syntax-simple (bar-check)
-  (make-music 'BarCheck))
-
 (define-ly-syntax (tempo parser location text . rest)
   (let* ((unit (and (pair? rest)
                     (car rest)))
-- 
2.39.5