Remove CR LF from snippets using makelsr

[lilypond.git] / scripts / auxiliar / makelsr.py
diff --git a/scripts/auxiliar/makelsr.py b/scripts/auxiliar/makelsr.py

index be4e7daff3a2bfd15b16aef6d6c5d69e737110a6..71c8179f36721a047821e23cd25d9bf20fbe3618 100755 (executable)
--- a/scripts/auxiliar/makelsr.py
+++ b/scripts/auxiliar/makelsr.py
@@ -4,68 +4,182 @@ import sys
  import os
  import glob
  import re
+import optparse
+import tempfile
+
+lilypond_flags = "-dno-print-pages -dsafe"
+
+lys_from_lsr = os.path.join ('Documentation', 'snippets')
+new_lys = os.path.join ('Documentation', 'snippets', 'new')
+ly_output = os.path.join (tempfile.gettempdir (), 'lsrtest')
+
+# which convert-ly and lilypond to use
+P = os.path.join (os.environ.get ("LILYPOND_BUILD_DIR", ""),
+                  "out/bin/convert-ly")
+if os.path.isfile (P):
+    conv_path = os.path.dirname (P)
+elif os.path.isfile ("build/out/bin/convert-ly"):
+    conv_path = "build/out/bin/"
+else:
+    conv_path=''
+convert_ly = os.path.join (conv_path, 'convert-ly')
+lilypond_bin = os.path.join (conv_path, 'lilypond')
+
  
-sys.path.append ('python')
-import langdefs
  
-DEST = os.path.join ('Documentation', 'snippets')
-NEW_LYS = os.path.join ('Documentation', 'snippets', 'new')
-TEXIDOCS = [os.path.join ('Documentation', language_code, 'texidocs')
-            for language_code in langdefs.LANGDICT]
-
-USAGE = '''  Usage: makelsr.py [LSR_SNIPPETS_DIR]
-This script must be run from top of the source tree;
-it updates snippets %(DEST)s with snippets
-from %(NEW_LYS)s or LSR_SNIPPETS_DIR.
-If a snippet is present in both directories, the one
-from %(NEW_LYS)s is preferred.
-''' % vars ()
-
-LY_HEADER_LSR = '''%% Do not edit this file; it is automatically
-%% generated from LSR http://lsr.dsi.unimi.it
+LY_HEADER_LSR = '''%% DO NOT EDIT this file manually; it is automatically
+%% generated from LSR http://lsr.di.unimi.it
+%% Make any changes in LSR itself, or in Documentation/snippets/new/ ,
+%% and then run scripts/auxiliar/makelsr.py
+%%
  %% This file is in the public domain.
  '''
  
-LY_HEADER_NEW = '''%% Do not edit this file; it is automatically
-%% generated from %s
+new_lys_marker = "%% generated from %s" % new_lys
+LY_HEADER_NEW = '''%% DO NOT EDIT this file manually; it is automatically
+%s
+%% Make any changes in Documentation/snippets/new/
+%% and then run scripts/auxiliar/makelsr.py
+%%
  %% This file is in the public domain.
-''' % NEW_LYS
-
-TAGS = []
-# NR 1
-TAGS.extend (['pitches', 'rhythms', 'expressive-marks',
-'repeats', 'simultaneous-notes', 'staff-notation',
-'editorial-annotations', 'text'])
-# NR 2
-TAGS.extend (['vocal-music', 'chords', 'keyboards',
-'percussion', 'fretted-strings', 'unfretted-strings',
-'ancient-notation', 'winds', 'world-music'
-])
-
-# other
-TAGS.extend (['contexts-and-engravers', 'tweaks-and-overrides',
-'paper-and-layout', 'breaks', 'spacing', 'midi', 'titles', 'template'])
+''' % new_lys_marker
+
+options_parser = optparse.OptionParser (
+    description = "makelsr - update snippets directory from LSR",
+    usage = '''%%prog [options] [LSR_SNIPPETS_DIR]
+Unless -s option is specified, this script must be run from top of the
+source tree. If LSR_SNIPPETS_DIR is not specified, it defaults to
+current directory.
+
+Remove snippets in TOP_SOURCE_DIR/%(lys_from_lsr)s and put in snippets
+from LSR_SNIPPETS_DIR run through convert-ly or from
+TOP_SOURCE_DIR/%(new_lys)s; if a snippet is present in both
+directories, the one from TOP_SOURCE_DIR/%(new_lys)s is preferred.
+All written snippets are copied in LY_OUTPUT
+with appending translations from .texidoc files and are tested with
+lilypond with flags %(lilypond_flags)s
+
+''' % vars ())
+
+options_parser.add_option ('-s', '--top-source',
+                           dest="top_source_dir",
+                           action="store",
+                           metavar="TOP_SOURCE_DIR",
+                           default=".",
+                           help="set LilyPond top source directory")
+
+options_parser.add_option ('-o', '--ly-output',
+                           dest="ly_output",
+                           action="store",
+                           metavar="LY_OUTPUT",
+                           default=ly_output,
+                           help="set LilyPond output files temporary directory")
+
+options_parser.add_option ('-p', '--path',
+                           dest="bin_path",
+                           action="store",
+                           metavar="LY_PATH",
+                           default=conv_path,
+                           help="directory where looking for LilyPond binaries")
+
+options_parser.add_option ('-c', '--convert-ly',
+                           dest="convert_ly",
+                           action="store",
+                           metavar="CONVERT-LY",
+                           default="LY_PATH/convert-ly",
+                           help="convert-ly binary to use")
+
+options_parser.add_option ('-l', '--lilypond-binary',
+                           dest="lilypond_bin",
+                           action="store",
+                           metavar="LILYPOND_BIN",
+                           default="LY_PATH/lilypond",
+                           help="lilypond binary to use")
+
+(options, args) = options_parser.parse_args ()
+
+if not os.path.isdir (options.top_source_dir):
+    sys.stderr.write ("Error: top source: %s: not a directory\n" % options.top_source_dir)
+    sys.exit (4)
+
+lys_from_lsr = os.path.normpath (os.path.join (options.top_source_dir, lys_from_lsr))
+new_lys = os.path.normpath (os.path.join (options.top_source_dir, new_lys))
+sys.path.append (os.path.normpath (os.path.join (options.top_source_dir, 'python')))
+import langdefs
+texidoc_dirs = [
+    os.path.normpath (os.path.join (options.top_source_dir, 'Documentation', language_code, 'texidocs'))
+    for language_code in langdefs.LANGDICT]
+
+if not os.path.isdir (lys_from_lsr):
+    sys.stderr.write ("Error: snippets path: %s: not a directory\n" % lys_from_lsr)
+    sys.exit (3)
+if not os.path.isdir (new_lys):
+    sys.stderr.write ("Error: new snippets path: %s: not a directory\n" % new_lys)
+    sys.exit (3)
+
+ly_output_ok = False
+if os.path.isdir (options.ly_output):
+    ly_output = options.ly_output
+    ly_output_ok = True
+elif os.path.exists (options.ly_output):
+    try:
+        os.unlink (options.ly_output)
+    except Exception as e:
+        sys.stderr.write ("Warning: could not delete file before creating directory: %s\n" % e)
+    else:
+        try:
+            os.makedirs (options.ly_output)
+        except Exception as e:
+            sys.stderr.write ("Warning: could not create directory: %s\n" % e)
+        else:
+            ly_output = options.ly_output
+            ly_output_ok = True
+else:
+    try:
+        os.makedirs (options.ly_output)
+    except Exception as e:
+        sys.stderr.write ("Warning: could not create directory: %s\n" % e)
+    else:
+        ly_output = options.ly_output
+        ly_output_ok = True
+if not ly_output_ok:
+    ly_output = tempfile.gettempdir ()
+    sys.stderr.write ("Warning: could not use or create directory %s, using default %s\n" % (options.ly_output, ly_output))
  
  def exit_with_usage (n=0):
-    sys.stderr.write (USAGE)
+    options_parser.print_help (sys.stderr)
      sys.exit (n)
  
-if len (sys.argv) >= 2:
-    in_dir = sys.argv[1]
-    if len (sys.argv) >= 3:
+if len (args):
+    in_dir = args[0]
+    if not (os.path.isdir (in_dir)):
+        sys.stderr.write ("Error: %s: not a directory\n" % in_dir)
+        sys.exit (4)
+    if len (args) > 1:
          exit_with_usage (2)
-    if not (os.path.isdir (DEST) and os.path.isdir (NEW_LYS)):
-        exit_with_usage (3)
+    tags = os.listdir (in_dir)
  else:
      in_dir = ''
+    tags = [os.path.splitext (os.path.basename (f))[0]
+            for f in glob.glob (os.path.join (lys_from_lsr, '*.snippet-list'))]
+## Make sure all users get the same ordering of tags
+tags.sort ()
  
-# which convert-ly to use
-if os.path.isfile("out/bin/convert-ly"):
-    conv_path='out/bin/'
+if options.convert_ly == "LY_PATH/convert-ly":
+    convert_ly = os.path.join (options.bin_path, "convert-ly")
  else:
-    conv_path=''
-convert_ly=conv_path+'convert-ly'
-print 'using '+convert_ly
+    convert_ly = options.convert_ly
+if not os.path.exists (convert_ly):
+    sys.stderr.write ("Warning: %s: no such file\n" % convert_ly)
+    convert_ly = "convert-ly"
+if options.lilypond_bin == "LY_PATH/lilypond":
+    lilypond_bin = os.path.join (options.bin_path, "lilypond")
+else:
+    lilypond_bin = options.lilypond_bin
+if not os.path.exists (lilypond_bin):
+    sys.stderr.write ("Warning: %s: no such file\n" % lilypond_bin)
+    lilypond_bin = "lilypond"
+sys.stderr.write ("Using %s, %s\n" % (convert_ly, lilypond_bin))
  
  unsafe = []
  unconverted = []
@@ -92,188 +206,137 @@ lsr_comment_re = re.compile (r'\s*%+\s*LSR.*')
  begin_header_re = re.compile (r'\\header\s*{', re.M)
  ly_new_version_re = re.compile (r'\\version\s*"(.+?)"')
  strip_white_spaces_re = re.compile (r'[ \t]+(?=\n)')
+final_empty_lines_re = re.compile (r'\n{2,}$')
  
  # add tags to ly files from LSR
  def add_tags (ly_code, tags):
      return begin_header_re.sub ('\\g<0>\n  lsrtags = "' + tags + '"\n',
                                  ly_code, 1)
  
-# for snippets from input/new, add message for earliest working version
+# for snippets from Documentation/snippets/new, add message for earliest working version
  def add_version (ly_code):
      return '''%% Note: this file works from version ''' + \
          ly_new_version_re.search (ly_code).group (1) + '\n'
  
-s = 'Translation of GIT [Cc]ommittish'
-texidoc_chunk_re = re.compile (r'^(?:%+\s*' + s + \
-    r'.+)?\s*(?:texidoc|doctitle)([a-zA-Z]{2,4})\s+=(?:.|\n)*?(?=%+\s*' + \
-    s + r'|\n\} % begin verbatim|\n  (?:doctitle|texidoc|lsrtags) |$(?!.|\n))', re.M)
-
-def update_translated_texidoc (m, snippet_path, visited_languages):
-    base = os.path.splitext (os.path.basename (snippet_path))[0]
-    language_code = m.group (1)
-    visited_languages.append (language_code)
-    texidoc_path = os.path.join ('Documentation', language_code,
-                                 'texidocs', base + '.texidoc')
-    if os.path.isfile (texidoc_path):
-        return open (texidoc_path).read ()
-    else:
-        return m.group (0)
-
  def escape_backslashes_in_header(snippet):
      # ASSUME: the \header exists.
      header_char_number_start = snippet.find('\header {')
      header_char_number_end = snippet.find('} % begin verbatim')
  
      header = snippet[header_char_number_start:header_char_number_end]
-    # two levels of escaping happening here -- 4\ means 1\
-    # and the 10\ means two \ backslashes (that's 8\ ), and
-    # one backreference to group 1 (that's two 2\ ).
-    new_header = re.sub("@code\{\\\\([a-zA-Z])", "@code{\\\\\\\\\\1", header)
+    # only one level of escaping happening here
+    # thanks to raw strings
+    new_header = re.sub(r"@code\{\\([a-zA-Z])", r"@code{\\\\\1", header)
      escaped_snippet = (snippet[:header_char_number_start] +
-       new_header + snippet[header_char_number_end:])
+        new_header + snippet[header_char_number_end:])
      return escaped_snippet
  
  def copy_ly (srcdir, name, tags):
      global unsafe
      global unconverted
-    dest = os.path.join (DEST, name)
+    dest = os.path.join (lys_from_lsr, name)
      tags = ', '.join (tags)
-    s = open (os.path.join (srcdir, name)).read ()
-
-    for path in TEXIDOCS:
-        texidoc_translation_path = \
-            os.path.join (path, os.path.splitext (name)[0] + '.texidoc')
-        if os.path.exists (texidoc_translation_path):
-            texidoc_translation = open (texidoc_translation_path).read ()
-            # Since we want to insert the translations verbatim using a 
-            # regexp, \\ is understood as ONE escaped backslash. So we have
-            # to escape those backslashes once more...
-            texidoc_translation = texidoc_translation.replace ('\\', '\\\\')
-            s = begin_header_re.sub ('\\g<0>\n' + texidoc_translation, s, 1)
+    file_path = os.path.join (srcdir, name)
+    sys.stderr.write ("\nmakelsr.py: reading %s\n" % file_path)
+    s = open (file_path).read ()
  
      s = doctitle_re.sub (doctitle_sub, s)
-    if in_dir and in_dir in srcdir:
-        s = LY_HEADER_LSR + add_tags (s, tags)
-    else:
+    if srcdir == new_lys:  # exact match: a substring test misfires on any path containing "new"
          s = LY_HEADER_NEW + add_version (s) + s
+    else:
+        s = LY_HEADER_LSR + add_tags (s, tags)
  
      s = mark_verbatim_section (s)
      s = lsr_comment_re.sub ('', s)
      s = strip_white_spaces_re.sub ('', s)
+    s = final_empty_lines_re.sub ('\n', s)
      s = escape_backslashes_in_header (s)
+    s = s.replace ("\r\n", "\n")
+    sys.stderr.write ("makelsr.py: writing %s\n" % dest)
      open (dest, 'w').write (s)
  
-    e = os.system (convert_ly+(" -e '%s'" % dest))
+    e = os.system (convert_ly+(" -d -e '%s'" % dest))
      if e:
          unconverted.append (dest)
      if os.path.exists (dest + '~'):
          os.remove (dest + '~')
-    # no need to check snippets from input/new
-    if in_dir and in_dir in srcdir:
-        # -V seems to make unsafe snippets fail nicer/sooner
-        e = os.system ("lilypond -V -dno-print-pages -dsafe -o /tmp/lsrtest '%s'" % dest)
+    # no need to check snippets from Documentation/snippets/new
+    if srcdir != new_lys:  # exact match: a substring test misfires on any path containing "new"
+        e = os.system (
+            "%s %s -o %s '%s'" %
+            (lilypond_bin, lilypond_flags, ly_output, dest))
          if e:
              unsafe.append (dest)
  
  def read_source_with_dirs (src):
-    s = {}
-    l = {}
-    for tag in TAGS:
+    snippet_list = {}
+    tag_list = {}
+    for tag in tags:
          srcdir = os.path.join (src, tag)
-        l[tag] = set (map (os.path.basename,
+        tag_list[tag] = set (map (os.path.basename,
                             glob.glob (os.path.join (srcdir, '*.ly'))))
-        for f in l[tag]:
-            if f in s:
-                s[f][1].append (tag)
+        for f in tag_list[tag]:
+            if f in snippet_list:
+                snippet_list[f][1].append (tag)
              else:
-                s[f] = (srcdir, [tag])
-    return s, l
+                snippet_list[f] = (srcdir, [tag])
+    return snippet_list
  
  
  tags_re = re.compile ('lsrtags\\s*=\\s*"(.+?)"')
  
  def read_source (src):
-    s = {}
-    l = dict ([(tag, set()) for tag in TAGS])
+    snippet_list = {}
+    tag_list = dict ([(tag, set()) for tag in tags])
      for f in glob.glob (os.path.join (src, '*.ly')):
          basename = os.path.basename (f)
          m = tags_re.search (open (f, 'r').read ())
          if m:
              file_tags = [tag.strip() for tag in m.group (1). split(',')]
-            s[basename] = (src, file_tags)
-            [l[tag].add (basename) for tag in file_tags if tag in TAGS]
+            snippet_list[basename] = (src, file_tags)
+            for tag in file_tags:
+                if tag in tags:
+                    tag_list[tag].add (basename)
+                else:
+                    tag_list[tag] = set ((basename,))
          else:
              notags_files.append (f)
-    return s, l
+    return snippet_list, tag_list
  
  
-def dump_file_list (file, file_list, update=False):
-    if update:
-        old_list = set (open (file, 'r').read ().splitlines ())
-        old_list.update (file_list)
-        new_list = list (old_list)
-    else:
-        new_list = file_list
+def dump_file_list (file, file_list):
+    new_list = file_list
      f = open (file, 'w')
      f.write ('\n'.join (sorted (new_list)) + '\n')
  
-def update_ly_in_place (snippet_path):
-    visited_languages = []
-    contents = open (snippet_path).read ()
-    contents = texidoc_chunk_re.sub \
-        (lambda m: update_translated_texidoc (m,
-                                              snippet_path,
-                                              visited_languages),
-         contents)
-    need_line_break_workaround = False
-    for language_code in langdefs.LANGDICT:
-        if not language_code in visited_languages:
-            base = os.path.splitext (os.path.basename (snippet_path))[0]
-            texidoc_path = os.path.join ('Documentation', language_code,
-                         'texidocs', base + '.texidoc')
-            if os.path.isfile (texidoc_path):
-                texidoc_translation = open (texidoc_path).read ()
-                texidoc_translation = texidoc_translation.replace ('\\', '\\\\')
-                contents = begin_header_re.sub ('\\g<0>\n' + texidoc_translation, contents, 1)
-        else:
-            need_line_break_workaround = True
-    contents = doctitle_re.sub (doctitle_sub, contents)
-    contents = escape_backslashes_in_header (contents)
-
-    # workaround for a bug in the regex's that I'm not smart
-    # enough to figure out.  -gp
-    if need_line_break_workaround:
-        first_translated = contents.find('%% Translation of')
-        keep = contents[:first_translated+5]
-        contents = keep + contents[first_translated+5:].replace('%% Translation of', '\n%% Translation of')
+## clean out existing lys and generated files - but when we're
+## not recreating all of them from the tarball don't delete
+## snippets that came from LSR.
+if in_dir:
+    map (os.remove, glob.glob (os.path.join (lys_from_lsr, '*.ly')) +
+        glob.glob (os.path.join (lys_from_lsr, '*.snippet-list')))
+else:
+    map (os.remove, glob.glob (os.path.join (lys_from_lsr, '*.snippet-list')))
+    for f in glob.glob (os.path.join (lys_from_lsr, '*.ly')):
+        if new_lys_marker in open (f).read ():
+            os.remove (f)
+snippets = {}
+if in_dir:
+    # read LSR source where tags are defined by subdirs
+    snippets = read_source_with_dirs (in_dir)
  
-    open (snippet_path, 'w').write (contents)
+# read Documentation/snippets/new where tags are directly defined
+snippets_new, not_used_list = read_source (new_lys)
+snippets.update (snippets_new)
  
-if in_dir:
-    ## clean out existing lys and generated files
-    map (os.remove, glob.glob (os.path.join (DEST, '*.ly')) +
-         glob.glob (os.path.join (DEST, '*.snippet-list')))
+for (name, (srcdir, file_tags)) in snippets.items ():
+    copy_ly (srcdir, name, file_tags)
  
-    # read LSR source where tags are defined by subdirs
-    snippets, tag_lists = read_source_with_dirs (in_dir)
+not_used_snippets, tag_lists = read_source (lys_from_lsr)
  
-    # read input/new where tags are directly defined
-    s, l = read_source (NEW_LYS)
-    snippets.update (s)
-    for t in TAGS:
-        tag_lists[t].update (l[t])
-else:
-    snippets, tag_lists = read_source (NEW_LYS)
-    ## update texidocs of snippets that don't come from NEW_LYS
-    for snippet_path in glob.glob (os.path.join (DEST, '*.ly')):
-        if not os.path.basename (snippet_path) in snippets:
-            update_ly_in_place (snippet_path)
-
-for (name, (srcdir, tags)) in snippets.items ():
-    copy_ly (srcdir, name, tags)
  for (tag, file_set) in tag_lists.items ():
-    dump_file_list (os.path.join (DEST, tag + '.snippet-list'),
-                    file_set, update=not(in_dir))
+    dump_file_list (os.path.join (lys_from_lsr, tag + '.snippet-list'),
+                    file_set)
  if unconverted:
      sys.stderr.write ('These files could not be converted successfully by convert-ly:\n')
      sys.stderr.write ('\n'.join (unconverted) + '\n\n')
@@ -285,7 +348,7 @@ if unsafe:
      sys.stderr.write ('''
  
  Unsafe files printed in lsr-unsafe.txt: CHECK MANUALLY!
-  git add %s/*.ly
+  git add %(lys_from_lsr)s/*.ly
    xargs git diff HEAD < lsr-unsafe.txt
  
-''' % DEST)
+''' % vars ())