X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=scripts%2Fbuild%2Fextract_texi_filenames.py;h=9e338b5daf0f26312c7b5c6ddf55cd80a5ea859d;hb=HEAD;hp=5798d5dab26878889869a5a3e1b49cab264519e8;hpb=a9d9433bc7b95cb2d4b3d96eefce7a8437c0d44e;p=lilypond.git

diff --git a/scripts/build/extract_texi_filenames.py b/scripts/build/extract_texi_filenames.py
index 5798d5dab2..9e338b5daf 100644
--- a/scripts/build/extract_texi_filenames.py
+++ b/scripts/build/extract_texi_filenames.py
@@ -18,39 +18,122 @@
 #        NODE\tFILENAME\tANCHOR
 # LANG is the document language in case it's not 'en'
 # Note: The filename does not have any extension appended!
-# This file can then be used by our texi2html init script to determine
+# This file should then be used by our texi2html init script to determine
 # the correct file name and anchor for external refs
 
+# For translated documentation: cross-references to nodes that exist
+# only in the English documentation are allowed.  For this reason the
+# already generated map file of the English docs is loaded with the
+# --master-map-file option; node names that are defined in the map
+# for the English manual but not in the translated manual are then
+# added to the map for the translated manual.
+
+
 import sys
 import re
 import os
 import getopt
 
-optlist, args = getopt.getopt (sys.argv[1:],'o:')
-files = args
+options_list, files = getopt.getopt (sys.argv[1:],'o:s:hI:m:k:q',
+                                     ['output=', 'split=',
+                                      'help', 'include=',
+                                      'master-map-file=',
+                                      'known-missing-files=',
+                                      'quiet'])
+
+help_text = r"""Usage: %(program_name)s [OPTIONS]... TEXIFILE...
+Extract files names for texinfo (sub)sections from the texinfo files.
+
+Options:
+ -h, --help                     print this help
+ -I, --include=DIRECTORY        append DIRECTORY to include search path
+ -m, --master-map-file=FILE     use FILE as master map file
+ -o, --output=DIRECTORY         write .xref-map files to DIRECTORY
+ -s, --split=MODE               split manual according to MODE. Possible values
+                                are section and custom (default)
+ -k, --known-missing-files      a filename which has a list of files known
+                                to be missing for this make
+ -q, --quiet                    suppress most messages
+"""
+
+def help (text):  # NOTE: this name shadows the builtin help() in this module.
+    sys.stdout.write ( text)  # TEXT is written verbatim; no newline is appended.
+    sys.exit (0)  # Terminate the script with exit status 0.
 
 outdir = '.'
-for x in optlist:
-    if x[0] == '-o':
-        outdir = x[1]
+split = "custom"
+include_path = ['.',]
+master_map_file = ''
+known_missing_files = []
+known_missing_files_file = ''
+docs_without_directories = ['changes', 'music-glossary']
+suppress_output = False
+initial_map = {}
+# Apply each parsed command-line option to the module-level settings.
+for opt in options_list:
+    o, a = opt
+    if o in ('-h', '--help'):
+        # Fall back to the script name if no global program_name exists.
+        help (help_text % dict ({'program_name': sys.argv[0]}, **vars ()))
+    elif o in ('-I', '--include'):
+        if os.path.isdir (a):
+            include_path.append (a)
+        else:
+            file_name = a.split ('/')[-1]
+            if not (file_name in docs_without_directories):
+                print a, 'is not a directory.'
+                print 'Please consider adding it to the list of '
+                print 'known missing files in extract_texi_filenames.py.'
+    elif o in ('-o', '--output'):
+        outdir = a
+    elif o in ('-s', '--split'):
+        split = a
+    elif o in ('-m', '--master-map-file'):
+        if os.path.isfile (a):
+            master_map_file = a
+    elif o in ('-k', '--known-missing-files'):
+        if os.path.isfile (a):
+            known_missing_files_file = a
+        else:
+            print 'Missing files list file not found: ', a
+    elif o in ('-q', '--quiet'):
+        suppress_output = True
+    else:
+        raise Exception ('unknown option: ' + o)
+
+if known_missing_files_file:
+    missing_files = open (known_missing_files_file, 'r')
+    known_missing_files = missing_files.read().splitlines()
+    missing_files.close()
 
 if not os.path.isdir (outdir):
     if os.path.exists (outdir):
         os.unlink (outdir)
     os.makedirs (outdir)
 
-include_re = re.compile (r'@include ((?!../lily-).*?)\.texi$', re.M)
+# Only look at @include if it is not preceded by a @c:
+include_re = re.compile (r'^(?!.*@c .*@include)@include ((?!../lily-).*?\.i?te(xi|ly))$', re.M)
 whitespaces = re.compile (r'\s+')
 section_translation_re = re.compile ('^@(node|(?:unnumbered|appendix)\
 (?:(?:sub){0,2}sec)?|top|chapter|(?:sub){0,2}section|\
-(?:major|chap|(?:sub){0,2})heading|translationof) (.*?)\\s*$', re.MULTILINE)
+(?:major|chap|(?:sub){0,2})heading|lydoctitle|translationof|nodeprefix) \
+(.+)$', re.MULTILINE)
+external_node_re = re.compile (r'\s+@c\s+external.*')
 
 def expand_includes (m, filename):
-    filepath = os.path.join (os.path.dirname (filename), m.group(1)) + '.texi'
+    include_name = m.group (1)
+    filepath = os.path.join (os.path.dirname (filename), include_name)
     if os.path.exists (filepath):
         return extract_sections (filepath)[1]
     else:
-        print "Unable to locate include file " + filepath
+        for directory in include_path:
+            filepath = os.path.join (directory, include_name)
+            if os.path.exists (filepath):
+                return extract_sections (filepath)[1]
+        if not (include_name in known_missing_files):
+            # Not found
+            print 'Warning: No such file: ' + include_name + \
+                  ' (search path: ' + ':'.join (include_path)+')'
         return ''
 
 lang_re = re.compile (r'^@documentlanguage (.+)', re.M)
@@ -73,8 +156,8 @@ def extract_sections (filename):
         result += "@" + sec[0] + " " + sec[1] + "\n"
     return (lang_suffix, result)
 
-# Convert a given node name to its proper file name (normalization as explained
-# in the texinfo manual:
+# Convert a given node name to its proper file name (normalization as
+# explained in the texinfo manual:
 # http://www.gnu.org/software/texinfo/manual/texinfo/html_node/HTML-Xref-Node-Name-Expansion.html
 def texinfo_file_name(title):
     # exception: The top node is always mapped to index.html
@@ -107,20 +190,43 @@ def texinfo_file_name(title):
         result = 't_g' + result
     return result
 
-texinfo_re = re.compile (r'@.*{(.*)}')
+texinfo_re = re.compile (r'@.*?{(.*?)}')
 def remove_texinfo (title):
-    return texinfo_re.sub (r'\1', title)
+    title = title.replace ('--', '-')
+    return texinfo_re.sub (r'\1', title).strip ()
 
 def create_texinfo_anchor (title):
     return texinfo_file_name (remove_texinfo (title))
 
-unnumbered_re = re.compile (r'unnumbered.*')
+unnumbered_re = re.compile (r'unnumbered.+|lydoctitle')
+file_name_section_level = {
+    'top': 4,
+    'chapter':3,
+    'unnumbered':3,
+    'appendix':3,
+    'section':2,
+    'unnumberedsec':2,
+    'appendixsec':2,
+    'subsection':1,
+    'unnumberedsubsec':1,
+    'appendixsubsec':1,
+    'subsubsection':0,
+    'unnumberedsubsubsec':0,
+    'appendixsubsubsec':0
+}
+if split in file_name_section_level:
+    splitting_level = file_name_section_level[split]
+else:
+    splitting_level = -1
 def process_sections (filename, lang_suffix, page):
     sections = section_translation_re.findall (page)
     basename = os.path.splitext (os.path.basename (filename))[0]
     p = os.path.join (outdir, basename) + lang_suffix + '.xref-map'
+    if not suppress_output:
+        print 'writing:', p
     f = open (p, 'w')
 
+    node_prefix_title = ''
     this_title = ''
     this_filename = 'index'
     this_anchor = ''
@@ -128,43 +234,81 @@ def process_sections (filename, lang_suffix, page):
     had_section = False
     for sec in sections:
         if sec[0] == "node":
-            # Write out the cached values to the file and start a new section:
-            if this_title != '' and this_title != 'Top':
+            # Write out the cached values to the file and start a new
+            # section:
+            if this_title and this_title != 'Top':
                     f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n")
             had_section = False
             this_title = remove_texinfo (sec[1])
             this_anchor = create_texinfo_anchor (sec[1])
+            # delete entry from master map file
+            if this_title in initial_map:
+                del initial_map[this_title]
         elif sec[0] == "translationof":
+            (original_node, external_node) = external_node_re.subn ('', sec[1])
+            original_node = remove_texinfo (original_node)
+            # The following binds the translator to use the
+            # translated node name in cross-references in case
+            # it exists
+            if external_node and original_node in initial_map:
+                del initial_map[original_node]
             anchor = create_texinfo_anchor (sec[1])
-            # If @translationof is used, it gives the original node name, which
-            # we use for the anchor and the file name (if it is a numbered node)
+            # If @translationof is used, it gives the original
+            # node name, which we use for the anchor and the file
+            # name (if it is a numbered node)
             this_anchor = anchor
             if not this_unnumbered:
                 this_filename = anchor
+            elif original_node in initial_map:
+                this_filename = initial_map[original_node][2]
+        elif sec[0] == "nodeprefix":
+            node_prefix_title = remove_texinfo (sec[1])
+            node_prefix_anchor = create_texinfo_anchor (sec[1])
         else:
-            # Some pages might not use a node for every section, so treat this
-            # case here, too: If we already had a section and encounter enother
-            # one before the next @node, we write out the old one and start
-            # with the new values
-            if had_section and this_title != '':
+            # Some pages might not use a node for every section, so
+            # treat this case here, too: If we already had a section
+            # and encounter another one before the next @node, we
+            # write out the old one and start with the new values
+            if had_section and split != 'node' and this_title:
                 f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n")
                 this_title = remove_texinfo (sec[1])
                 this_anchor = create_texinfo_anchor (sec[1])
             had_section = True
 
-            # unnumbered nodes use the previously used file name, only numbered
-            # nodes get their own filename! However, top-level @unnumbered
-            # still get their own file.
-            this_unnumbered = unnumbered_re.match (sec[0])
-            if not this_unnumbered or sec[0] == "unnumbered":
+            if sec[0] == "lydoctitle" and node_prefix_title:
+                this_title = "%s: %s" % (node_prefix_title, this_title)
+                this_anchor = "%s-%s" % (node_prefix_anchor, this_anchor)
+
+            if split == 'custom':
+                # unnumbered nodes use the previously used file name,
+                # only numbered nodes get their own filename! However,
+                # top-level @unnumbered still get their own file.
+                this_unnumbered = unnumbered_re.match (sec[0])
+                if not this_unnumbered:
+                    this_filename = this_anchor
+            elif split == 'node':
                 this_filename = this_anchor
+            else:
+                if sec[0] in file_name_section_level and \
+                        file_name_section_level[sec[0]] >= splitting_level:
+                    this_filename = this_anchor
 
-    if this_title != '' and this_title != 'Top':
+    if this_title and this_title != 'Top':
         f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n")
+
+    for node in initial_map:
+        f.write ("\t".join (initial_map[node]) + "\n")
     f.close ()
 
+xref_map_line_re = re.compile (r'(.*?)\t(.*?)\t(.*?)$')  # three tab-separated fields
+if master_map_file:
+    for line in open (master_map_file):  # NOTE(review): the file object is never closed explicitly
+        m = xref_map_line_re.match (line)
+        if m:  # lines without two tabs do not match and are skipped
+            initial_map[m.group (1)] = (m.group (1), m.group (2), m.group (3))  # keyed by the first field
 
 for filename in files:
-    print "extract_texi_filenames.py: Processing %s" % filename
+    if not suppress_output:
+        print "extract_texi_filenames.py: Processing %s" % filename
     (lang_suffix, sections) = extract_sections (filename)
     process_sections (filename, lang_suffix, sections)