X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=scripts%2Fbuild%2Fextract_texi_filenames.py;h=fda8fc1d6168854a39016ab2ad6f8b889dff55e6;hb=8af4e95352e8484f91bae02b62328226a5cca1fc;hp=5798d5dab26878889869a5a3e1b49cab264519e8;hpb=a9d9433bc7b95cb2d4b3d96eefce7a8437c0d44e;p=lilypond.git diff --git a/scripts/build/extract_texi_filenames.py b/scripts/build/extract_texi_filenames.py index 5798d5dab2..fda8fc1d61 100644 --- a/scripts/build/extract_texi_filenames.py +++ b/scripts/build/extract_texi_filenames.py @@ -18,39 +18,94 @@ # NODE\tFILENAME\tANCHOR # LANG is the document language in case it's not 'en' # Note: The filename does not have any extension appended! -# This file can then be used by our texi2html init script to determine +# This file should then be used by our texi2html init script to determine # the correct file name and anchor for external refs +# For translated documentation: cross-references to nodes that exist +# only in documentation in English are allowed, that's why the already +# generated map file of docs in English is loaded with +# --master-map-file option, then the node names that are defined in +# the map for the manual in English but not in the translated manual +# are added to the map for the translated manual. + + import sys import re import os import getopt -optlist, args = getopt.getopt (sys.argv[1:],'o:') -files = args +options_list, files = getopt.getopt (sys.argv[1:],'o:s:hI:m:', + ['output=', 'split=', + 'help', 'include=', + 'master-map-file=']) + +help_text = r"""Usage: %(program_name)s [OPTIONS]... TEXIFILE... +Extract files names for texinfo (sub)sections from the texinfo files. + +Options: + -h, --help print this help + -I, --include=DIRECTORY append DIRECTORY to include search path + -m, --master-map-file=FILE use FILE as master map file + -o, --output=DIRECTORY write .xref-map files to DIRECTORY + -s, --split=MODE split manual according to MODE. Possible values + are section and custom (default) +""" + +def help (text): + sys.stdout.write ( text) + sys.exit (0) outdir = '.' -for x in optlist: - if x[0] == '-o': - outdir = x[1] +split = "custom" +include_path = ['.',] +master_map_file = '' +initial_map = {} +for opt in options_list: + o = opt[0] + a = opt[1] + if o == '-h' or o == '--help': + help (help_text % vars ()) + if o == '-I' or o == '--include': + if os.path.isdir (a): + include_path.append (a) + else: + print 'NOT A DIR from: ', os.getcwd (), a + elif o == '-o' or o == '--output': + outdir = a + elif o == '-s' or o == '--split': + split = a + elif o == '-m' or o == '--master-map-file': + if os.path.isfile (a): + master_map_file = a + else: + raise Exception ('unknown option: ' + o) + if not os.path.isdir (outdir): if os.path.exists (outdir): os.unlink (outdir) os.makedirs (outdir) -include_re = re.compile (r'@include ((?!../lily-).*?)\.texi$', re.M) +include_re = re.compile (r'@include ((?!../lily-).*?\.i?te(xi|ly))$', re.M) whitespaces = re.compile (r'\s+') section_translation_re = re.compile ('^@(node|(?:unnumbered|appendix)\ (?:(?:sub){0,2}sec)?|top|chapter|(?:sub){0,2}section|\ -(?:major|chap|(?:sub){0,2})heading|translationof) (.*?)\\s*$', re.MULTILINE) +(?:major|chap|(?:sub){0,2})heading|lydoctitle|translationof) \ +(.+)$', re.MULTILINE) +external_node_re = re.compile (r'\s+@c\s+external.*') def expand_includes (m, filename): - filepath = os.path.join (os.path.dirname (filename), m.group(1)) + '.texi' + include_name = m.group (1) + filepath = os.path.join (os.path.dirname (filename), include_name) if os.path.exists (filepath): return extract_sections (filepath)[1] else: - print "Unable to locate include file " + filepath + for directory in include_path: + filepath = os.path.join (directory, include_name) + if os.path.exists (filepath): + return extract_sections (filepath)[1] + print 'No such file: ' + include_name + print 'Search path: ' + ':'.join (include_path) return '' lang_re = re.compile (r'^@documentlanguage (.+)', re.M) @@ -73,8 +128,8 @@ def extract_sections (filename): result += "@" + sec[0] + " " + sec[1] + "\n" return (lang_suffix, result) -# Convert a given node name to its proper file name (normalization as explained -# in the texinfo manual: +# Convert a given node name to its proper file name (normalization as +# explained in the texinfo manual: # http://www.gnu.org/software/texinfo/manual/texinfo/html_node/HTML-Xref-Node-Name-Expansion.html def texinfo_file_name(title): # exception: The top node is always mapped to index.html @@ -107,18 +162,39 @@ def texinfo_file_name(title): result = 't_g' + result return result -texinfo_re = re.compile (r'@.*{(.*)}') +texinfo_re = re.compile (r'@.*?{(.*?)}') def remove_texinfo (title): - return texinfo_re.sub (r'\1', title) + title = title.replace ('--', '-') + return texinfo_re.sub (r'\1', title).strip () def create_texinfo_anchor (title): return texinfo_file_name (remove_texinfo (title)) -unnumbered_re = re.compile (r'unnumbered.*') +unnumbered_re = re.compile (r'unnumbered.+|lydoctitle') +file_name_section_level = { + 'top': 4, + 'chapter':3, + 'unnumbered':3, + 'appendix':3, + 'section':2, + 'unnumberedsec':2, + 'appendixsec':2, + 'subsection':1, + 'unnumberedsubsec':1, + 'appendixsubsec':1, + 'subsubsection':0, + 'unnumberedsubsubsec':0, + 'appendixsubsubsec':0 +} +if split in file_name_section_level: + splitting_level = file_name_section_level[split] +else: + splitting_level = -1 def process_sections (filename, lang_suffix, page): sections = section_translation_re.findall (page) basename = os.path.splitext (os.path.basename (filename))[0] p = os.path.join (outdir, basename) + lang_suffix + '.xref-map' + print 'writing:', p f = open (p, 'w') this_title = '' @@ -128,41 +204,71 @@ def process_sections (filename, lang_suffix, page): had_section = False for sec in sections: if sec[0] == "node": - # Write out the cached values to the file and start a new section: - if this_title != '' and this_title != 'Top': + # Write out the cached values to the file and start a new + # section: + if this_title and this_title != 'Top': f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n") had_section = False this_title = remove_texinfo (sec[1]) this_anchor = create_texinfo_anchor (sec[1]) + # delete entry from master map file + if this_title in initial_map: + del initial_map[this_title] elif sec[0] == "translationof": + (original_node, external_node) = external_node_re.subn ('', sec[1]) + original_node = remove_texinfo (original_node) + # The following binds the translator to use the + # translated node name in cross-references in case + # it exists + if external_node and original_node in initial_map: + del initial_map[original_node] anchor = create_texinfo_anchor (sec[1]) - # If @translationof is used, it gives the original node name, which - # we use for the anchor and the file name (if it is a numbered node) + # If @translationof is used, it gives the original + # node name, which we use for the anchor and the file + # name (if it is a numbered node) this_anchor = anchor if not this_unnumbered: this_filename = anchor + elif original_node in initial_map: + this_filename = initial_map[original_node][2] else: - # Some pages might not use a node for every section, so treat this - # case here, too: If we already had a section and encounter enother - # one before the next @node, we write out the old one and start - # with the new values - if had_section and this_title != '': + # Some pages might not use a node for every section, so + # treat this case here, too: If we already had a section + # and encounter another one before the next @node, we + # write out the old one and start with the new values + if had_section and split != 'node' and this_title: f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n") this_title = remove_texinfo (sec[1]) this_anchor = create_texinfo_anchor (sec[1]) had_section = True - # unnumbered nodes use the previously used file name, only numbered - # nodes get their own filename! However, top-level @unnumbered - # still get their own file. - this_unnumbered = unnumbered_re.match (sec[0]) - if not this_unnumbered or sec[0] == "unnumbered": + if split == 'custom': + # unnumbered nodes use the previously used file name, + # only numbered nodes get their own filename! However, + # top-level @unnumbered still get their own file. + this_unnumbered = unnumbered_re.match (sec[0]) + if not this_unnumbered: + this_filename = this_anchor + elif split == 'node': this_filename = this_anchor + else: + if sec[0] in file_name_section_level and \ + file_name_section_level[sec[0]] >= splitting_level: + this_filename = this_anchor - if this_title != '' and this_title != 'Top': + if this_title and this_title != 'Top': f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n") + + for node in initial_map: + f.write ("\t".join (initial_map[node]) + "\n") f.close () +xref_map_line_re = re.compile (r'(.*?)\t(.*?)\t(.*?)$') +if master_map_file: + for line in open (master_map_file): + m = xref_map_line_re.match (line) + if m: + initial_map[m.group (1)] = (m.group (1), m.group (2), m.group (3)) for filename in files: print "extract_texi_filenames.py: Processing %s" % filename