X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=buildscripts%2Fextract_texi_filenames.py;h=5798d5dab26878889869a5a3e1b49cab264519e8;hb=3c7ed0acd8e3cd7005ae3341012fef483b7b6cb7;hp=f074436e05ce0a49d1cfa9eb081a0ae933285b01;hpb=c7555d70732969277c5e906285ec04e5b561c38e;p=lilypond.git diff --git a/buildscripts/extract_texi_filenames.py b/buildscripts/extract_texi_filenames.py index f074436e05..5798d5dab2 100755 --- a/buildscripts/extract_texi_filenames.py +++ b/buildscripts/extract_texi_filenames.py @@ -1,6 +1,6 @@ #!@PYTHON@ # -*- coding: utf-8 -*- -# extrace_texi_filenames.py +# extract_texi_filenames.py # USAGE: extract_texi_filenames.py [-o OUTDIR] FILES # @@ -14,10 +14,11 @@ # which is then used for the filename/anchor. # # If this script is run on a file texifile.texi, it produces a file -# texifile_xref.map with tab-separated entries of the form +# texifile[.LANG].xref-map with tab-separated entries of the form # NODE\tFILENAME\tANCHOR +# LANG is the document language in case it's not 'en' # Note: The filename does not have any extension appended! 
-# This file can then be used by our texi2html init script to determine +# This file can then be used by our texi2html init script to determine # the correct file name and anchor for external refs import sys @@ -25,8 +26,6 @@ import re import os import getopt -#import langdefs - optlist, args = getopt.getopt (sys.argv[1:],'o:') files = args @@ -35,29 +34,44 @@ for x in optlist: if x[0] == '-o': outdir = x[1] +if not os.path.isdir (outdir): + if os.path.exists (outdir): + os.unlink (outdir) + os.makedirs (outdir) + include_re = re.compile (r'@include ((?!../lily-).*?)\.texi$', re.M) whitespaces = re.compile (r'\s+') -section_translation_re = re.compile (r'@(node|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?|top|chapter|(?:sub){0,2}section|(?:major|chap|(?:sub){0,2})heading|translationof) (.*?)\n') +section_translation_re = re.compile ('^@(node|(?:unnumbered|appendix)\ +(?:(?:sub){0,2}sec)?|top|chapter|(?:sub){0,2}section|\ +(?:major|chap|(?:sub){0,2})heading|translationof) (.*?)\\s*$', re.MULTILINE) def expand_includes (m, filename): filepath = os.path.join (os.path.dirname (filename), m.group(1)) + '.texi' if os.path.exists (filepath): - return extract_sections (filepath) + return extract_sections (filepath)[1] else: print "Unable to locate include file " + filepath return '' +lang_re = re.compile (r'^@documentlanguage (.+)', re.M) + def extract_sections (filename): result = '' f = open (filename, 'r') page = f.read () f.close() + # Search document language + m = lang_re.search (page) + if m and m.group (1) != 'en': + lang_suffix = '.' 
+ m.group (1) + else: + lang_suffix = '' # Replace all includes by their list of sections and extract all sections page = include_re.sub (lambda m: expand_includes (m, filename), page) sections = section_translation_re.findall (page) for sec in sections: result += "@" + sec[0] + " " + sec[1] + "\n" - return result + return (lang_suffix, result) # Convert a given node name to its proper file name (normalization as explained # in the texinfo manual: @@ -89,7 +103,7 @@ def texinfo_file_name(title): else: result += "__%06x" % ccode # 7: if name begins with number, prepend 't_g' (so it starts with a letter) - if ord(result[0]) in range (ord('0'), ord('9')): + if (result != '') and (ord(result[0]) in range (ord('0'), ord('9'))): result = 't_g' + result return result @@ -101,21 +115,23 @@ def create_texinfo_anchor (title): return texinfo_file_name (remove_texinfo (title)) unnumbered_re = re.compile (r'unnumbered.*') -def process_sections (filename, page): +def process_sections (filename, lang_suffix, page): sections = section_translation_re.findall (page) - # TODO: Don't rely on the file having a 4-letter extension (texi)!!! 
- p = os.path.join (outdir, filename) [:-5] + '_xref.map' + basename = os.path.splitext (os.path.basename (filename))[0] + p = os.path.join (outdir, basename) + lang_suffix + '.xref-map' f = open (p, 'w') this_title = '' - this_filename = '' + this_filename = 'index' this_anchor = '' this_unnumbered = False + had_section = False for sec in sections: if sec[0] == "node": # Write out the cached values to the file and start a new section: - if this_title != '': - f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n") + if this_title != '' and this_title != 'Top': + f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n") + had_section = False this_title = remove_texinfo (sec[1]) this_anchor = create_texinfo_anchor (sec[1]) elif sec[0] == "translationof": @@ -126,18 +142,29 @@ def process_sections (filename, page): if not this_unnumbered: this_filename = anchor else: + # Some pages might not use a node for every section, so treat this + # case here, too: If we already had a section and encounter another + # one before the next @node, we write out the old one and start + # with the new values + if had_section and this_title != '': + f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n") + this_title = remove_texinfo (sec[1]) + this_anchor = create_texinfo_anchor (sec[1]) + had_section = True + # unnumbered nodes use the previously used file name, only numbered - # nodes get their own filename! + # nodes get their own filename! However, top-level @unnumbered + # still get their own file. 
this_unnumbered = unnumbered_re.match (sec[0]) - if not this_unnumbered: + if not this_unnumbered or sec[0] == "unnumbered": this_filename = this_anchor - if this_title != '': + if this_title != '' and this_title != 'Top': f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n") f.close () for filename in files: print "extract_texi_filenames.py: Processing %s" % filename - sections = extract_sections (filename) - process_sections (filename, sections) + (lang_suffix, sections) = extract_sections (filename) + process_sections (filename, lang_suffix, sections)