#!@PYTHON@
# -*- coding: utf-8 -*-
# extract_texi_filenames.py

# USAGE:  extract_texi_filenames.py [-o OUTDIR] FILES
#
# -o OUTDIR specifies that output files should rather be written in OUTDIR
#
# This script parses the .texi file given and creates a file with the
# nodename <=> filename/anchor map (tab-separated as NODE\tFILENAME\tANCHOR).
# The idea behind: Unnumbered subsections go into the same file as the
# previous numbered section, @translationof gives the original node name,
# which is then used for the filename/anchor.
#
# If this script is run on a file texifile.texi, it produces a file
# texifile_xref.map, which can then be used by our texi2html init script
# to determine the correct file name and anchor for external refs
#
# Provenance: added as buildscripts/extract_texi_filenames.py by the patch
# "texi2html: Script to generate the nodename<=>filename/anchor map for
# texi2html" (Reinhold Kainhofer, Sat, 19 Jul 2008, commit
# e4a9a3bd400466c26336385bb7e0af09fca66db1).  According to that commit
# message the script was not yet included in any makefile, and the texi2html
# init script did not make use of the generated maps yet.

import sys
import re
import os
import getopt

#import langdefs

# Directory the *_xref.map files are written to; overridden by -o in main ().
outdir = '.'

# NOTE(review): the dots in "../lily-" are unescaped and therefore match any
# character; presumably "\.\./lily-" was meant -- confirm before changing,
# since it alters which includes get expanded.
include_re = re.compile (r'@include ((?!../lily-).*?)\.texi$', re.M)
whitespaces = re.compile (r'\s+')
section_translation_re = re.compile (r'@(node|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?|top|chapter|(?:sub){0,2}section|(?:major|chap|(?:sub){0,2})heading|translationof) (.*?)\n')


def expand_includes (m, filename=''):
    """Return the section list of the file named by an @include match.

    m is a match of include_re; filename is the path of the file that
    contains the @include line (optional, for backward compatibility).

    The included path is first tried exactly as written (i.e. relative to
    the current directory) and then relative to the directory of the
    including file.  If neither exists, the @include line is returned
    unchanged.
    """
    relpath = m.group (1) + '.texi'
    candidates = [relpath]
    basedir = os.path.dirname (filename)
    if basedir:
        candidates.append (os.path.join (basedir, relpath))
    for filepath in candidates:
        if os.path.exists (filepath):
            print ("Including file: " + filepath)
            return extract_sections (filepath)
    return m.group (0)


def extract_sections (filename):
    """Return all sectioning lines of filename, with includes expanded.

    The result is a string with one "@command argument" line for every
    match of section_translation_re, in document order.
    """
    with open (filename, 'r') as f:
        page = f.read ()
    # Replace all includes by their list of sections and extract all sections
    page = include_re.sub (lambda m: expand_includes (m, filename), page)
    sections = section_translation_re.findall (page)
    return ''.join (["@" + sec[0] + " " + sec[1] + "\n" for sec in sections])


def texinfo_file_name(title):
    """Normalize title into a texinfo file/anchor name.

    Returns the normalized name; an empty (or all-whitespace) title yields
    the empty string.
    """
    # File name normalization by texinfo (described in the texinfo manual):
    # 1/2: letters and numbers are left unchanged
    # 3/4: multiple, leading and trailing whitespace is removed
    title = whitespaces.sub (' ', title.strip ())
    # 5:   all remaining spaces are converted to '-'
    # 6:   all other 7- or 8-bit chars are replaced by _xxxx (xxxx=ascii character code)
    parts = []
    for char in title:
        if char == ' ': # space -> '-'
            parts.append ('-')
        elif (('0' <= char <= '9') or
              ('A' <= char <= 'Z') or
              ('a' <= char <= 'z')): # number or letter
            parts.append (char)
        else:
            ccode = ord (char)
            if ccode <= 0xFFFF:
                parts.append ("_%04x" % ccode)
            else:
                parts.append ("__%06x" % ccode)
    result = ''.join (parts)
    # 7: if name begins with number, prepend 't_g' (so it starts with a letter)
    # The comparison includes '9', and the emptiness check avoids indexing
    # into an empty name.
    if result and '0' <= result[0] <= '9':
        result = 't_g' + result
    return result


# NOTE(review): both ".*" are greedy, so a title holding several @-commands
# is reduced to the text before the first "@" plus the contents of the last
# brace group -- confirm whether that is intended before tightening it.
texinfo_re = re.compile (r'@.*{(.*)}')
def remove_texinfo (title):
    """Replace a texinfo @command{...} in title by its brace contents."""
    return texinfo_re.sub (r'\1', title)


def create_texinfo_anchor (title):
    """Return the normalized anchor name for a (possibly marked-up) title."""
    return texinfo_file_name (remove_texinfo (title))


unnumbered_re = re.compile (r'unnumbered.*')


def _write_entry (f, title, filename, anchor):
    """Write one NODE\\tFILENAME\\tANCHOR line to f and echo it to stdout."""
    entry = title + "\t" + filename + "\t" + anchor
    f.write (entry + "\n")
    print (entry)


def process_sections (filename, page):
    """Write the node map for filename from the section list in page.

    page is a string as produced by extract_sections.  The map is written
    to the path of filename joined onto outdir, with the file extension
    replaced by '_xref.map'.  Each line is NODE\\tFILENAME\\tANCHOR; every
    line is also printed.
    """
    sections = section_translation_re.findall (page)
    # splitext instead of a fixed [:-5] slice, so any extension is handled.
    p = os.path.splitext (os.path.join (outdir, filename))[0] + '_xref.map'

    this_title = ''
    this_filename = ''
    this_anchor = ''
    this_unnumbered = False
    with open (p, 'w') as f:
        for sec in sections:
            if sec[0] == "node":
                # Write out the cached values to the file and start a new section:
                if this_title != '':
                    _write_entry (f, this_title, this_filename, this_anchor)
                this_title = remove_texinfo (sec[1])
                this_anchor = create_texinfo_anchor (sec[1])
            elif sec[0] == "translationof":
                # @translationof gives the original node name, which is used
                # for the anchor and (for numbered sections) the file name.
                anchor = create_texinfo_anchor (sec[1])
                this_anchor = anchor
                if not this_unnumbered:
                    this_filename = anchor
            else:
                # Sectioning command: unnumbered sections keep the file name
                # of the previous numbered section, all others start a file
                # of their own.  A @node line must not reach this branch
                # (hence "elif" above), or it would overwrite the file name
                # before the sectioning command is seen.
                this_unnumbered = unnumbered_re.match (sec[0])
                if not this_unnumbered:
                    this_filename = this_anchor

        if this_title != '':
            _write_entry (f, this_title, this_filename, this_anchor)


def main ():
    """Parse the command line and write one map file per input file."""
    global outdir
    optlist, files = getopt.getopt (sys.argv[1:], 'o:')
    for opt, value in optlist:
        if opt == '-o':
            outdir = value

    for filename in files:
        print ("extract_texi_filenames.py: Processing %s" % filename)
        sections = extract_sections (filename)
        process_sections (filename, sections)


if __name__ == '__main__':
    main ()