#!@PYTHON@
# -*- coding: utf-8 -*-
-# extrace_texi_filenames.py
+# extract_texi_filenames.py
# USAGE: extract_texi_filenames.py [-o OUTDIR] FILES
#
# which is then used for the filename/anchor.
#
# If this script is run on a file texifile.texi, it produces a file
-# texifile_xref.map with tab-separated entries of the form
+# texifile[.LANG].xref-map with tab-separated entries of the form
# NODE\tFILENAME\tANCHOR
+# LANG is the document language; the suffix is omitted when it is 'en'.
# Note: The filename does not have any extension appended!
-# This file can then be used by our texi2html init script to determine
+# This file can then be used by our texi2html init script to determine
# the correct file name and anchor for external refs
import sys
import os
import getopt
-#import langdefs
-
optlist, args = getopt.getopt (sys.argv[1:],'o:')
files = args
if x[0] == '-o':
outdir = x[1]
+if not os.path.isdir (outdir):
+ if os.path.exists (outdir):
+ os.unlink (outdir)
+ os.makedirs (outdir)
+
include_re = re.compile (r'@include ((?!../lily-).*?)\.texi$', re.M)
whitespaces = re.compile (r'\s+')
-section_translation_re = re.compile (r'@(node|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?|top|chapter|(?:sub){0,2}section|(?:major|chap|(?:sub){0,2})heading|translationof) (.*?)\n')
+section_translation_re = re.compile ('^@(node|(?:unnumbered|appendix)\
+(?:(?:sub){0,2}sec)?|top|chapter|(?:sub){0,2}section|\
+(?:major|chap|(?:sub){0,2})heading|translationof) (.*?)\\s*$', re.MULTILINE)
def expand_includes (m, filename):
filepath = os.path.join (os.path.dirname (filename), m.group(1)) + '.texi'
if os.path.exists (filepath):
- return extract_sections (filepath)
+ return extract_sections (filepath)[1]
else:
print "Unable to locate include file " + filepath
return ''
+lang_re = re.compile (r'^@documentlanguage (.+)', re.M)
+
def extract_sections (filename):
result = ''
f = open (filename, 'r')
page = f.read ()
f.close()
+ # Search document language
+ m = lang_re.search (page)
+ if m and m.group (1) != 'en':
+ lang_suffix = '.' + m.group (1)
+ else:
+ lang_suffix = ''
# Replace all includes by their list of sections and extract all sections
page = include_re.sub (lambda m: expand_includes (m, filename), page)
sections = section_translation_re.findall (page)
for sec in sections:
result += "@" + sec[0] + " " + sec[1] + "\n"
- return result
+ return (lang_suffix, result)
# Convert a given node name to its proper file name (normalization as explained
# in the texinfo manual:
else:
result += "__%06x" % ccode
# 7: if name begins with number, prepend 't_g' (so it starts with a letter)
- if ord(result[0]) in range (ord('0'), ord('9')):
+ if (result != '') and (ord(result[0]) in range (ord('0'), ord('9'))):
result = 't_g' + result
return result
return texinfo_file_name (remove_texinfo (title))
unnumbered_re = re.compile (r'unnumbered.*')
-def process_sections (filename, page):
+def process_sections (filename, lang_suffix, page):
sections = section_translation_re.findall (page)
- # TODO: Don't rely on the file having a 4-letter extension (texi)!!!
- p = os.path.join (outdir, filename) [:-5] + '_xref.map'
+ basename = os.path.splitext (os.path.basename (filename))[0]
+ p = os.path.join (outdir, basename) + lang_suffix + '.xref-map'
f = open (p, 'w')
this_title = ''
- this_filename = ''
+ this_filename = 'index'
this_anchor = ''
this_unnumbered = False
+ had_section = False
for sec in sections:
if sec[0] == "node":
# Write out the cached values to the file and start a new section:
- if this_title != '':
- f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n")
+ if this_title != '' and this_title != 'Top':
+ f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n")
+ had_section = False
this_title = remove_texinfo (sec[1])
this_anchor = create_texinfo_anchor (sec[1])
elif sec[0] == "translationof":
if not this_unnumbered:
this_filename = anchor
else:
+ # Some pages might not use a node for every section, so treat this
+ # case here, too: If we already had a section and encounter another
+ # one before the next @node, we write out the old one and start
+ # with the new values
+ if had_section and this_title != '':
+ f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n")
+ this_title = remove_texinfo (sec[1])
+ this_anchor = create_texinfo_anchor (sec[1])
+ had_section = True
+
# unnumbered nodes use the previously used file name, only numbered
- # nodes get their own filename!
+ # nodes get their own filename! However, top-level @unnumbered
+ # sections still get their own file.
this_unnumbered = unnumbered_re.match (sec[0])
- if not this_unnumbered:
+ if not this_unnumbered or sec[0] == "unnumbered":
this_filename = this_anchor
- if this_title != '':
+ if this_title != '' and this_title != 'Top':
f.write (this_title + "\t" + this_filename + "\t" + this_anchor + "\n")
f.close ()
for filename in files:
print "extract_texi_filenames.py: Processing %s" % filename
- sections = extract_sections (filename)
- process_sections (filename, sections)
+ (lang_suffix, sections) = extract_sections (filename)
+ process_sections (filename, lang_suffix, sections)