scripts/build/html-gettext.py

   1 #!@PYTHON@
   2 # html-gettext.py
   3
   4 # USAGE:  html-gettext.py [-o OUTDIR] LANG FILES
   5 #
   6 # -o OUTDIR specifies that output files should be written in OUTDIR
   7 #    rather than be overwritten
   8 #
   9
  10 import sys
  11 import re
  12 import os
  13 import getopt
  14
  15 import langdefs
  16
  17 optlist, args = getopt.getopt(sys.argv[1:],'o:')
  18 lang = args[0]
  19 files = args [1:]
  20
  21 outdir = '.'
  22 for x in optlist:
  23     if x[0] == '-o':
  24         outdir = x[1]
  25
  26 double_punct_char_separator = langdefs.LANGDICT[lang].double_punct_char_sep
  27 my_gettext = langdefs.translation[lang]
  28
  29 html_codes = ((' -- ', ' &ndash; '),
  30               (' --- ', ' &mdash; '),
  31               ("'", '&rsquo;'))
  32 texi_html_conversion = {
  33     'command': {
  34         'html2texi':
  35             (re.compile (r'(?:<samp><span class="command">|<code>)(.*?)(?:</span></samp>|</code>)'),
  36              r'@command{\1}'),
  37         'texi2html':
  38             (re.compile (r'@command{(.*?)}'),
  39              r'<code>\1</code>'),
  40         },
  41     'code': {
  42         'html2texi':
  43             (re.compile (r'<code>(.*?)</code>'),
  44              r'@code{\1}'),
  45         'texi2html':
  46             (re.compile (r'@code{(.*?)}'),
  47              r'<code>\1</code>'),
  48         },
  49     }
  50
  51 whitespaces = re.compile (r'\s+')
  52
  53
  54 def _ (s):
  55     if not s:
  56         return ''
  57     str = whitespaces.sub (' ', s)
  58     for c in html_codes:
  59         str = str.replace (c[1], c[0])
  60     for command in texi_html_conversion:
  61         d = texi_html_conversion[command]
  62         str = d['html2texi'][0].sub (d['html2texi'][1], str)
  63         str = my_gettext (str)
  64         str = d['texi2html'][0].sub (d['texi2html'][1], str)
  65     for c in html_codes:
  66         str = str.replace (c[0], c[1])
  67     return str
  68
  69 link_re =  re.compile (r'<link rel="(up|prev|next)" (.*?) title="([^"]*?)">')
  70
  71 def link_gettext (m):
  72     return '<link rel="' + m.group (1) + '" ' + m.group (2) \
  73         + ' title="' + _ (m.group (3)) + '">'
  74
  75 makeinfo_title_re = re.compile (r'<title>([^<]*?) - ([^<]*?)</title>')
  76
  77 def makeinfo_title_gettext (m):
  78     return '<title>' + _ (m.group (1)) + ' - ' + m.group (2) + '</title>'
  79
  80 texi2html_title_re = re.compile (r'<title>(.+): ([A-Z\d.]+ |)(.+?)</title>')
  81
  82 def texi2html_title_gettext (m):
  83     return '<title>' + _ (m.group (1)) + double_punct_char_separator + ': ' \
  84         + m.group (2) + _ (m.group (3)) + '</title>'
  85
  86 a_href_re = re.compile ('(?s)<a (?P<attributes>[^>]*?href="[\\w.#-_]+"[^>]*?>)(?P<code><code>)?\
  87 (?P<appendix>Appendix )?(?P<leading>[A-Z0-9.]+ | (?:&lt;){1,2} |&nbsp;[^>:]+?:&nbsp;|&nbsp;|)\
  88 (?P<name>(?:<samp><span class="command">|</?code>|</span>|[^>])+?)(?P<end_code>(?(code)</code>|))\
  89 (?P<trailing> (?:&gt;){1,2} |&nbsp;|)</a>:?')
  90
  91 def a_href_gettext (m):
  92     s = ''
  93     if m.group(0)[-1] == ':':
  94         s = double_punct_char_separator + ':'
  95     t = ''
  96     if m.group ('appendix'):
  97         t = _ (m.group ('appendix'))
  98     return '<a ' + m.group ('attributes') + (m.group ('code') or '') + \
  99         t + m.group ('leading') + _ (m.group ('name')) + \
 100         m.group ('end_code') + m.group ('trailing') + '</a>' + s
 101
 102 h_re = re.compile (r'<h(\d)( class="\w+"|)>\s*(Appendix |)([A-Z\d.]+ |)(.+?)\s*</h\1>')
 103
 104 def h_gettext (m):
 105     if m.group (3):
 106         s = _ (m.group (3))
 107     else:
 108         s= ''
 109     return '<h' + m.group (1) + m.group (2) + '>' + s +\
 110            m.group (4) + _ (m.group (5)) + '</h' + m.group (1) + '>'
 111
 112 for filename in files:
 113     f = open (filename, 'r')
 114     page = f.read ()
 115     f.close ()
 116     page = link_re.sub (link_gettext, page)
 117     page = makeinfo_title_re.sub (makeinfo_title_gettext, page)
 118     page = texi2html_title_re.sub (texi2html_title_gettext, page)
 119     page = a_href_re.sub (a_href_gettext, page)
 120     page = h_re.sub (h_gettext, page)
 121     for w in ('Next:', 'Previous:', 'Up:'):
 122         page = page.replace (w, _ (w))
 123     page = langdefs.LANGDICT[lang].html_filter (page)
 124     f = open (os.path.join (outdir, filename), 'w')
 125     f.write (page)
 126     f.close ()