buildscripts/html-gettext.py

   1 #!@PYTHON@
   2 # html-gettext.py
   3
   4 # USAGE:  html-gettext.py [-o OUTDIR] LANG FILES
   5 #
   6 # -o OUTDIR specifies that output files should be written in OUTDIR
   7 #    rather than be overwritten
   8 #
   9
  10 import sys
  11 import re
  12 import os
  13 import getopt
  14
  15 import langdefs
  16
  17 optlist, args = getopt.getopt(sys.argv[1:],'o:')
  18 lang = args[0]
  19 files = args [1:]
  20
  21 outdir = '.'
  22 for x in optlist:
  23     if x[0] == '-o':
  24         outdir = x[1]
  25
  26 double_punct_char_separator = langdefs.LANGDICT[lang].double_punct_char_sep
  27 my_gettext = langdefs.translation[lang]
  28
  29 html_codes = ((' -- ', ' &ndash; '),
  30               (' --- ', ' &mdash; '),
  31               ("'", '&rsquo;'))
  32 texi_html_conversion = {
  33     'command': {
  34         'html2texi':
  35             (re.compile (r'(?:<samp><span class="command">|<code>)(.*?)(?:</span></samp>|</code>)'),
  36              r'@command{\1}'),
  37         'texi2html':
  38             (re.compile (r'@command{(.*?)}'),
  39              r'<code>\1</code>'),
  40         },
  41     'code': {
  42         'html2texi':
  43             (re.compile (r'<code>(.*?)</code>'),
  44              r'@code{\1}'),
  45         'texi2html':
  46             (re.compile (r'@code{(.*?)}'),
  47              r'<code>\1</code>'),
  48         },
  49     }
  50
  51 whitespaces = re.compile (r'\s+')
  52
  53
  54 def _ (s):
  55     if not s:
  56         return ''
  57     str = whitespaces.sub (' ', s)
  58     for c in html_codes:
  59         str = str.replace (c[1], c[0])
  60     for command in texi_html_conversion:
  61         d = texi_html_conversion[command]
  62         str = d['html2texi'][0].sub (d['html2texi'][1], str)
  63         str = my_gettext (str)
  64         str = d['texi2html'][0].sub (d['texi2html'][1], str)
  65     for c in html_codes:
  66         str = str.replace (c[0], c[1])
  67     return str
  68
  69 link_re =  re.compile (r'<link rel="(up|prev|next)" (.*?) title="([^"]*?)">')
  70
  71 def link_gettext (m):
  72     return '<link rel="' + m.group (1) + '" ' + m.group (2) \
  73         + ' title="' + _ (m.group (3)) + '">'
  74
  75 makeinfo_title_re = re.compile (r'<title>([^<]*?) - ([^<]*?)</title>')
  76
  77 def makeinfo_title_gettext (m):
  78     return '<title>' + _ (m.group (1)) + ' - ' + m.group (2) + '</title>'
  79
  80 texi2html_title_re = re.compile (r'<title>(.+): ([A-Z\d.]+ |)(.+?)</title>')
  81
  82 def texi2html_title_gettext (m):
  83     return '<title>' + _ (m.group (1)) + double_punct_char_separator + ': ' \
  84         + m.group (2) + _ (m.group (3)) + '</title>'
  85
  86 a_href_re = re.compile ('(?s)<a (?P<attributes>[^>]*?href="[\\w.#-_]+"[^>]*>)(?P<code><code>)?\
  87 (?P<appendix>Appendix )?(?P<leading>[A-Z0-9.]+ | (?:&lt;){1,2} |&nbsp;[^:]+?:&nbsp;|&nbsp;|)\
  88 (?P<name>.+?)(?P<end_code>(?(code)</code>|))(?P<trailing>| (?:&gt;){1,2} |&nbsp;|)</a>:?')
  89
  90 def a_href_gettext (m):
  91     s = ''
  92     if m.group(0)[-1] == ':':
  93         s = double_punct_char_separator + ':'
  94     t = ''
  95     if m.group ('appendix'):
  96         t = _ (m.group ('appendix'))
  97     return '<a ' + m.group ('attributes') + (m.group ('code') or '') + \
  98         t + m.group ('leading') + _ (m.group ('name')) + \
  99         m.group ('end_code') + m.group ('trailing') + '</a>' + s
 100
 101 h_re = re.compile (r'<h(\d)( class="\w+"|)>\s*(Appendix |)([A-Z\d.]+ |)(.+?)\s*</h\1>')
 102
 103 def h_gettext (m):
 104     if m.group (3):
 105         s = _ (m.group (3))
 106     else:
 107         s= ''
 108     return '<h' + m.group (1) + m.group (2) + '>' + s +\
 109            m.group (4) + _ (m.group (5)) + '</h' + m.group (1) + '>'
 110
 111 for filename in files:
 112     f = open (filename, 'r')
 113     page = f.read ()
 114     f.close ()
 115     page = link_re.sub (link_gettext, page)
 116     page = makeinfo_title_re.sub (makeinfo_title_gettext, page)
 117     page = texi2html_title_re.sub (texi2html_title_gettext, page)
 118     page = a_href_re.sub (a_href_gettext, page)
 119     page = h_re.sub (h_gettext, page)
 120     for w in ('Next:', 'Previous:', 'Up:'):
 121         page = page.replace (w, _ (w))
 122     page = langdefs.LANGDICT[lang].html_filter (page)
 123     f = open (os.path.join (outdir, filename), 'w')
 124     f.write (page)
 125     f.close ()