Docs: hack HTML gettextization again

author John Mandereau <john.mandereau@gmail.com>
Sat, 22 Nov 2008 19:04:38 +0000 (20:04 +0100)
committer John Mandereau <john.mandereau@gmail.com>
Sat, 22 Nov 2008 19:04:38 +0000 (20:04 +0100)

diff --git a/buildscripts/html-gettext.py b/buildscripts/html-gettext.py
index aaa8888a3fdac88bcc92052a88a52aa34d17c0ab..6920e50d2232ae679234b40ddbefb66f266ad89d 100644
--- a/buildscripts/html-gettext.py
+++ b/buildscripts/html-gettext.py
@@ -29,37 +29,42 @@ my_gettext = langdefs.translation[lang]
  html_codes = ((' -- ', ' &ndash; '),
                (' --- ', ' &mdash; '),
                ("'", '&rsquo;'))
-html2texi = {'command':
-                 (re.compile (r'<samp><span class="command">(.*?)</span></samp>'),
-                  r'@command{\1}'),
-             'code':
-                 (re.compile (r'<code>(.*?)</code>'),
-                  r'@code{\1}')
-             }
-texi2html = {'command':
-                 (re.compile (r'@command{(.*?)}'),
-                  r'<samp><span class="command">\1</span></samp>'),
-             'code':
-                 (re.compile (r'@code{(.*?)}'),
-                  r'<code>\1</code>')
-             }
+texi_html_conversion = {
+    'command': {
+        'html2texi':
+            (re.compile (r'(?:<samp><span class="command">|<code>)(.*?)(?:</span></samp>|</code>)'),
+             r'@command{\1}'),
+        'texi2html':
+            (re.compile (r'@command{(.*?)}'),
+             r'<code>\1</code>'),
+        },
+    'code': {
+        'html2texi':
+            (re.compile (r'<code>(.*?)</code>'),
+             r'@code{\1}'),
+        'texi2html':
+            (re.compile (r'@code{(.*?)}'),
+             r'<code>\1</code>'),
+        },
+    }
+
  whitespaces = re.compile (r'\s+')
  
  
  def _ (s):
      if not s:
          return ''
-    s = whitespaces.sub (' ', s)
+    str = whitespaces.sub (' ', s)
      for c in html_codes:
-        s = s.replace (c[1], c[0])
-    for u in html2texi.values():
-        s = u[0].sub (u[1], s)
-    s = my_gettext (s)
-    for u in texi2html.values():
-        s = u[0].sub (u[1], s)
+        str = str.replace (c[1], c[0])
+    for command in texi_html_conversion:
+        d = texi_html_conversion[command]
+        str = d['html2texi'][0].sub (d['html2texi'][1], str)
+        str = my_gettext (str)
+        str = d['texi2html'][0].sub (d['texi2html'][1], str)
      for c in html_codes:
-        s = s.replace (c[0], c[1])
-    return s
+        str = str.replace (c[0], c[1])
+    return str
  
  link_re =  re.compile (r'<link rel="(up|prev|next)" (.*?) title="([^"]*?)">')
  
@@ -72,27 +77,28 @@ makeinfo_title_re = re.compile (r'<title>([^<]*?) - ([^<]*?)</title>')
  def makeinfo_title_gettext (m):
      return '<title>' + _ (m.group (1)) + ' - ' + m.group (2) + '</title>'
  
-texi2html_title_re = re.compile (r'<title>(.+?): ([A-Z\d.]+ |)(.+?)</title>')
+texi2html_title_re = re.compile (r'<title>(.+): ([A-Z\d.]+ |)(.+?)</title>')
  
  def texi2html_title_gettext (m):
      return '<title>' + _ (m.group (1)) + double_punct_char_separator + ': ' \
          + m.group (2) + _ (m.group (3)) + '</title>'
  
-a_href_re = re.compile ('(?s)<a ([^>]*?href="[\\w.#-_]+"[^>]*>(?:<code>|))\
-(Appendix |)([A-Z0-9.]+ | (?:&lt;){1,2} |&nbsp;[^:<]+?:&nbsp;|&nbsp;|)\
-(.+?)(</code>| (?:&gt;){1,2} |&nbsp;|)</a>:?')
+a_href_re = re.compile ('(?s)<a (?P<attributes>[^>]*?href="[\\w.#-_]+"[^>]*>)(?P<code><code>)?\
+(?P<appendix>Appendix )?(?P<leading>[A-Z0-9.]+ | (?:&lt;){1,2} |&nbsp;[^:]+?:&nbsp;|&nbsp;|)\
+(?P<name>.+?)(?P<end_code>(?(code)</code>|))(?P<trailing>| (?:&gt;){1,2} |&nbsp;|)</a>:?')
  
  def a_href_gettext (m):
      s = ''
      if m.group(0)[-1] == ':':
          s = double_punct_char_separator + ':'
      t = ''
-    if m.group (2):
-        t = _ (m.group (2))
-    return '<a ' + m.group (1) + t + m.group (3) + _ (m.group (4)) + \
-        m.group (5) + '</a>' + s
+    if m.group ('appendix'):
+        t = _ (m.group ('appendix'))
+    return '<a ' + m.group ('attributes') + (m.group ('code') or '') + \
+        t + m.group ('leading') + _ (m.group ('name')) + \
+        m.group ('end_code') + m.group ('trailing') + '</a>' + s
  
-h_re = re.compile (r'<h(\d)( class="\w+"|)>\s*(Appendix |)([A-Z\d.]+ |)?([^<]+)\s*</h\1>')
+h_re = re.compile (r'<h(\d)( class="\w+"|)>\s*(Appendix |)([A-Z\d.]+ |)(.+?)\s*</h\1>')
  
  def h_gettext (m):
      if m.group (3):
author	John Mandereau <john.mandereau@gmail.com>
	Sat, 22 Nov 2008 19:04:38 +0000 (20:04 +0100)
committer	John Mandereau <john.mandereau@gmail.com>
	Sat, 22 Nov 2008 19:04:38 +0000 (20:04 +0100)