Improve translated docs gettext trickery for texi2html

author John Mandereau <john.mandereau@gmail.com>

Tue, 2 Sep 2008 10:21:21 +0000 (12:21 +0200)

committer John Mandereau <john.mandereau@gmail.com>

Tue, 2 Sep 2008 10:21:21 +0000 (12:21 +0200)
author John Mandereau <john.mandereau@gmail.com>
Tue, 2 Sep 2008 10:21:21 +0000 (12:21 +0200)
committer John Mandereau <john.mandereau@gmail.com>
Tue, 2 Sep 2008 10:21:21 +0000 (12:21 +0200)
diff --git a/buildscripts/html-gettext.py b/buildscripts/html-gettext.py

index 02a4c947c0220062f9cbe5a6e8db8b759f67656a..aaa8888a3fdac88bcc92052a88a52aa34d17c0ab 100644 (file)
--- a/buildscripts/html-gettext.py
+++ b/buildscripts/html-gettext.py
@@ -27,12 +27,21 @@ double_punct_char_separator = langdefs.LANGDICT[lang].double_punct_char_sep
  my_gettext = langdefs.translation[lang]
  
  html_codes = ((' -- ', ' &ndash; '),
-              (' --- ', ' &mdash; '))
-html2texi = {'command': (re.compile (r'<samp><span class="command">(.*?)</span></samp>'), r'@command{\1}'),
-             'code': (re.compile (r'<code>(.*?)</code>'), r'@code{\1}')
+              (' --- ', ' &mdash; '),
+              ("'", '&rsquo;'))
+html2texi = {'command':
+                 (re.compile (r'<samp><span class="command">(.*?)</span></samp>'),
+                  r'@command{\1}'),
+             'code':
+                 (re.compile (r'<code>(.*?)</code>'),
+                  r'@code{\1}')
               }
-texi2html = {'command': (re.compile (r'@command{(.*?)}'), r'<samp><span class="command">\1</span></samp>'),
-             'code': (re.compile (r'@code{(.*?)}'), r'<code>\1</code>')
+texi2html = {'command':
+                 (re.compile (r'@command{(.*?)}'),
+                  r'<samp><span class="command">\1</span></samp>'),
+             'code':
+                 (re.compile (r'@code{(.*?)}'),
+                  r'<code>\1</code>')
               }
  whitespaces = re.compile (r'\s+')
  
@@ -52,46 +61,58 @@ def _ (s):
          s = s.replace (c[0], c[1])
      return s
  
+link_re =  re.compile (r'<link rel="(up|prev|next)" (.*?) title="([^"]*?)">')
+
  def link_gettext (m):
-    return '<link rel="' + m.group(1) + '" ' + m.group(2) + ' title="' + _(m.group(3)) + '">'
+    return '<link rel="' + m.group (1) + '" ' + m.group (2) \
+        + ' title="' + _ (m.group (3)) + '">'
+
+makeinfo_title_re = re.compile (r'<title>([^<]*?) - ([^<]*?)</title>')
+
+def makeinfo_title_gettext (m):
+    return '<title>' + _ (m.group (1)) + ' - ' + m.group (2) + '</title>'
+
+texi2html_title_re = re.compile (r'<title>(.+?): ([A-Z\d.]+ |)(.+?)</title>')
  
-def title_gettext (m):
-    return '<title>' + _(m.group(1)) + ' - ' + m.group(2) + '</title>'
+def texi2html_title_gettext (m):
+    return '<title>' + _ (m.group (1)) + double_punct_char_separator + ': ' \
+        + m.group (2) + _ (m.group (3)) + '</title>'
+
+a_href_re = re.compile ('(?s)<a ([^>]*?href="[\\w.#-_]+"[^>]*>(?:<code>|))\
+(Appendix |)([A-Z0-9.]+ | (?:&lt;){1,2} |&nbsp;[^:<]+?:&nbsp;|&nbsp;|)\
+(.+?)(</code>| (?:&gt;){1,2} |&nbsp;|)</a>:?')
  
  def a_href_gettext (m):
      s = ''
      if m.group(0)[-1] == ':':
          s = double_punct_char_separator + ':'
      t = ''
-    if m.lastindex == 7:
-        t = m.group(7)
-    return '<a ' + (m.group(1) or '') + m.group(2) + (m.group(3) or '') + _(m.group(4)) + m.group(5) + _(m.group(6)) + t + '</a>' + s
+    if m.group (2):
+        t = _ (m.group (2))
+    return '<a ' + m.group (1) + t + m.group (3) + _ (m.group (4)) + \
+        m.group (5) + '</a>' + s
+
+h_re = re.compile (r'<h(\d)( class="\w+"|)>\s*(Appendix |)([A-Z\d.]+ |)?([^<]+)\s*</h\1>')
  
  def h_gettext (m):
      if m.group (3):
-        s = _(m.group(3))
+        s = _ (m.group (3))
      else:
          s= ''
-    return '<h' + m.group(1) + m.group(2) + '>' + s +\
-           m.group(4) + _(m.group(5)) + '</h' + m.group(1) + '>'
-
-def crossmanual_ref_gettext (m):
-    return '<a href="' + m.group(1) + '">' + _(m.group(2)) + '</a>'
+    return '<h' + m.group (1) + m.group (2) + '>' + s +\
+           m.group (4) + _ (m.group (5)) + '</h' + m.group (1) + '>'
  
  for filename in files:
      f = open (filename, 'r')
      page = f.read ()
-    f.close()
-    page = re.sub (r'<link rel="(up|prev|next)" (.*?) title="([^"]*?)">', link_gettext, page)
-    page = re.sub (r'<title>([^<]*?) - ([^<]*?)</title>', title_gettext, page)
-    # ugh
-    page = re.sub (r'(?ms)<a ((?:rel="\w+")? ?(?:accesskey="[^"]+?")? ?(?:name=".*?")? ?)(href=".+?">)(<code>)?(Appendix )?([A-Z\d.]+ |)(.+?)(?(3)</code>)</a>:?', a_href_gettext, page)
-    page = re.sub (r'<h(\d)( class="\w+"|)>\s*(Appendix |)([A-Z\d.]+ |)?([^<]+)\s*</h\1>', h_gettext, page)
-    page = re.sub (r'<a href="(\.\./(?:music-glossary|lilypond-program/)?(?:.+?))">(.+?)</a>', crossmanual_ref_gettext, page)
-    # this is necessary for entries not translated by a_href_gettext
-    page = re.sub (r'<a href="(.+?)">(.+?)</a>', crossmanual_ref_gettext, page)
+    f.close ()
+    page = link_re.sub (link_gettext, page)
+    page = makeinfo_title_re.sub (makeinfo_title_gettext, page)
+    page = texi2html_title_re.sub (texi2html_title_gettext, page)
+    page = a_href_re.sub (a_href_gettext, page)
+    page = h_re.sub (h_gettext, page)
      for w in ('Next:', 'Previous:', 'Up:'):
-        page = re.sub (w, _(w), page)
+        page = page.replace (w, _ (w))
      page = langdefs.LANGDICT[lang].html_filter (page)
      f = open (os.path.join (outdir, filename), 'w')
      f.write (page)
diff --git a/buildscripts/postprocess_html.py b/buildscripts/postprocess_html.py

index f4c4797f988a67e218bbc4a54b7942cb868f08a3..6ebe31a8e0619669b788adadd3a280a8fddf4cf0 100644 (file)
--- a/buildscripts/postprocess_html.py
+++ b/buildscripts/postprocess_html.py
@@ -182,8 +182,19 @@ def find_translations (prefix, lang_ext):
                  missing.append (e)
      return available, missing
  
-online_links_re = re.compile ('''(href|src)=[\'"]([^/][.]*[^.:\'"]*)(.html|.png)(#[^"\']*|)[\'"]''')
-offline_links_re = re.compile ('''href=[\'"]([^/][.]*[^.:\'"]*)(.html)(#[^"\']*|)[\'"]''')
+online_links_re = re.compile ('''(href|src)=['"]([^/][.]*[^.:'"]*)([.]html|[.]png)(#[^"']*|)['"]''')
+offline_links_re = re.compile ('''href=['"]([^/][.]*[^.:'"]*)([.]html)(#[^"']*|)['"]''')
+big_page_name_re = re.compile ('''(.+?)-big-page''')
+
+def process_i18n_big_page_links (match, prefix, lang_ext):
+    big_page_name = big_page_name_re.match (match.group (1))
+    if big_page_name:
+        destination_path = os.path.normpath (os.path.join (os.path.dirname (prefix),
+                                                           big_page_name.group (0)))
+        if not lang_ext in pages_dict[destination_path]:
+            return match.group (0)
+    return 'href="' + match.group (1) + '.' + lang_ext \
+        + match.group (2) + match.group (3) + '"'
  
  def process_links (s, prefix, lang_ext, file_name, missing, target):
      page_flavors = {}
@@ -196,7 +207,7 @@ def process_links (s, prefix, lang_ext, file_name, missing, target):
      elif target == 'offline':
          # in LANG doc index: don't rewrite .html suffixes
          # as not all .LANG.html pages exist;
-        # the doc index should be translated and contain the right links
+        # the doc index should be translated and contain links with the right suffixes
          if prefix == 'Documentation/out-www/index':
              page_flavors[file_name] = [lang_ext, s]
          elif lang_ext == '':
@@ -205,9 +216,18 @@ def process_links (s, prefix, lang_ext, file_name, missing, target):
                  page_flavors[langdefs.lang_file_name (prefix, e, '.html')] = \
                      [e, offline_links_re.sub ('href="\\1.' + e + '\\2\\3"', s)]
          else:
-            page_flavors[file_name] = \
-                [lang_ext,
-                 offline_links_re.sub ('href="\\1.' + lang_ext + '\\2\\3"', s)]
+            # For saving bandwidth and disk space, we don't duplicate big pages
+            # in English, so we must process translated big pages links differently.
+            if 'big-page' in prefix:
+                page_flavors[file_name] = \
+                    [lang_ext,
+                     offline_links_re.sub \
+                         (lambda match: process_i18n_big_page_links (match, prefix, lang_ext),
+                          s)]
+            else:
+                page_flavors[file_name] = \
+                    [lang_ext,
+                     offline_links_re.sub ('href="\\1.' + lang_ext + '\\2\\3"', s)]
      return page_flavors
  
  def add_menu (page_flavors, prefix, available, target, translation):
@@ -230,7 +250,6 @@ def add_menu (page_flavors, prefix, available, target, translation):
          if language_menu:
              language_available = t (lang_available) % language_menu
              languages = LANGUAGES_TEMPLATE % vars ()
-        # put language menu before '</body>' and '</html>' tags
          page_flavors[k][1] = add_footer (page_flavors[k][1], languages)
      return page_flavors
author	John Mandereau <john.mandereau@gmail.com>
	Tue, 2 Sep 2008 10:21:21 +0000 (12:21 +0200)
committer	John Mandereau <john.mandereau@gmail.com>
	Tue, 2 Sep 2008 10:21:21 +0000 (12:21 +0200)
buildscripts/html-gettext.py		patch | blob | history
buildscripts/postprocess_html.py		patch | blob | history