buildscripts/translations-status.py

   1 #!@PYTHON@
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10
  11   Writes translations.html.in and for each LANG in LANGUAGES
  12 translations.LANG.html.in
  13 """
  14
  15 import sys
  16 import re
  17 import string
  18 import os
  19 import gettext
  20 import subprocess
  21
  22 def progress (str):
  23     sys.stderr.write (str + '\n')
  24
  25 progress ("translations-status.py")
  26
  27 buildscript_dir = sys.argv[1]
  28 localedir = sys.argv[2]
  29
  30 _doc = lambda s: s
  31
  32 sys.path.append (buildscript_dir)
  33 import langdefs
  34
  35 # load gettext messages catalogs
  36 translation = {}
  37 for l in langdefs.LANGUAGES:
  38     if l.enabled and l.code != 'en':
  39         translation[l.code] = gettext.translation('lilypond-doc', localedir, [l.code]).gettext
  40
  41 def read_pipe (command):
  42     child = subprocess.Popen (command,
  43                               stdout = subprocess.PIPE,
  44                               stderr = subprocess.PIPE,
  45                               shell = True)
  46     (output, error) = child.communicate ()
  47     code = str (child.wait ())
  48     if not child.stdout or child.stdout.close ():
  49         print "pipe failed: %(command)s" % locals ()
  50     if code != '0':
  51         error = code + ' ' + error
  52     return (output, error)
  53
  54 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  55 space_re = re.compile (r'\s+', re.M)
  56 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  57 node_re = re.compile ('^@node .*?$', re.M)
  58 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  59 include_re = re.compile ('^@include (.*?)$', re.M)
  60
  61 committish_re = re.compile ('GIT [Cc]ommittish: ([a-f0-9]+)')
  62 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  63 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$', re.M | re.I)
  64 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  65 post_gdp_re = re.compile ('post.GDP', re.I)
  66 untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
  67 skeleton_str = '-- SKELETON FILE --'
  68
  69 diff_cmd = 'git diff --no-color %(committish)s HEAD -- %(original)s | cat'
  70
  71 format_table = {
  72     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'long':_doc ('not translated')},
  73     'partially translated': {'color':'dfef77', 'short':_doc ('partially (%(p)d %%)'),
  74                              'long':_doc ('partially translated (%(p)d %%)')},
  75     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'long': _doc ('translated')},
  76     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date')},
  77     'outdated': {'short':_doc ('partially (%(p)d %%)'), 'long':_doc ('partially up-to-date (%(p)d %%)')},
  78     'N/A': {'short':_doc ('N/A'), 'long':'', 'color':'d587ff' },
  79     'pre-GDP':_doc ('pre-GDP'),
  80     'post-GDP':_doc ('post-GDP')
  81 }
  82
  83 texi_level = {
  84 # (Unumbered/Numbered/Lettered, level)
  85     'top': ('u', 0),
  86     'unnumbered': ('u', 1),
  87     'unnumberedsec': ('u', 2),
  88     'unnumberedsubsec': ('u', 3),
  89     'chapter': ('n', 1),
  90     'section': ('n', 2),
  91     'subsection': ('n', 3),
  92     'appendix': ('l', 1)
  93 }
  94
  95 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  96                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  97
  98 class SectionNumber (object):
  99     def __init__ (self):
 100         self.__data = [[0,'u']]
 101
 102     def __increase_last_index (self):
 103         type = self.__data[-1][1]
 104         if type == 'l':
 105             self.__data[-1][0] = self.__data[-1][0].translate (appendix_number_trans)
 106         elif type == 'n':
 107             self.__data[-1][0] += 1
 108
 109     def format (self):
 110         if self.__data[-1][1] == 'u':
 111             return ''
 112         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 113
 114     def increase (self, (type, level)):
 115         if level == 0:
 116             self.__data = [[0,'u']]
 117         while level + 1 < len (self.__data):
 118             del self.__data[-1]
 119         if level + 1 > len (self.__data):
 120             self.__data.append ([0, type])
 121             if type == 'l':
 122                 self.__data[-1][0] = '@'
 123         if type == self.__data[-1][1]:
 124             self.__increase_last_index ()
 125         else:
 126             self.__data[-1] = ([0, type])
 127             if type == 'l':
 128                 self.__data[-1][0] = 'A'
 129             elif type == 'n':
 130                 self.__data[-1][0] = 1
 131         return self.format ()
 132
 133
 134 def percentage_color (percent):
 135     p = percent / 100.0
 136     if p < 0.33:
 137         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 138     elif p < 0.67:
 139         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 140     else:
 141         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:] for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 142     return ''.join (c)
 143
 144 def tely_word_count (tely_doc):
 145     '''
 146     Calculate word count of a Texinfo document node by node.
 147
 148     Take string tely_doc as an argument.
 149     Return a list of integers.
 150
 151     Texinfo comments and @lilypond blocks are not included in word counts.
 152     '''
 153     tely_doc = comments_re.sub ('', tely_doc)
 154     tely_doc = lilypond_re.sub ('', tely_doc)
 155     nodes = node_re.split (tely_doc)
 156     return [len (space_re.split (n)) for n in nodes]
 157
 158
 159 class TelyDocument (object):
 160     def __init__ (self, filename):
 161         self.filename = filename
 162         self.contents = open (filename).read ()
 163
 164         ## record title and sectionning level of first Texinfo section
 165         m = title_re.search (self.contents)
 166         if m:
 167             self.title = m.group (2)
 168             self.level = texi_level [m.group (1)]
 169         else:
 170             self.title = 'Untitled'
 171             self.level = ('u', 1)
 172
 173         included_files = [os.path.join (os.path.dirname (filename), t) for t in include_re.findall (self.contents)]
 174         self.included_files = [p for p in included_files if os.path.exists (p)]
 175
 176     def print_title (self, section_number):
 177         return section_number.increase (self.level) + self.title
 178
 179
 180 class TranslatedTelyDocument (TelyDocument):
 181     def __init__ (self, filename, masterdocument, parent_translation=None):
 182         TelyDocument.__init__ (self, filename)
 183
 184         self.masterdocument = masterdocument
 185
 186         ## record authoring information
 187         m = translators_re.search (self.contents)
 188         if m:
 189             self.translators = [n.strip () for n in m.group (1).split (',')]
 190         else:
 191             self.translators = parent_translation.translators
 192         m = checkers_re.search (self.contents)
 193         if m:
 194             self.checkers = [n.strip () for n in m.group (1).split (',')]
 195         elif isinstance (parent_translation, TranslatedTelyDocument):
 196             self.checkers = parent_translation.checkers
 197         else:
 198             self.checkers = []
 199
 200         ## check whether translation is pre- or post-GDP
 201         m = status_re.search (self.contents)
 202         if m:
 203             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 204         else:
 205             self.post_gdp = False
 206
 207         ## record which parts (nodes) of the file are actually translated
 208         self.partially_translated = not skeleton_str in self.contents
 209         nodes = node_re.split (self.contents)
 210         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 211
 212         ## calculate translation percentage
 213         master_total_word_count = sum (masterdocument.word_count)
 214         translation_word_count = sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 215                                        for k in range (min (len (masterdocument.word_count), len (self.translated_nodes)))])
 216         self.translation_percentage = 100 * translation_word_count / master_total_word_count
 217
 218         ## calculate how much the file is outdated
 219         m = committish_re.search (self.contents)
 220         if not m:
 221             sys.stderr.write ('error: ' + filename + \
 222                                   ": no 'GIT committish: <hash>' found.\nPlease check " + \
 223                                   'the whole file against the original in English, then ' + \
 224                                   'fill in HEAD committish in the header.\n')
 225             sys.exit (1)
 226         (diff_string, error) = read_pipe (diff_cmd % {'committish':m.group (1), 'original':masterdocument.filename})
 227         if error:
 228             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 229             self.uptodate_percentage = None
 230         else:
 231             diff = diff_string.splitlines ()
 232             insertions = sum ([len (l) - 1 for l in diff if l.startswith ('+') and not l.startswith ('+++')])
 233             deletions = sum ([len (l) - 1 for l in diff if l.startswith ('-') and not l.startswith ('---')])
 234             outdateness_percentage = 50.0 * (deletions + insertions) / (masterdocument.size + 0.5 * (deletions - insertions))
 235             self.uptodate_percentage = 100 - int (outdateness_percentage)
 236             if self.uptodate_percentage > 100:
 237                 alternative = 50
 238                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 239                               % (self.filename, self.uptodate_percentage, alternative))
 240                 self.uptodate_percentage = alternative
 241             elif self.uptodate_percentage < 1:
 242                 alternative = 1
 243                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 244                               % (self.filename, self.uptodate_percentage, alternative))
 245                 self.uptodate_percentage = alternative
 246
 247     def completeness (self, formats=['long']):
 248         if isinstance (formats, str):
 249             formats = [formats]
 250         p = self.translation_percentage
 251         if p == 0:
 252             status = 'not translated'
 253         elif p == 100:
 254             status = 'fully translated'
 255         else:
 256             status = 'partially translated'
 257         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 258
 259     def uptodateness (self, formats=['long']):
 260         if isinstance (formats, str):
 261             formats = [formats]
 262         p = self.uptodate_percentage
 263         if p == None:
 264             status = 'N/A'
 265         elif p == 100:
 266             status = 'up to date'
 267         else:
 268             status = 'outdated'
 269         l = {}
 270         for f in formats:
 271             if f == 'color' and p != None:
 272                 l['color'] = percentage_color (p)
 273             else:
 274                 l[f] = format_table[status][f] % locals ()
 275         return l
 276
 277     def gdp_status (self, translation=lambda s: s):
 278         if self.post_gdp:
 279             return translation (format-table['post-GDP'])
 280         else:
 281             return translation (format-table['pre-GDP'])
 282
 283     def short_html_status (self):
 284         s = '  <td>'
 285         if self.partially_translated:
 286             s += '<br>\n   '.join (self.translators) + '<br>\n'
 287             if self.checkers:
 288                 s += '   <small>' + '<br>\n   '.join (self.checkers) + '</small><br>\n'
 289
 290         c = self.completeness (['long', 'color'])
 291         s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % c
 292
 293         if self.partially_translated:
 294             u = self.uptodateness (['long', 'color'])
 295             s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % u
 296
 297         s += '  </td>\n'
 298         return s
 299
 300     def html_status (self):
 301         # TODO
 302         return ''
 303
 304 class MasterTelyDocument (TelyDocument):
 305     def __init__ (self, filename, parent_translations=dict ([(lang, None) for lang in langdefs.LANGDICT.keys()])):
 306         #print "init MasterTelyDocument %s" % filename
 307         TelyDocument.__init__ (self, filename)
 308         self.size = len (self.contents)
 309         self.word_count = tely_word_count (self.contents)
 310         translations = dict ([(lang, os.path.join (lang, filename)) for lang in langdefs.LANGDICT.keys()])
 311         #print translations
 312         self.translations = dict ([(lang, TranslatedTelyDocument (translations[lang], self, parent_translations.get (lang)))
 313                                    for lang in langdefs.LANGDICT.keys() if os.path.exists (translations[lang])])
 314         if self.translations:
 315             self.includes = [MasterTelyDocument (f, self.translations) for f in self.included_files]
 316         else:
 317             self.includes = []
 318
 319     # TODO
 320     def print_wc_priority (self):
 321         return
 322
 323     def html_status (self, numbering=SectionNumber ()):
 324         if self.title == 'Untitled' or not self.translations:
 325             return ''
 326         if self.level[1] == 0: # if self is a master document
 327             s = '''<table align="center" border="2">
 328  <tr align="center">
 329   <th>%s</th>''' % self.print_title (numbering)
 330             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations.keys ()])
 331             s += ' </tr>\n'
 332             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 333                 % sum (self.word_count)
 334
 335         else:
 336             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 337                 % (self.print_title (numbering), sum (self.word_count))
 338
 339         s += ''.join ([t.short_html_status () for t in self.translations.values ()])
 340         s += ' </tr>\n'
 341         s += ''.join ([i.html_status (numbering) for i in self.includes])
 342
 343         if self.level[1] == 0:
 344             s += '</table>\n<p></p>\n'
 345         return s
 346
 347 progress ("Reading documents...")
 348
 349 tely_files = read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 350 master_docs = [MasterTelyDocument (os.path.normpath (filename)) for filename in tely_files]
 351 master_docs = [doc for doc in master_docs if doc.translations]
 352
 353 main_status_page = open ('translations.template.html.in').read ()
 354
 355 ## TODO
 356 #per_lang_status_pages = dict ([(l, open (os.path.join (l, 'translations.template.html')). read ())
 357 #                               for l in langdefs.LANGDICT.keys ()
 358 #                               if langdefs.LANGDICT[l].enabled])
 359
 360 progress ("Generating status pages...")
 361
 362 main_status_html = ' <p><i>Last updated %s</i></p>\n' % read_pipe ('LANG= date -u')[0]
 363 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 364
 365 html_re = re.compile ('<html>', re.I)
 366 end_body_re = re.compile ('</body>', re.I)
 367
 368 main_status_page = html_re.sub ('''<html>
 369 <!-- This page is automatically generated by translation-status.py from
 370 translations.template.html.in; DO NOT EDIT !-->''', main_status_page)
 371
 372 main_status_page = end_body_re.sub (main_status_html + '\n</body>', main_status_page)
 373
 374 open ('translations.html.in', 'w').write (main_status_page)