scripts/aux/translations-status.py

   1 #!/usr/bin/env python
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10   Writes translations.html.in and for each LANG in LANGUAGES
  11 translations.LANG.html.in
  12   Writes out/translations-status.txt
  13   Updates word counts in TRANSLATION
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20
  21 import langdefs
  22 import buildlib
  23
  24 def progress (str):
  25     sys.stderr.write (str + '\n')
  26
  27 progress ("translations-status.py")
  28
  29 _doc = lambda s: s
  30
  31 # load gettext messages catalogs
  32 translation = langdefs.translation
  33
  34
  35 language_re = re.compile (r'^@documentlanguage (.+)', re.M)
  36 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  37 space_re = re.compile (r'\s+', re.M)
  38 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  39 node_re = re.compile ('^@node .*?$', re.M)
  40 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|' + \
  41 '(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  42 include_re = re.compile ('^@include (.*?)$', re.M)
  43
  44 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  45 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$',
  46                           re.M | re.I)
  47 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  48 post_gdp_re = re.compile ('post.GDP', re.I)
  49 untranslated_node_str = '@untranslated'
  50 skeleton_str = '-- SKELETON FILE --'
  51
  52 section_titles_string = _doc ('Section titles')
  53 last_updated_string = _doc (' <p><i>Last updated %s</i></p>\n')
  54 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
  55                          _doc ('Translated'), _doc ('Up to date'),
  56                          _doc ('Other info')]
  57 format_table = {
  58     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
  59                        'long':_doc ('not translated')},
  60     'partially translated': {'color':'dfef77',
  61                              'short':_doc ('partially (%(p)d %%)'),
  62                              'abbr':'%(p)d%%',
  63                              'long':_doc ('partially translated (%(p)d %%)')},
  64     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
  65                          'long': _doc ('translated')},
  66     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
  67                    'abbr':'100%%', 'vague':_doc ('up to date')},
  68     'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
  69                  'vague':_doc ('partially up to date')},
  70     'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
  71     'pre-GDP':_doc ('pre-GDP'),
  72     'post-GDP':_doc ('post-GDP')
  73 }
  74
  75 texi_level = {
  76 # (Unumbered/Numbered/Lettered, level)
  77     'top': ('u', 0),
  78     'unnumbered': ('u', 1),
  79     'unnumberedsec': ('u', 2),
  80     'unnumberedsubsec': ('u', 3),
  81     'chapter': ('n', 1),
  82     'section': ('n', 2),
  83     'subsection': ('n', 3),
  84     'appendix': ('l', 1)
  85 }
  86
  87 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  88                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  89
  90 class SectionNumber (object):
  91     def __init__ (self):
  92         self.__data = [[0,'u']]
  93
  94     def __increase_last_index (self):
  95         type = self.__data[-1][1]
  96         if type == 'l':
  97             self.__data[-1][0] = \
  98                 self.__data[-1][0].translate (appendix_number_trans)
  99         elif type == 'n':
 100             self.__data[-1][0] += 1
 101
 102     def format (self):
 103         if self.__data[-1][1] == 'u':
 104             return ''
 105         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 106
 107     def increase (self, (type, level)):
 108         if level == 0:
 109             self.__data = [[0,'u']]
 110         while level + 1 < len (self.__data):
 111             del self.__data[-1]
 112         if level + 1 > len (self.__data):
 113             self.__data.append ([0, type])
 114             if type == 'l':
 115                 self.__data[-1][0] = '@'
 116         if type == self.__data[-1][1]:
 117             self.__increase_last_index ()
 118         else:
 119             self.__data[-1] = ([0, type])
 120             if type == 'l':
 121                 self.__data[-1][0] = 'A'
 122             elif type == 'n':
 123                 self.__data[-1][0] = 1
 124         return self.format ()
 125
 126
 127 def percentage_color (percent):
 128     p = percent / 100.0
 129     if p < 0.33:
 130         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
 131              for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 132     elif p < 0.67:
 133         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
 134              for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 135     else:
 136         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
 137              for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 138     return ''.join (c)
 139
 140
 141 def update_word_count (text, filename, word_count):
 142     return re.sub (r'(?m)^(\d+) *' + filename,
 143                    str (word_count).ljust (6) + filename,
 144                    text)
 145
 146 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 147
 148 def po_word_count (po_content):
 149     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 150     return len (space_re.split (s))
 151
 152 sgml_tag_re = re.compile (r'<.*?>', re.S)
 153
 154 def sgml_word_count (sgml_doc):
 155     s = sgml_tag_re.sub ('', sgml_doc)
 156     return len (space_re.split (s))
 157
 158 def tely_word_count (tely_doc):
 159     '''
 160     Calculate word count of a Texinfo document node by node.
 161
 162     Take string tely_doc as an argument.
 163     Return a list of integers.
 164
 165     Texinfo comments and @lilypond blocks are not included in word counts.
 166     '''
 167     tely_doc = comments_re.sub ('', tely_doc)
 168     tely_doc = lilypond_re.sub ('', tely_doc)
 169     nodes = node_re.split (tely_doc)
 170     return [len (space_re.split (n)) for n in nodes]
 171
 172
 173 class TelyDocument (object):
 174     def __init__ (self, filename):
 175         self.filename = filename
 176         self.contents = open (filename).read ()
 177
 178         ## record title and sectionning level of first Texinfo section
 179         m = title_re.search (self.contents)
 180         if m:
 181             self.title = m.group (2)
 182             self.level = texi_level [m.group (1)]
 183         else:
 184             self.title = 'Untitled'
 185             self.level = ('u', 1)
 186
 187         m = language_re.search (self.contents)
 188         if m:
 189             self.language = m.group (1)
 190
 191         included_files = [os.path.join (os.path.dirname (filename), t)
 192                           for t in include_re.findall (self.contents)]
 193         self.included_files = [p for p in included_files if os.path.exists (p)]
 194
 195     def print_title (self, section_number):
 196         return section_number.increase (self.level) + self.title
 197
 198
 199 class TranslatedTelyDocument (TelyDocument):
 200     def __init__ (self, filename, masterdocument, parent_translation=None):
 201         TelyDocument.__init__ (self, filename)
 202
 203         self.masterdocument = masterdocument
 204         if not hasattr (self, 'language') \
 205                 and hasattr (parent_translation, 'language'):
 206             self.language = parent_translation.language
 207         if hasattr (self, 'language'):
 208             self.translation = translation[self.language]
 209         else:
 210             self.translation = lambda x: x
 211         self.title = self.translation (self.title)
 212
 213         ## record authoring information
 214         m = translators_re.search (self.contents)
 215         if m:
 216             self.translators = [n.strip () for n in m.group (1).split (',')]
 217         else:
 218             self.translators = parent_translation.translators
 219         m = checkers_re.search (self.contents)
 220         if m:
 221             self.checkers = [n.strip () for n in m.group (1).split (',')]
 222         elif isinstance (parent_translation, TranslatedTelyDocument):
 223             self.checkers = parent_translation.checkers
 224         else:
 225             self.checkers = []
 226
 227         ## check whether translation is pre- or post-GDP
 228         m = status_re.search (self.contents)
 229         if m:
 230             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 231         else:
 232             self.post_gdp = False
 233
 234         ## record which parts (nodes) of the file are actually translated
 235         self.partially_translated = not skeleton_str in self.contents
 236         nodes = node_re.split (self.contents)
 237         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 238
 239         ## calculate translation percentage
 240         master_total_word_count = sum (masterdocument.word_count)
 241         translation_word_count = \
 242             sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 243                   for k in range (min (len (masterdocument.word_count),
 244                                        len (self.translated_nodes)))])
 245         self.translation_percentage = \
 246             100 * translation_word_count / master_total_word_count
 247
 248         ## calculate how much the file is outdated
 249         (diff_string, error) = \
 250             buildlib.check_translated_doc (masterdocument.filename, self.filename, self.contents)
 251         if error:
 252             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 253             self.uptodate_percentage = None
 254         else:
 255             diff = diff_string.splitlines ()
 256             insertions = sum ([len (l) - 1 for l in diff
 257                                if l.startswith ('+')
 258                                and not l.startswith ('+++')])
 259             deletions = sum ([len (l) - 1 for l in diff
 260                               if l.startswith ('-')
 261                               and not l.startswith ('---')])
 262             outdateness_percentage = 50.0 * (deletions + insertions) / \
 263                 (masterdocument.size + 0.5 * (deletions - insertions))
 264             self.uptodate_percentage = 100 - int (outdateness_percentage)
 265             if self.uptodate_percentage > 100:
 266                 alternative = 50
 267                 progress ("%s: strange uptodateness percentage %d %%, \
 268 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 269                 self.uptodate_percentage = alternative
 270             elif self.uptodate_percentage < 1:
 271                 alternative = 1
 272                 progress ("%s: strange uptodateness percentage %d %%, \
 273 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 274                 self.uptodate_percentage = alternative
 275
 276     def completeness (self, formats=['long'], translated=False):
 277         if translated:
 278             translation = self.translation
 279         else:
 280             translation = lambda x: x
 281
 282         if isinstance (formats, str):
 283             formats = [formats]
 284         p = self.translation_percentage
 285         if p == 0:
 286             status = 'not translated'
 287         elif p == 100:
 288             status = 'fully translated'
 289         else:
 290             status = 'partially translated'
 291         return dict ([(f, translation (format_table[status][f]) % locals())
 292                       for f in formats])
 293
 294     def uptodateness (self, formats=['long'], translated=False):
 295         if translated:
 296             translation = self.translation
 297         else:
 298             translation = lambda x: x
 299
 300         if isinstance (formats, str):
 301             formats = [formats]
 302         p = self.uptodate_percentage
 303         if p == None:
 304             status = 'N/A'
 305         elif p == 100:
 306             status = 'up to date'
 307         else:
 308             status = 'outdated'
 309         l = {}
 310         for f in formats:
 311             if f == 'color' and p != None:
 312                 l['color'] = percentage_color (p)
 313             else:
 314                 l[f] = translation (format_table[status][f]) % locals ()
 315         return l
 316
 317     def gdp_status (self):
 318         if self.post_gdp:
 319             return self.translation (format_table['post-GDP'])
 320         else:
 321             return self.translation (format_table['pre-GDP'])
 322
 323     def short_html_status (self):
 324         s = '  <td>'
 325         if self.partially_translated:
 326             s += '<br>\n   '.join (self.translators) + '<br>\n'
 327             if self.checkers:
 328                 s += '   <small>' + \
 329                     '<br>\n   '.join (self.checkers) + '</small><br>\n'
 330
 331         c = self.completeness (['color', 'long'])
 332         s += '   <span style="background-color: #%(color)s">\
 333 %(long)s</span><br>\n' % c
 334
 335         if self.partially_translated:
 336             u = self.uptodateness (['vague', 'color'])
 337             s += '   <span style="background-color: #%(color)s">\
 338 %(vague)s</span><br>\n' % u
 339
 340         s += '  </td>\n'
 341         return s
 342
 343     def text_status (self):
 344         s = self.completeness ('abbr')['abbr'] + ' '
 345
 346         if self.partially_translated:
 347             s += self.uptodateness ('abbr')['abbr'] + ' '
 348         return s
 349
 350     def html_status (self, numbering=SectionNumber ()):
 351         if self.title == 'Untitled':
 352             return ''
 353
 354         if self.level[1] == 0: # if self is a master document
 355             s = '''<table align="center" border="2">
 356  <tr align="center">
 357   <th>%s</th>''' % self.print_title (numbering)
 358             s += ''.join (['  <th>%s</th>\n' % self.translation (h)
 359                            for h in detailed_status_heads])
 360             s += ' </tr>\n'
 361             s += ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 362                 % (self.translation (section_titles_string),
 363                    sum (self.masterdocument.word_count))
 364
 365         else:
 366             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 367                 % (self.print_title (numbering),
 368                    sum (self.masterdocument.word_count))
 369
 370         if self.partially_translated:
 371             s += '  <td>' + '<br>\n   '.join (self.translators) + '</td>\n'
 372             s += '  <td>' + '<br>\n   '.join (self.checkers) + '</td>\n'
 373         else:
 374             s += '  <td></td>\n' * 2
 375
 376         c = self.completeness (['color', 'short'], translated=True)
 377         s += '  <td><span style="background-color: #%(color)s">\
 378 %(short)s</span></td>\n' % {'color': c['color'],
 379                            'short': c['short']}
 380
 381         if self.partially_translated:
 382             u = self.uptodateness (['short', 'color'], translated=True)
 383             s += '  <td><span style="background-color: #%(color)s">\
 384 %(short)s</span></td>\n' % {'color': u['color'],
 385                            'short': u['short']}
 386         else:
 387             s += '  <td></td>\n'
 388
 389         s += '  <td>' + self.gdp_status () + '</td>\n </tr>\n'
 390         s += ''.join ([i.translations[self.language].html_status (numbering)
 391                        for i in self.masterdocument.includes
 392                        if self.language in i.translations])
 393
 394         if self.level[1] == 0:  # if self is a master document
 395             s += '</table>\n<p></p>\n'
 396         return s
 397
 398 class MasterTelyDocument (TelyDocument):
 399     def __init__ (self,
 400                   filename,
 401                   parent_translations=dict ([(lang, None)
 402                                              for lang in langdefs.LANGDICT])):
 403         TelyDocument.__init__ (self, filename)
 404         self.size = len (self.contents)
 405         self.word_count = tely_word_count (self.contents)
 406         translations = dict ([(lang, os.path.join (lang, filename))
 407                               for lang in langdefs.LANGDICT])
 408         self.translations = \
 409             dict ([(lang,
 410                     TranslatedTelyDocument (translations[lang],
 411                                             self, parent_translations.get (lang)))
 412                    for lang in langdefs.LANGDICT
 413                    if os.path.exists (translations[lang])])
 414         if self.translations:
 415             self.includes = [MasterTelyDocument (f, self.translations)
 416                              for f in self.included_files]
 417         else:
 418             self.includes = []
 419
 420     def update_word_counts (self, s):
 421         s = update_word_count (s, self.filename, sum (self.word_count))
 422         for i in self.includes:
 423             s = i.update_word_counts (s)
 424         return s
 425
 426     def html_status (self, numbering=SectionNumber ()):
 427         if self.title == 'Untitled' or not self.translations:
 428             return ''
 429         if self.level[1] == 0: # if self is a master document
 430             s = '''<table align="center" border="2">
 431  <tr align="center">
 432   <th>%s</th>''' % self.print_title (numbering)
 433             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations])
 434             s += ' </tr>\n'
 435             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 436                 % sum (self.word_count)
 437
 438         else:  # if self is an included file
 439             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 440                 % (self.print_title (numbering), sum (self.word_count))
 441
 442         s += ''.join ([t.short_html_status ()
 443                        for t in self.translations.values ()])
 444         s += ' </tr>\n'
 445         s += ''.join ([i.html_status (numbering) for i in self.includes])
 446
 447         if self.level[1] == 0:  # if self is a master document
 448             s += '</table>\n<p></p>\n'
 449         return s
 450
 451     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 452         if self.title == 'Untitled' or not self.translations:
 453             return ''
 454
 455         s = ''
 456         if self.level[1] == 0: # if self is a master document
 457             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 458             s += ''.join (['%s'.ljust (colspec[1]) % l
 459                            for l in self.translations])
 460             s += '\n'
 461             s += ('Section titles (%d)' % \
 462                       sum (self.word_count)).ljust (colspec[0])
 463
 464         else:
 465             s = '%s (%d) ' \
 466                 % (self.print_title (numbering), sum (self.word_count))
 467             s = s.ljust (colspec[0])
 468
 469         s += ''.join ([t.text_status ().ljust(colspec[1])
 470                        for t in self.translations.values ()])
 471         s += '\n\n'
 472         s += ''.join ([i.text_status (numbering) for i in self.includes])
 473
 474         if self.level[1] == 0:
 475             s += '\n'
 476         return s
 477
 478
 479 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 480
 481 counts_re = re.compile (r'(?m)^(\d+) ')
 482
 483 def update_category_word_counts_sub (m):
 484     return '-' + m.group (1) + '-' + m.group (2) + \
 485         str (sum ([int (c)
 486                    for c in counts_re.findall (m.group (2))])).ljust (6) + \
 487         'total'
 488
 489
 490 progress ("Reading documents...")
 491
 492 tely_files = \
 493     buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 494 tely_files.sort ()
 495 master_docs = [MasterTelyDocument (os.path.normpath (filename))
 496                for filename in tely_files]
 497 master_docs = [doc for doc in master_docs if doc.translations]
 498
 499 main_status_page = open ('translations.template.html.in').read ()
 500
 501 enabled_languages = [l for l in langdefs.LANGDICT
 502                      if langdefs.LANGDICT[l].enabled
 503                      and l != 'en']
 504 lang_status_pages = \
 505     dict ([(l, open (os.path.join (l, 'translations.template.html.in')). read ())
 506            for l in enabled_languages])
 507
 508 progress ("Generating status pages...")
 509
 510 date_time = buildlib.read_pipe ('LANG= date -u')[0]
 511
 512 main_status_html = last_updated_string % date_time
 513 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 514
 515 html_re = re.compile ('<html>', re.I)
 516 end_body_re = re.compile ('</body>', re.I)
 517
 518 html_header = '''<html>
 519 <!-- This page is automatically generated by translation-status.py from
 520 translations.template.html.in; DO NOT EDIT !-->'''
 521
 522 main_status_page = html_re.sub (html_header, main_status_page)
 523
 524 main_status_page = end_body_re.sub (main_status_html + '\n</body>',
 525                                     main_status_page)
 526
 527 open ('translations.html.in', 'w').write (main_status_page)
 528
 529 for l in enabled_languages:
 530     date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
 531     lang_status_pages[l] = translation[l] (last_updated_string) % date_time + lang_status_pages[l]
 532     lang_status_page = html_re.sub (html_header, lang_status_pages[l])
 533     html_status = '\n'.join ([doc.translations[l].html_status ()
 534                               for doc in master_docs
 535                               if l in doc.translations])
 536     lang_status_page = end_body_re.sub (html_status + '\n</body>',
 537                                         lang_status_page)
 538     open (os.path.join (l, 'translations.html.in'), 'w').write (lang_status_page)
 539
 540 main_status_txt = '''Documentation translations status
 541 Generated %s
 542 NT = not translated
 543 FT = fully translated
 544
 545 ''' % date_time
 546
 547 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 548
 549 status_txt_file = 'out/translations-status.txt'
 550 progress ("Writing %s..." % status_txt_file)
 551 open (status_txt_file, 'w').write (main_status_txt)
 552
 553 translation_instructions_file = 'TRANSLATION'
 554 progress ("Updating %s..." % translation_instructions_file)
 555 translation_instructions = open (translation_instructions_file).read ()
 556
 557 for doc in master_docs:
 558     translation_instructions = doc.update_word_counts (translation_instructions)
 559
 560 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
 561                              translation_instructions):
 562     word_count = sgml_word_count (open (html_file).read ())
 563     translation_instructions = update_word_count (translation_instructions,
 564                                                   html_file,
 565                                                   word_count)
 566
 567 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
 568                            translation_instructions):
 569     word_count = po_word_count (open (po_file).read ())
 570     translation_instructions = update_word_count (translation_instructions,
 571                                                   po_file,
 572                                                   word_count)
 573
 574 translation_instructions = \
 575     update_category_word_counts_re.sub (update_category_word_counts_sub,
 576                                         translation_instructions)
 577
 578 open (translation_instructions_file, 'w').write (translation_instructions)