buildscripts/translations-status.py

   1 #!/usr/bin/env python
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  Writes translations.html.in and for each LANG in LANGUAGES
LANG/translations.html.in
  12   Writes out/translations-status.txt
  13   Updates word counts in TRANSLATION
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20
  21 import langdefs
  22
  23 def progress (str):
  24     sys.stderr.write (str + '\n')
  25
# Announce the script name on standard error.
progress ("translations-status.py")

# First command-line argument: a directory appended to the module search
# path below.
buildscript_dir = sys.argv[1]

# Identity function wrapped around the user-visible strings of the tables
# defined further down; presumably a marker for message extraction --
# TODO confirm.
_doc = lambda s: s

# buildlib is imported only after buildscript_dir is on sys.path
# (presumably because the module lives in that directory).
sys.path.append (buildscript_dir)
import buildlib

# load gettext messages catalogs
translation = langdefs.translation
  37
  38
  39 language_re = re.compile (r'^@documentlanguage (.+)', re.M)
  40 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  41 space_re = re.compile (r'\s+', re.M)
  42 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  43 node_re = re.compile ('^@node .*?$', re.M)
  44 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|' + \
  45 '(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  46 include_re = re.compile ('^@include (.*?)$', re.M)
  47
  48 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  49 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$',
  50                           re.M | re.I)
  51 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  52 post_gdp_re = re.compile ('post.GDP', re.I)
  53 untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
  54 skeleton_str = '-- SKELETON FILE --'
  55
  56 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
  57                          _doc ('Translated'), _doc ('Up to date'),
  58                          _doc ('Other info')]
  59 format_table = {
  60     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
  61                        'long':_doc ('not translated')},
  62     'partially translated': {'color':'dfef77',
  63                              'short':_doc ('partially (%(p)d %%)'),
  64                              'abbr':'%(p)d%%',
  65                              'long':_doc ('partially translated (%(p)d %%)')},
  66     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
  67                          'long': _doc ('translated')},
  68     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
  69                    'abbr':'100%%', 'vague':_doc ('up to date')},
  70     'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
  71                  'vague':_doc ('partially up to date')},
  72     'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
  73     'pre-GDP':_doc ('pre-GDP'),
  74     'post-GDP':_doc ('post-GDP')
  75 }
  76
  77 texi_level = {
  78 # (Unumbered/Numbered/Lettered, level)
  79     'top': ('u', 0),
  80     'unnumbered': ('u', 1),
  81     'unnumberedsec': ('u', 2),
  82     'unnumberedsubsec': ('u', 3),
  83     'chapter': ('n', 1),
  84     'section': ('n', 2),
  85     'subsection': ('n', 3),
  86     'appendix': ('l', 1)
  87 }
  88
  89 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  90                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  91
  92 class SectionNumber (object):
  93     def __init__ (self):
  94         self.__data = [[0,'u']]
  95
  96     def __increase_last_index (self):
  97         type = self.__data[-1][1]
  98         if type == 'l':
  99             self.__data[-1][0] = \
 100                 self.__data[-1][0].translate (appendix_number_trans)
 101         elif type == 'n':
 102             self.__data[-1][0] += 1
 103
 104     def format (self):
 105         if self.__data[-1][1] == 'u':
 106             return ''
 107         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 108
 109     def increase (self, (type, level)):
 110         if level == 0:
 111             self.__data = [[0,'u']]
 112         while level + 1 < len (self.__data):
 113             del self.__data[-1]
 114         if level + 1 > len (self.__data):
 115             self.__data.append ([0, type])
 116             if type == 'l':
 117                 self.__data[-1][0] = '@'
 118         if type == self.__data[-1][1]:
 119             self.__increase_last_index ()
 120         else:
 121             self.__data[-1] = ([0, type])
 122             if type == 'l':
 123                 self.__data[-1][0] = 'A'
 124             elif type == 'n':
 125                 self.__data[-1][0] = 1
 126         return self.format ()
 127
 128
 129 def percentage_color (percent):
 130     p = percent / 100.0
 131     if p < 0.33:
 132         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
 133              for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 134     elif p < 0.67:
 135         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
 136              for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 137     else:
 138         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
 139              for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 140     return ''.join (c)
 141
 142
 143 def update_word_count (text, filename, word_count):
 144     return re.sub (r'(?m)^(\d+) *' + filename,
 145                    str (word_count).ljust (6) + filename,
 146                    text)
 147
 148 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 149
 150 def po_word_count (po_content):
 151     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 152     return len (space_re.split (s))
 153
 154 sgml_tag_re = re.compile (r'<.*?>', re.S)
 155
 156 def sgml_word_count (sgml_doc):
 157     s = sgml_tag_re.sub ('', sgml_doc)
 158     return len (space_re.split (s))
 159
 160 def tely_word_count (tely_doc):
 161     '''
 162     Calculate word count of a Texinfo document node by node.
 163
 164     Take string tely_doc as an argument.
 165     Return a list of integers.
 166
 167     Texinfo comments and @lilypond blocks are not included in word counts.
 168     '''
 169     tely_doc = comments_re.sub ('', tely_doc)
 170     tely_doc = lilypond_re.sub ('', tely_doc)
 171     nodes = node_re.split (tely_doc)
 172     return [len (space_re.split (n)) for n in nodes]
 173
 174
 175 class TelyDocument (object):
 176     def __init__ (self, filename):
 177         self.filename = filename
 178         self.contents = open (filename).read ()
 179
 180         ## record title and sectionning level of first Texinfo section
 181         m = title_re.search (self.contents)
 182         if m:
 183             self.title = m.group (2)
 184             self.level = texi_level [m.group (1)]
 185         else:
 186             self.title = 'Untitled'
 187             self.level = ('u', 1)
 188
 189         m = language_re.search (self.contents)
 190         if m:
 191             self.language = m.group (1)
 192
 193         included_files = [os.path.join (os.path.dirname (filename), t)
 194                           for t in include_re.findall (self.contents)]
 195         self.included_files = [p for p in included_files if os.path.exists (p)]
 196
 197     def print_title (self, section_number):
 198         return section_number.increase (self.level) + self.title
 199
 200
 201 class TranslatedTelyDocument (TelyDocument):
 202     def __init__ (self, filename, masterdocument, parent_translation=None):
 203         TelyDocument.__init__ (self, filename)
 204
 205         self.masterdocument = masterdocument
 206         if not hasattr (self, 'language') \
 207                 and hasattr (parent_translation, 'language'):
 208             self.language = parent_translation.language
 209         if hasattr (self, 'language'):
 210             self.translation = translation[self.language]
 211         else:
 212             self.translation = lambda x: x
 213
 214         ## record authoring information
 215         m = translators_re.search (self.contents)
 216         if m:
 217             self.translators = [n.strip () for n in m.group (1).split (',')]
 218         else:
 219             self.translators = parent_translation.translators
 220         m = checkers_re.search (self.contents)
 221         if m:
 222             self.checkers = [n.strip () for n in m.group (1).split (',')]
 223         elif isinstance (parent_translation, TranslatedTelyDocument):
 224             self.checkers = parent_translation.checkers
 225         else:
 226             self.checkers = []
 227
 228         ## check whether translation is pre- or post-GDP
 229         m = status_re.search (self.contents)
 230         if m:
 231             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 232         else:
 233             self.post_gdp = False
 234
 235         ## record which parts (nodes) of the file are actually translated
 236         self.partially_translated = not skeleton_str in self.contents
 237         nodes = node_re.split (self.contents)
 238         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 239
 240         ## calculate translation percentage
 241         master_total_word_count = sum (masterdocument.word_count)
 242         translation_word_count = \
 243             sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 244                   for k in range (min (len (masterdocument.word_count),
 245                                        len (self.translated_nodes)))])
 246         self.translation_percentage = \
 247             100 * translation_word_count / master_total_word_count
 248
 249         ## calculate how much the file is outdated
 250         (diff_string, error) = \
 251             buildlib.check_translated_doc (masterdocument.filename, self.contents)
 252         if error:
 253             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 254             self.uptodate_percentage = None
 255         else:
 256             diff = diff_string.splitlines ()
 257             insertions = sum ([len (l) - 1 for l in diff
 258                                if l.startswith ('+')
 259                                and not l.startswith ('+++')])
 260             deletions = sum ([len (l) - 1 for l in diff
 261                               if l.startswith ('-')
 262                               and not l.startswith ('---')])
 263             outdateness_percentage = 50.0 * (deletions + insertions) / \
 264                 (masterdocument.size + 0.5 * (deletions - insertions))
 265             self.uptodate_percentage = 100 - int (outdateness_percentage)
 266             if self.uptodate_percentage > 100:
 267                 alternative = 50
 268                 progress ("%s: strange uptodateness percentage %d %%, \
 269 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 270                 self.uptodate_percentage = alternative
 271             elif self.uptodate_percentage < 1:
 272                 alternative = 1
 273                 progress ("%s: strange uptodateness percentage %d %%, \
 274 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 275                 self.uptodate_percentage = alternative
 276
 277     def completeness (self, formats=['long']):
 278         if isinstance (formats, str):
 279             formats = [formats]
 280         p = self.translation_percentage
 281         if p == 0:
 282             status = 'not translated'
 283         elif p == 100:
 284             status = 'fully translated'
 285         else:
 286             status = 'partially translated'
 287         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 288
 289     def uptodateness (self, formats=['long']):
 290         if isinstance (formats, str):
 291             formats = [formats]
 292         p = self.uptodate_percentage
 293         if p == None:
 294             status = 'N/A'
 295         elif p == 100:
 296             status = 'up to date'
 297         else:
 298             status = 'outdated'
 299         l = {}
 300         for f in formats:
 301             if f == 'color' and p != None:
 302                 l['color'] = percentage_color (p)
 303             else:
 304                 l[f] = format_table[status][f] % locals ()
 305         return l
 306
 307     def gdp_status (self):
 308         if self.post_gdp:
 309             return self.translation (format_table['post-GDP'])
 310         else:
 311             return self.translation (format_table['pre-GDP'])
 312
 313     def short_html_status (self):
 314         s = '  <td>'
 315         if self.partially_translated:
 316             s += '<br>\n   '.join (self.translators) + '<br>\n'
 317             if self.checkers:
 318                 s += '   <small>' + \
 319                     '<br>\n   '.join (self.checkers) + '</small><br>\n'
 320
 321         c = self.completeness (['color', 'long'])
 322         s += '   <span style="background-color: #%(color)s">\
 323 %(long)s</span><br>\n' % c
 324
 325         if self.partially_translated:
 326             u = self.uptodateness (['vague', 'color'])
 327             s += '   <span style="background-color: #%(color)s">\
 328 %(vague)s</span><br>\n' % u
 329
 330         s += '  </td>\n'
 331         return s
 332
 333     def text_status (self):
 334         s = self.completeness ('abbr')['abbr'] + ' '
 335
 336         if self.partially_translated:
 337             s += self.uptodateness ('abbr')['abbr'] + ' '
 338         return s
 339
 340     def html_status (self, numbering=SectionNumber ()):
 341         if self.level[1] == 0: # if self is a master document
 342             s = '''<table align="center" border="2">
 343  <tr align="center">
 344   <th>%s</th>''' % self.print_title (numbering)
 345             s += ''.join (['  <th>%s</th>\n' % self.translation (h)
 346                            for h in detailed_status_heads])
 347             s += ' </tr>\n'
 348             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 349                 % sum (self.masterdocument.word_count)
 350
 351         else:
 352             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 353                 % (self.print_title (numbering),
 354                    sum (self.masterdocument.word_count))
 355
 356         if self.partially_translated:
 357             s += '  <td>' + '<br>\n   '.join (self.translators) + '</td>\n'
 358             s += '  <td>' + '<br>\n   '.join (self.checkers) + '</td>\n'
 359         else:
 360             s += '  <td></td>\n' * 2
 361
 362         c = self.completeness (['color', 'short'])
 363         s += '  <td><span style="background-color: #%(color)s">\
 364 %(short)s</span></td>\n' % {'color': c['color'],
 365                            'short': self.translation (c['short'])}
 366
 367         if self.partially_translated:
 368             u = self.uptodateness (['short', 'color'])
 369             s += '  <td><span style="background-color: #%(color)s">\
 370 %(short)s</span></td>\n' % {'color': u['color'],
 371                            'short': self.translation (u['short'])}
 372         else:
 373             s += '  <td></td>\n'
 374
 375         s += '  <td>' + self.gdp_status () + '</td>\n </tr>\n'
 376         s += ''.join ([i.translations[self.language].html_status (numbering)
 377                        for i in self.masterdocument.includes
 378                        if self.language in i.translations])
 379
 380         if self.level[1] == 0:  # if self is a master document
 381             s += '</table>\n<p></p>\n'
 382         return s
 383
 384 class MasterTelyDocument (TelyDocument):
 385     def __init__ (self,
 386                   filename,
 387                   parent_translations=dict ([(lang, None)
 388                                              for lang in langdefs.LANGDICT])):
 389         TelyDocument.__init__ (self, filename)
 390         self.size = len (self.contents)
 391         self.word_count = tely_word_count (self.contents)
 392         translations = dict ([(lang, os.path.join (lang, filename))
 393                               for lang in langdefs.LANGDICT])
 394         self.translations = \
 395             dict ([(lang,
 396                     TranslatedTelyDocument (translations[lang],
 397                                             self, parent_translations.get (lang)))
 398                    for lang in langdefs.LANGDICT
 399                    if os.path.exists (translations[lang])])
 400         if self.translations:
 401             self.includes = [MasterTelyDocument (f, self.translations)
 402                              for f in self.included_files]
 403         else:
 404             self.includes = []
 405
 406     def update_word_counts (self, s):
 407         s = update_word_count (s, self.filename, sum (self.word_count))
 408         for i in self.includes:
 409             s = i.update_word_counts (s)
 410         return s
 411
 412     def html_status (self, numbering=SectionNumber ()):
 413         if self.title == 'Untitled' or not self.translations:
 414             return ''
 415         if self.level[1] == 0: # if self is a master document
 416             s = '''<table align="center" border="2">
 417  <tr align="center">
 418   <th>%s</th>''' % self.print_title (numbering)
 419             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations])
 420             s += ' </tr>\n'
 421             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 422                 % sum (self.word_count)
 423
 424         else:  # if self is an included file
 425             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 426                 % (self.print_title (numbering), sum (self.word_count))
 427
 428         s += ''.join ([t.short_html_status ()
 429                        for t in self.translations.values ()])
 430         s += ' </tr>\n'
 431         s += ''.join ([i.html_status (numbering) for i in self.includes])
 432
 433         if self.level[1] == 0:  # if self is a master document
 434             s += '</table>\n<p></p>\n'
 435         return s
 436
 437     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 438         if self.title == 'Untitled' or not self.translations:
 439             return ''
 440
 441         s = ''
 442         if self.level[1] == 0: # if self is a master document
 443             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 444             s += ''.join (['%s'.ljust (colspec[1]) % l
 445                            for l in self.translations])
 446             s += '\n'
 447             s += ('Section titles (%d)' % \
 448                       sum (self.word_count)).ljust (colspec[0])
 449
 450         else:
 451             s = '%s (%d) ' \
 452                 % (self.print_title (numbering), sum (self.word_count))
 453             s = s.ljust (colspec[0])
 454
 455         s += ''.join ([t.text_status ().ljust(colspec[1])
 456                        for t in self.translations.values ()])
 457         s += '\n\n'
 458         s += ''.join ([i.text_status (numbering) for i in self.includes])
 459
 460         if self.level[1] == 0:
 461             s += '\n'
 462         return s
 463
 464
 465 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 466
 467 counts_re = re.compile (r'(?m)^(\d+) ')
 468
 469 def update_category_word_counts_sub (m):
 470     return '-' + m.group (1) + '-' + m.group (2) + \
 471         str (sum ([int (c)
 472                    for c in counts_re.findall (m.group (2))])).ljust (6) + \
 473         'total'
 474
 475
 476 progress ("Reading documents...")
 477
 478 tely_files = \
 479     buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 480 tely_files.sort ()
 481 master_docs = [MasterTelyDocument (os.path.normpath (filename))
 482                for filename in tely_files]
 483 master_docs = [doc for doc in master_docs if doc.translations]
 484
 485 main_status_page = open ('translations.template.html.in').read ()
 486
 487 enabled_languages = [l for l in langdefs.LANGDICT
 488                      if langdefs.LANGDICT[l].enabled
 489                      and l != 'en']
 490 lang_status_pages = \
 491     dict ([(l, open (os.path.join (l, 'translations.template.html.in')). read ())
 492            for l in enabled_languages])
 493
 494 progress ("Generating status pages...")
 495
 496 date_time = buildlib.read_pipe ('LANG= date -u')[0]
 497
 498 main_status_html = ' <p><i>Last updated %s</i></p>\n' % date_time
 499 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 500
 501 html_re = re.compile ('<html>', re.I)
 502 end_body_re = re.compile ('</body>', re.I)
 503
 504 html_header = '''<html>
 505 <!-- This page is automatically generated by translation-status.py from
 506 translations.template.html.in; DO NOT EDIT !-->'''
 507
 508 main_status_page = html_re.sub (html_header, main_status_page)
 509
 510 main_status_page = end_body_re.sub (main_status_html + '\n</body>',
 511                                     main_status_page)
 512
 513 open ('translations.html.in', 'w').write (main_status_page)
 514
 515 for l in enabled_languages:
 516     lang_status_page = html_re.sub (html_header, lang_status_pages[l])
 517     html_status = '\n'.join ([doc.translations[l].html_status ()
 518                               for doc in master_docs
 519                               if l in doc.translations])
 520     lang_status_page = end_body_re.sub (html_status + '\n</body>',
 521                                         lang_status_page)
 522     open (os.path.join (l, 'translations.html.in'), 'w').write (lang_status_page)
 523
 524 main_status_txt = '''Documentation translations status
 525 Generated %s
 526 NT = not translated
 527 FT = fully translated
 528
 529 ''' % date_time
 530
 531 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 532
 533 status_txt_file = 'out/translations-status.txt'
 534 progress ("Writing %s..." % status_txt_file)
 535 open (status_txt_file, 'w').write (main_status_txt)
 536
 537 translation_instructions_file = 'TRANSLATION'
 538 progress ("Updating %s..." % translation_instructions_file)
 539 translation_instructions = open (translation_instructions_file).read ()
 540
 541 for doc in master_docs:
 542     translation_instructions = doc.update_word_counts (translation_instructions)
 543
 544 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
 545                              translation_instructions):
 546     word_count = sgml_word_count (open (html_file).read ())
 547     translation_instructions = update_word_count (translation_instructions,
 548                                                   html_file,
 549                                                   word_count)
 550
 551 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
 552                            translation_instructions):
 553     word_count = po_word_count (open (po_file).read ())
 554     translation_instructions = update_word_count (translation_instructions,
 555                                                   po_file,
 556                                                   word_count)
 557
 558 translation_instructions = \
 559     update_category_word_counts_re.sub (update_category_word_counts_sub,
 560                                         translation_instructions)
 561
 562 open (translation_instructions_file, 'w').write (translation_instructions)