buildscripts/translations-status.py

   1 #!/usr/bin/env python
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10   Writes translations.html.in and for each LANG in LANGUAGES
  11 translations.LANG.html.in
  12   Writes out/translations-status.txt
  13   Updates word counts in TRANSLATION
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20
  21 import langdefs
  22
  23 def progress (str):
  24     sys.stderr.write (str + '\n')
  25
  26 progress ("translations-status.py")
  27
  28 buildscript_dir = sys.argv[1]
  29
  30 _doc = lambda s: s
  31
  32 sys.path.append (buildscript_dir)
  33 import buildlib
  34
  35 # load gettext messages catalogs
  36 translation = langdefs.translation
  37
  38
  39 language_re = re.compile (r'^@documentlanguage (.+)', re.M)
  40 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  41 space_re = re.compile (r'\s+', re.M)
  42 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  43 node_re = re.compile ('^@node .*?$', re.M)
  44 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|' + \
  45 '(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  46 include_re = re.compile ('^@include (.*?)$', re.M)
  47
  48 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  49 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$',
  50                           re.M | re.I)
  51 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  52 post_gdp_re = re.compile ('post.GDP', re.I)
  53 untranslated_node_str = '@untranslated'
  54 skeleton_str = '-- SKELETON FILE --'
  55
  56 section_titles_string = _doc ('Section titles')
  57 last_updated_string = _doc (' <p><i>Last updated %s</i></p>\n')
  58 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
  59                          _doc ('Translated'), _doc ('Up to date'),
  60                          _doc ('Other info')]
  61 format_table = {
  62     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
  63                        'long':_doc ('not translated')},
  64     'partially translated': {'color':'dfef77',
  65                              'short':_doc ('partially (%(p)d %%)'),
  66                              'abbr':'%(p)d%%',
  67                              'long':_doc ('partially translated (%(p)d %%)')},
  68     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
  69                          'long': _doc ('translated')},
  70     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
  71                    'abbr':'100%%', 'vague':_doc ('up to date')},
  72     'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
  73                  'vague':_doc ('partially up to date')},
  74     'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
  75     'pre-GDP':_doc ('pre-GDP'),
  76     'post-GDP':_doc ('post-GDP')
  77 }
  78
  79 texi_level = {
  80 # (Unumbered/Numbered/Lettered, level)
  81     'top': ('u', 0),
  82     'unnumbered': ('u', 1),
  83     'unnumberedsec': ('u', 2),
  84     'unnumberedsubsec': ('u', 3),
  85     'chapter': ('n', 1),
  86     'section': ('n', 2),
  87     'subsection': ('n', 3),
  88     'appendix': ('l', 1)
  89 }
  90
  91 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  92                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  93
  94 class SectionNumber (object):
  95     def __init__ (self):
  96         self.__data = [[0,'u']]
  97
  98     def __increase_last_index (self):
  99         type = self.__data[-1][1]
 100         if type == 'l':
 101             self.__data[-1][0] = \
 102                 self.__data[-1][0].translate (appendix_number_trans)
 103         elif type == 'n':
 104             self.__data[-1][0] += 1
 105
 106     def format (self):
 107         if self.__data[-1][1] == 'u':
 108             return ''
 109         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 110
 111     def increase (self, (type, level)):
 112         if level == 0:
 113             self.__data = [[0,'u']]
 114         while level + 1 < len (self.__data):
 115             del self.__data[-1]
 116         if level + 1 > len (self.__data):
 117             self.__data.append ([0, type])
 118             if type == 'l':
 119                 self.__data[-1][0] = '@'
 120         if type == self.__data[-1][1]:
 121             self.__increase_last_index ()
 122         else:
 123             self.__data[-1] = ([0, type])
 124             if type == 'l':
 125                 self.__data[-1][0] = 'A'
 126             elif type == 'n':
 127                 self.__data[-1][0] = 1
 128         return self.format ()
 129
 130
 131 def percentage_color (percent):
 132     p = percent / 100.0
 133     if p < 0.33:
 134         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
 135              for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 136     elif p < 0.67:
 137         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
 138              for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 139     else:
 140         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
 141              for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 142     return ''.join (c)
 143
 144
 145 def update_word_count (text, filename, word_count):
 146     return re.sub (r'(?m)^(\d+) *' + filename,
 147                    str (word_count).ljust (6) + filename,
 148                    text)
 149
 150 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 151
 152 def po_word_count (po_content):
 153     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 154     return len (space_re.split (s))
 155
 156 sgml_tag_re = re.compile (r'<.*?>', re.S)
 157
 158 def sgml_word_count (sgml_doc):
 159     s = sgml_tag_re.sub ('', sgml_doc)
 160     return len (space_re.split (s))
 161
 162 def tely_word_count (tely_doc):
 163     '''
 164     Calculate word count of a Texinfo document node by node.
 165
 166     Take string tely_doc as an argument.
 167     Return a list of integers.
 168
 169     Texinfo comments and @lilypond blocks are not included in word counts.
 170     '''
 171     tely_doc = comments_re.sub ('', tely_doc)
 172     tely_doc = lilypond_re.sub ('', tely_doc)
 173     nodes = node_re.split (tely_doc)
 174     return [len (space_re.split (n)) for n in nodes]
 175
 176
 177 class TelyDocument (object):
 178     def __init__ (self, filename):
 179         self.filename = filename
 180         self.contents = open (filename).read ()
 181
 182         ## record title and sectionning level of first Texinfo section
 183         m = title_re.search (self.contents)
 184         if m:
 185             self.title = m.group (2)
 186             self.level = texi_level [m.group (1)]
 187         else:
 188             self.title = 'Untitled'
 189             self.level = ('u', 1)
 190
 191         m = language_re.search (self.contents)
 192         if m:
 193             self.language = m.group (1)
 194
 195         included_files = [os.path.join (os.path.dirname (filename), t)
 196                           for t in include_re.findall (self.contents)]
 197         self.included_files = [p for p in included_files if os.path.exists (p)]
 198
 199     def print_title (self, section_number):
 200         return section_number.increase (self.level) + self.title
 201
 202
 203 class TranslatedTelyDocument (TelyDocument):
 204     def __init__ (self, filename, masterdocument, parent_translation=None):
 205         TelyDocument.__init__ (self, filename)
 206
 207         self.masterdocument = masterdocument
 208         if not hasattr (self, 'language') \
 209                 and hasattr (parent_translation, 'language'):
 210             self.language = parent_translation.language
 211         if hasattr (self, 'language'):
 212             self.translation = translation[self.language]
 213         else:
 214             self.translation = lambda x: x
 215         self.title = self.translation (self.title)
 216
 217         ## record authoring information
 218         m = translators_re.search (self.contents)
 219         if m:
 220             self.translators = [n.strip () for n in m.group (1).split (',')]
 221         else:
 222             self.translators = parent_translation.translators
 223         m = checkers_re.search (self.contents)
 224         if m:
 225             self.checkers = [n.strip () for n in m.group (1).split (',')]
 226         elif isinstance (parent_translation, TranslatedTelyDocument):
 227             self.checkers = parent_translation.checkers
 228         else:
 229             self.checkers = []
 230
 231         ## check whether translation is pre- or post-GDP
 232         m = status_re.search (self.contents)
 233         if m:
 234             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 235         else:
 236             self.post_gdp = False
 237
 238         ## record which parts (nodes) of the file are actually translated
 239         self.partially_translated = not skeleton_str in self.contents
 240         nodes = node_re.split (self.contents)
 241         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 242
 243         ## calculate translation percentage
 244         master_total_word_count = sum (masterdocument.word_count)
 245         translation_word_count = \
 246             sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 247                   for k in range (min (len (masterdocument.word_count),
 248                                        len (self.translated_nodes)))])
 249         self.translation_percentage = \
 250             100 * translation_word_count / master_total_word_count
 251
 252         ## calculate how much the file is outdated
 253         (diff_string, error) = \
 254             buildlib.check_translated_doc (masterdocument.filename, self.contents)
 255         if error:
 256             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 257             self.uptodate_percentage = None
 258         else:
 259             diff = diff_string.splitlines ()
 260             insertions = sum ([len (l) - 1 for l in diff
 261                                if l.startswith ('+')
 262                                and not l.startswith ('+++')])
 263             deletions = sum ([len (l) - 1 for l in diff
 264                               if l.startswith ('-')
 265                               and not l.startswith ('---')])
 266             outdateness_percentage = 50.0 * (deletions + insertions) / \
 267                 (masterdocument.size + 0.5 * (deletions - insertions))
 268             self.uptodate_percentage = 100 - int (outdateness_percentage)
 269             if self.uptodate_percentage > 100:
 270                 alternative = 50
 271                 progress ("%s: strange uptodateness percentage %d %%, \
 272 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 273                 self.uptodate_percentage = alternative
 274             elif self.uptodate_percentage < 1:
 275                 alternative = 1
 276                 progress ("%s: strange uptodateness percentage %d %%, \
 277 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 278                 self.uptodate_percentage = alternative
 279
 280     def completeness (self, formats=['long'], translated=False):
 281         if translated:
 282             translation = self.translation
 283         else:
 284             translation = lambda x: x
 285
 286         if isinstance (formats, str):
 287             formats = [formats]
 288         p = self.translation_percentage
 289         if p == 0:
 290             status = 'not translated'
 291         elif p == 100:
 292             status = 'fully translated'
 293         else:
 294             status = 'partially translated'
 295         return dict ([(f, translation (format_table[status][f]) % locals())
 296                       for f in formats])
 297
 298     def uptodateness (self, formats=['long'], translated=False):
 299         if translated:
 300             translation = self.translation
 301         else:
 302             translation = lambda x: x
 303
 304         if isinstance (formats, str):
 305             formats = [formats]
 306         p = self.uptodate_percentage
 307         if p == None:
 308             status = 'N/A'
 309         elif p == 100:
 310             status = 'up to date'
 311         else:
 312             status = 'outdated'
 313         l = {}
 314         for f in formats:
 315             if f == 'color' and p != None:
 316                 l['color'] = percentage_color (p)
 317             else:
 318                 l[f] = translation (format_table[status][f]) % locals ()
 319         return l
 320
 321     def gdp_status (self):
 322         if self.post_gdp:
 323             return self.translation (format_table['post-GDP'])
 324         else:
 325             return self.translation (format_table['pre-GDP'])
 326
 327     def short_html_status (self):
 328         s = '  <td>'
 329         if self.partially_translated:
 330             s += '<br>\n   '.join (self.translators) + '<br>\n'
 331             if self.checkers:
 332                 s += '   <small>' + \
 333                     '<br>\n   '.join (self.checkers) + '</small><br>\n'
 334
 335         c = self.completeness (['color', 'long'])
 336         s += '   <span style="background-color: #%(color)s">\
 337 %(long)s</span><br>\n' % c
 338
 339         if self.partially_translated:
 340             u = self.uptodateness (['vague', 'color'])
 341             s += '   <span style="background-color: #%(color)s">\
 342 %(vague)s</span><br>\n' % u
 343
 344         s += '  </td>\n'
 345         return s
 346
 347     def text_status (self):
 348         s = self.completeness ('abbr')['abbr'] + ' '
 349
 350         if self.partially_translated:
 351             s += self.uptodateness ('abbr')['abbr'] + ' '
 352         return s
 353
 354     def html_status (self, numbering=SectionNumber ()):
 355         if self.title == 'Untitled':
 356             return ''
 357
 358         if self.level[1] == 0: # if self is a master document
 359             s = '''<table align="center" border="2">
 360  <tr align="center">
 361   <th>%s</th>''' % self.print_title (numbering)
 362             s += ''.join (['  <th>%s</th>\n' % self.translation (h)
 363                            for h in detailed_status_heads])
 364             s += ' </tr>\n'
 365             s += ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 366                 % (self.translation (section_titles_string),
 367                    sum (self.masterdocument.word_count))
 368
 369         else:
 370             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 371                 % (self.print_title (numbering),
 372                    sum (self.masterdocument.word_count))
 373
 374         if self.partially_translated:
 375             s += '  <td>' + '<br>\n   '.join (self.translators) + '</td>\n'
 376             s += '  <td>' + '<br>\n   '.join (self.checkers) + '</td>\n'
 377         else:
 378             s += '  <td></td>\n' * 2
 379
 380         c = self.completeness (['color', 'short'], translated=True)
 381         s += '  <td><span style="background-color: #%(color)s">\
 382 %(short)s</span></td>\n' % {'color': c['color'],
 383                            'short': c['short']}
 384
 385         if self.partially_translated:
 386             u = self.uptodateness (['short', 'color'], translated=True)
 387             s += '  <td><span style="background-color: #%(color)s">\
 388 %(short)s</span></td>\n' % {'color': u['color'],
 389                            'short': u['short']}
 390         else:
 391             s += '  <td></td>\n'
 392
 393         s += '  <td>' + self.gdp_status () + '</td>\n </tr>\n'
 394         s += ''.join ([i.translations[self.language].html_status (numbering)
 395                        for i in self.masterdocument.includes
 396                        if self.language in i.translations])
 397
 398         if self.level[1] == 0:  # if self is a master document
 399             s += '</table>\n<p></p>\n'
 400         return s
 401
 402 class MasterTelyDocument (TelyDocument):
 403     def __init__ (self,
 404                   filename,
 405                   parent_translations=dict ([(lang, None)
 406                                              for lang in langdefs.LANGDICT])):
 407         TelyDocument.__init__ (self, filename)
 408         self.size = len (self.contents)
 409         self.word_count = tely_word_count (self.contents)
 410         translations = dict ([(lang, os.path.join (lang, filename))
 411                               for lang in langdefs.LANGDICT])
 412         self.translations = \
 413             dict ([(lang,
 414                     TranslatedTelyDocument (translations[lang],
 415                                             self, parent_translations.get (lang)))
 416                    for lang in langdefs.LANGDICT
 417                    if os.path.exists (translations[lang])])
 418         if self.translations:
 419             self.includes = [MasterTelyDocument (f, self.translations)
 420                              for f in self.included_files]
 421         else:
 422             self.includes = []
 423
 424     def update_word_counts (self, s):
 425         s = update_word_count (s, self.filename, sum (self.word_count))
 426         for i in self.includes:
 427             s = i.update_word_counts (s)
 428         return s
 429
 430     def html_status (self, numbering=SectionNumber ()):
 431         if self.title == 'Untitled' or not self.translations:
 432             return ''
 433         if self.level[1] == 0: # if self is a master document
 434             s = '''<table align="center" border="2">
 435  <tr align="center">
 436   <th>%s</th>''' % self.print_title (numbering)
 437             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations])
 438             s += ' </tr>\n'
 439             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 440                 % sum (self.word_count)
 441
 442         else:  # if self is an included file
 443             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 444                 % (self.print_title (numbering), sum (self.word_count))
 445
 446         s += ''.join ([t.short_html_status ()
 447                        for t in self.translations.values ()])
 448         s += ' </tr>\n'
 449         s += ''.join ([i.html_status (numbering) for i in self.includes])
 450
 451         if self.level[1] == 0:  # if self is a master document
 452             s += '</table>\n<p></p>\n'
 453         return s
 454
 455     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 456         if self.title == 'Untitled' or not self.translations:
 457             return ''
 458
 459         s = ''
 460         if self.level[1] == 0: # if self is a master document
 461             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 462             s += ''.join (['%s'.ljust (colspec[1]) % l
 463                            for l in self.translations])
 464             s += '\n'
 465             s += ('Section titles (%d)' % \
 466                       sum (self.word_count)).ljust (colspec[0])
 467
 468         else:
 469             s = '%s (%d) ' \
 470                 % (self.print_title (numbering), sum (self.word_count))
 471             s = s.ljust (colspec[0])
 472
 473         s += ''.join ([t.text_status ().ljust(colspec[1])
 474                        for t in self.translations.values ()])
 475         s += '\n\n'
 476         s += ''.join ([i.text_status (numbering) for i in self.includes])
 477
 478         if self.level[1] == 0:
 479             s += '\n'
 480         return s
 481
 482
 483 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 484
 485 counts_re = re.compile (r'(?m)^(\d+) ')
 486
 487 def update_category_word_counts_sub (m):
 488     return '-' + m.group (1) + '-' + m.group (2) + \
 489         str (sum ([int (c)
 490                    for c in counts_re.findall (m.group (2))])).ljust (6) + \
 491         'total'
 492
 493
 494 progress ("Reading documents...")
 495
 496 tely_files = \
 497     buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 498 tely_files.sort ()
 499 master_docs = [MasterTelyDocument (os.path.normpath (filename))
 500                for filename in tely_files]
 501 master_docs = [doc for doc in master_docs if doc.translations]
 502
 503 main_status_page = open ('translations.template.html.in').read ()
 504
 505 enabled_languages = [l for l in langdefs.LANGDICT
 506                      if langdefs.LANGDICT[l].enabled
 507                      and l != 'en']
 508 lang_status_pages = \
 509     dict ([(l, open (os.path.join (l, 'translations.template.html.in')). read ())
 510            for l in enabled_languages])
 511
 512 progress ("Generating status pages...")
 513
 514 date_time = buildlib.read_pipe ('LANG= date -u')[0]
 515
 516 main_status_html = last_updated_string % date_time
 517 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 518
 519 html_re = re.compile ('<html>', re.I)
 520 end_body_re = re.compile ('</body>', re.I)
 521
 522 html_header = '''<html>
 523 <!-- This page is automatically generated by translation-status.py from
 524 translations.template.html.in; DO NOT EDIT !-->'''
 525
 526 main_status_page = html_re.sub (html_header, main_status_page)
 527
 528 main_status_page = end_body_re.sub (main_status_html + '\n</body>',
 529                                     main_status_page)
 530
 531 open ('translations.html.in', 'w').write (main_status_page)
 532
 533 for l in enabled_languages:
 534     date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
 535     lang_status_pages[l] = translation[l] (last_updated_string) % date_time + lang_status_pages[l]
 536     lang_status_page = html_re.sub (html_header, lang_status_pages[l])
 537     html_status = '\n'.join ([doc.translations[l].html_status ()
 538                               for doc in master_docs
 539                               if l in doc.translations])
 540     lang_status_page = end_body_re.sub (html_status + '\n</body>',
 541                                         lang_status_page)
 542     open (os.path.join (l, 'translations.html.in'), 'w').write (lang_status_page)
 543
 544 main_status_txt = '''Documentation translations status
 545 Generated %s
 546 NT = not translated
 547 FT = fully translated
 548
 549 ''' % date_time
 550
 551 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 552
 553 status_txt_file = 'out/translations-status.txt'
 554 progress ("Writing %s..." % status_txt_file)
 555 open (status_txt_file, 'w').write (main_status_txt)
 556
 557 translation_instructions_file = 'TRANSLATION'
 558 progress ("Updating %s..." % translation_instructions_file)
 559 translation_instructions = open (translation_instructions_file).read ()
 560
 561 for doc in master_docs:
 562     translation_instructions = doc.update_word_counts (translation_instructions)
 563
 564 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
 565                              translation_instructions):
 566     word_count = sgml_word_count (open (html_file).read ())
 567     translation_instructions = update_word_count (translation_instructions,
 568                                                   html_file,
 569                                                   word_count)
 570
 571 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
 572                            translation_instructions):
 573     word_count = po_word_count (open (po_file).read ())
 574     translation_instructions = update_word_count (translation_instructions,
 575                                                   po_file,
 576                                                   word_count)
 577
 578 translation_instructions = \
 579     update_category_word_counts_re.sub (update_category_word_counts_sub,
 580                                         translation_instructions)
 581
 582 open (translation_instructions_file, 'w').write (translation_instructions)