scripts/auxiliar/translations-status.py

   1 #!/usr/bin/env python
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  Writes translations.html.in and for each LANG in LANGUAGES
LANG/translations.html.in
  Writes out/translations-status.txt
  Updates word counts in contributor/doc-translation-list.itexi
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20
  21 import langdefs
  22 import buildlib
  23
  24 def progress (str):
  25     sys.stderr.write (str + '\n')
  26
  27 exit_code = 0
  28
  29 def error (str, update_status=1):
  30     global exit_code
  31     sys.stderr.write ('translations-status.py: %s\n' % str)
  32     exit_code = max (exit_code, update_status)
  33
# Announce on stderr that the script has started.
progress ("translations-status.py")

# Identity function wrapped around every user-visible message below.
# NOTE(review): presumably a marker so that a message extraction tool can
# find translatable strings -- confirm against the build scripts.
_doc = lambda s: s

# load gettext messages catalogs
# `translation` is indexed by language code and each entry is called on a
# message string (see TranslatedTelyDocument and the per-language page loop).
translation = langdefs.translation
  40
  41
# Line "@documentlanguage LANG"; group 1 is the rest of the line.
language_re = re.compile (r'^@documentlanguage (.+)', re.M)
# Texinfo comments: "@ignore ... @end ignore" blocks and "@c ..." up to the
# end of the line.
comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
# Any run of whitespace; used as the word separator by the word counters.
space_re = re.compile (r'\s+', re.M)
# LilyPond snippets: inline "@lilypond{...}" or a block ending with a
# "@end lilypond" line.
lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
# A whole "@node ..." line; documents are split into nodes on it.
node_re = re.compile ('^@node .*?$', re.M)
# Sectioning command line: group 1 is the command name (looked up in
# texi_level), group 2 is the title text.
title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|' + \
'(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
# "@include FILE" line; group 1 is the file name.
include_re = re.compile ('^@include (.*?)$', re.M)

# Authoring information stored in Texinfo comments (case-insensitive);
# group 1 is the text after the colon.
translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$',
                          re.M | re.I)
status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
# Matches "post" and "GDP" separated by any single character, ignoring case.
post_gdp_re = re.compile ('post.GDP', re.I)
# A node containing this string is counted as not translated.
untranslated_node_str = '@untranslated'
# A file containing this string is treated as not translated at all
# (partially_translated is set to False for it).
skeleton_str = '-- SKELETON FILE --'
  58
# User-visible messages; each one is passed through _doc ().
section_titles_string = _doc ('Section titles')
# Formatted with the output of the `date' command.
last_updated_string = _doc (' <p><i>Last updated %s</i></p>\n')
# Column headings of the detailed per-language status table.
detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
                         _doc ('Translated'), _doc ('Up to date'),
                         _doc ('Other info')]
# Presentation of each status.  For the dictionary entries, the keys are
# format names ('short', 'long', 'abbr', 'vague', 'color'); not every status
# defines every format.  Values may contain the %(p)d placeholder, which is
# filled with a percentage by TranslatedTelyDocument.completeness () and
# uptodateness ().  'color' values are hexadecimal RGB strings without '#'.
# 'pre-GDP' and 'post-GDP' are plain messages, not dictionaries.
format_table = {
    'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
                       'long':_doc ('not translated')},
    'partially translated': {'color':'dfef77',
                             'short':_doc ('partially (%(p)d %%)'),
                             'abbr':'%(p)d%%',
                             'long':_doc ('partially translated (%(p)d %%)')},
    'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
                         'long': _doc ('translated')},
    'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
                   'abbr':'100%%', 'vague':_doc ('up to date')},
    'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
                 'vague':_doc ('partially up to date')},
    'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
    'pre-GDP':_doc ('pre-GDP'),
    'post-GDP':_doc ('post-GDP')
}
  81
  82 texi_level = {
  83 # (Unumbered/Numbered/Lettered, level)
  84     'top': ('u', 0),
  85     'unnumbered': ('u', 1),
  86     'unnumberedsec': ('u', 2),
  87     'unnumberedsubsec': ('u', 3),
  88     'chapter': ('n', 1),
  89     'section': ('n', 2),
  90     'subsection': ('n', 3),
  91     'appendix': ('l', 1),
  92     'appendixsec': ('l', 2),
  93 }
  94
  95 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  96                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  97
  98 class SectionNumber (object):
  99     def __init__ (self):
 100         self.__data = [[0,'u']]
 101
 102     def __increase_last_index (self):
 103         type = self.__data[-1][1]
 104         if type == 'l':
 105             self.__data[-1][0] = \
 106                 self.__data[-1][0].translate (appendix_number_trans)
 107         elif type == 'n':
 108             self.__data[-1][0] += 1
 109
 110     def format (self):
 111         if self.__data[-1][1] == 'u':
 112             return ''
 113         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 114
 115     def increase (self, (type, level)):
 116         if level == 0:
 117             self.__data = [[0,'u']]
 118         while level + 1 < len (self.__data):
 119             del self.__data[-1]
 120         if level + 1 > len (self.__data):
 121             self.__data.append ([0, type])
 122             if type == 'l':
 123                 self.__data[-1][0] = '@'
 124         if type == self.__data[-1][1]:
 125             self.__increase_last_index ()
 126         else:
 127             self.__data[-1] = ([0, type])
 128             if type == 'l':
 129                 self.__data[-1][0] = 'A'
 130             elif type == 'n':
 131                 self.__data[-1][0] = 1
 132         return self.format ()
 133
 134
 135 def percentage_color (percent):
 136     p = percent / 100.0
 137     if p < 0.33:
 138         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
 139              for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 140     elif p < 0.67:
 141         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
 142              for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 143     else:
 144         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
 145              for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 146     return ''.join (c)
 147
 148
 149 def update_word_count (text, filename, word_count):
 150     return re.sub (r'(?m)^(\d+) *' + filename,
 151                    str (word_count).ljust (6) + filename,
 152                    text)
 153
 154 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 155
 156 def po_word_count (po_content):
 157     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 158     return len (space_re.split (s))
 159
 160 sgml_tag_re = re.compile (r'<.*?>', re.S)
 161
 162 def sgml_word_count (sgml_doc):
 163     s = sgml_tag_re.sub ('', sgml_doc)
 164     return len (space_re.split (s))
 165
 166 def tely_word_count (tely_doc):
 167     '''
 168     Calculate word count of a Texinfo document node by node.
 169
 170     Take string tely_doc as an argument.
 171     Return a list of integers.
 172
 173     Texinfo comments and @lilypond blocks are not included in word counts.
 174     '''
 175     tely_doc = comments_re.sub ('', tely_doc)
 176     tely_doc = lilypond_re.sub ('', tely_doc)
 177     nodes = node_re.split (tely_doc)
 178     return [len (space_re.split (n)) for n in nodes]
 179
 180
 181 class TelyDocument (object):
 182     def __init__ (self, filename):
 183         self.filename = filename
 184         self.contents = open (filename).read ()
 185
 186         ## record title and sectionning level of first Texinfo section
 187         m = title_re.search (self.contents)
 188         if m:
 189             self.title = m.group (2)
 190             self.level = texi_level [m.group (1)]
 191         else:
 192             self.title = 'Untitled'
 193             self.level = ('u', 1)
 194
 195         m = language_re.search (self.contents)
 196         if m:
 197             self.language = m.group (1)
 198
 199         included_files = [os.path.join (os.path.dirname (filename), t)
 200                           for t in include_re.findall (self.contents)]
 201         self.included_files = [p for p in included_files if os.path.exists (p)]
 202
 203     def print_title (self, section_number):
 204         return section_number.increase (self.level) + self.title
 205
 206
 207 class TranslatedTelyDocument (TelyDocument):
 208     def __init__ (self, filename, masterdocument, parent_translation=None):
 209         TelyDocument.__init__ (self, filename)
 210
 211         self.masterdocument = masterdocument
 212         if not hasattr (self, 'language') \
 213                 and hasattr (parent_translation, 'language'):
 214             self.language = parent_translation.language
 215         if hasattr (self, 'language'):
 216             self.translation = translation[self.language]
 217         else:
 218             self.translation = lambda x: x
 219         self.title = self.translation (self.title)
 220
 221         ## record authoring information
 222         m = translators_re.search (self.contents)
 223         if m:
 224             self.translators = [n.strip () for n in m.group (1).split (',')]
 225         else:
 226             try:
 227                 self.translators = parent_translation.translators
 228             except:
 229                 if 'macros.itexi' in self.filename:
 230                     self.translators = ['']
 231                 else:
 232                     error ('%s: no translator name found, \nplease \
 233 specify at least one in the master file as a line containing\n\
 234 @c Translators: FirstName1 LastName1, FirstName2 LastName2' % self.filename)
 235         m = checkers_re.search (self.contents)
 236         if m:
 237             self.checkers = [n.strip () for n in m.group (1).split (',')]
 238         elif isinstance (parent_translation, TranslatedTelyDocument):
 239             self.checkers = parent_translation.checkers
 240         else:
 241             self.checkers = []
 242
 243         ## check whether translation is pre- or post-GDP
 244         m = status_re.search (self.contents)
 245         if m:
 246             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 247         else:
 248             self.post_gdp = False
 249
 250         ## record which parts (nodes) of the file are actually translated
 251         self.partially_translated = not skeleton_str in self.contents
 252         nodes = node_re.split (self.contents)
 253         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 254
 255         ## calculate translation percentage
 256         master_total_word_count = sum (masterdocument.word_count)
 257         translation_word_count = \
 258             sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 259                   for k in range (min (len (masterdocument.word_count),
 260                                        len (self.translated_nodes)))])
 261         self.translation_percentage = \
 262             100 * translation_word_count / master_total_word_count
 263
 264         ## calculate how much the file is outdated
 265         (diff_string, git_error) = \
 266             buildlib.check_translated_doc (masterdocument.filename, self.filename, self.contents)
 267         if git_error:
 268             sys.stderr.write ('warning: %s: %s' % (self.filename, git_error))
 269             self.uptodate_percentage = None
 270         else:
 271             diff = diff_string.splitlines ()
 272             insertions = sum ([len (l) - 1 for l in diff
 273                                if l.startswith ('+')
 274                                and not l.startswith ('+++')])
 275             deletions = sum ([len (l) - 1 for l in diff
 276                               if l.startswith ('-')
 277                               and not l.startswith ('---')])
 278             outdateness_percentage = 50.0 * (deletions + insertions) / \
 279                 (masterdocument.size + 0.5 * (deletions - insertions))
 280             self.uptodate_percentage = 100 - int (outdateness_percentage)
 281             if self.uptodate_percentage > 100:
 282                 alternative = 50
 283                 progress ("%s: strange uptodateness percentage %d %%, \
 284 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 285                 self.uptodate_percentage = alternative
 286             elif self.uptodate_percentage < 1:
 287                 alternative = 1
 288                 progress ("%s: strange uptodateness percentage %d %%, \
 289 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 290                 self.uptodate_percentage = alternative
 291
 292     def completeness (self, formats=['long'], translated=False):
 293         if translated:
 294             translation = self.translation
 295         else:
 296             translation = lambda x: x
 297
 298         if isinstance (formats, str):
 299             formats = [formats]
 300         p = self.translation_percentage
 301         if p == 0:
 302             status = 'not translated'
 303         elif p == 100:
 304             status = 'fully translated'
 305         else:
 306             status = 'partially translated'
 307         return dict ([(f, translation (format_table[status][f]) % locals())
 308                       for f in formats])
 309
 310     def uptodateness (self, formats=['long'], translated=False):
 311         if translated:
 312             translation = self.translation
 313         else:
 314             translation = lambda x: x
 315
 316         if isinstance (formats, str):
 317             formats = [formats]
 318         p = self.uptodate_percentage
 319         if p == None:
 320             status = 'N/A'
 321         elif p == 100:
 322             status = 'up to date'
 323         else:
 324             status = 'outdated'
 325         l = {}
 326         for f in formats:
 327             if f == 'color' and p != None:
 328                 l['color'] = percentage_color (p)
 329             else:
 330                 l[f] = translation (format_table[status][f]) % locals ()
 331         return l
 332
 333     def gdp_status (self):
 334         if self.post_gdp:
 335             return self.translation (format_table['post-GDP'])
 336         else:
 337             return self.translation (format_table['pre-GDP'])
 338
 339     def short_html_status (self):
 340         s = '  <td>'
 341         if self.partially_translated:
 342             s += '<br>\n   '.join (self.translators) + '<br>\n'
 343             if self.checkers:
 344                 s += '   <small>' + \
 345                     '<br>\n   '.join (self.checkers) + '</small><br>\n'
 346
 347         c = self.completeness (['color', 'long'])
 348         s += '   <span style="background-color: #%(color)s">\
 349 %(long)s</span><br>\n' % c
 350
 351         if self.partially_translated:
 352             u = self.uptodateness (['vague', 'color'])
 353             s += '   <span style="background-color: #%(color)s">\
 354 %(vague)s</span><br>\n' % u
 355
 356         s += '  </td>\n'
 357         return s
 358
 359     def text_status (self):
 360         s = self.completeness ('abbr')['abbr'] + ' '
 361
 362         if self.partially_translated:
 363             s += self.uptodateness ('abbr')['abbr'] + ' '
 364         return s
 365
 366     def html_status (self, numbering=SectionNumber ()):
 367         if self.title == 'Untitled':
 368             return ''
 369
 370         if self.level[1] == 0: # if self is a master document
 371             s = '''<table align="center" border="2">
 372  <tr align="center">
 373   <th>%s</th>''' % self.print_title (numbering)
 374             s += ''.join (['  <th>%s</th>\n' % self.translation (h)
 375                            for h in detailed_status_heads])
 376             s += ' </tr>\n'
 377             s += ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 378                 % (self.translation (section_titles_string),
 379                    sum (self.masterdocument.word_count))
 380
 381         else:
 382             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 383                 % (self.print_title (numbering),
 384                    sum (self.masterdocument.word_count))
 385
 386         if self.partially_translated:
 387             s += '  <td>' + '<br>\n   '.join (self.translators) + '</td>\n'
 388             s += '  <td>' + '<br>\n   '.join (self.checkers) + '</td>\n'
 389         else:
 390             s += '  <td></td>\n' * 2
 391
 392         c = self.completeness (['color', 'short'], translated=True)
 393         s += '  <td><span style="background-color: #%(color)s">\
 394 %(short)s</span></td>\n' % {'color': c['color'],
 395                            'short': c['short']}
 396
 397         if self.partially_translated:
 398             u = self.uptodateness (['short', 'color'], translated=True)
 399             s += '  <td><span style="background-color: #%(color)s">\
 400 %(short)s</span></td>\n' % {'color': u['color'],
 401                            'short': u['short']}
 402         else:
 403             s += '  <td></td>\n'
 404
 405         s += '  <td>' + self.gdp_status () + '</td>\n </tr>\n'
 406         s += ''.join ([i.translations[self.language].html_status (numbering)
 407                        for i in self.masterdocument.includes
 408                        if self.language in i.translations])
 409
 410         if self.level[1] == 0:  # if self is a master document
 411             s += '</table>\n<p></p>\n'
 412         return s
 413
 414 class MasterTelyDocument (TelyDocument):
 415     def __init__ (self,
 416                   filename,
 417                   parent_translations=dict ([(lang, None)
 418                                              for lang in langdefs.LANGDICT])):
 419         TelyDocument.__init__ (self, filename)
 420         self.size = len (self.contents)
 421         self.word_count = tely_word_count (self.contents)
 422         translations = dict ([(lang, os.path.join (lang, filename))
 423                               for lang in langdefs.LANGDICT])
 424         self.translations = \
 425             dict ([(lang,
 426                     TranslatedTelyDocument (translations[lang],
 427                                             self, parent_translations.get (lang)))
 428                    for lang in langdefs.LANGDICT
 429                    if os.path.exists (translations[lang])])
 430         if self.translations:
 431             self.includes = [MasterTelyDocument (f, self.translations)
 432                              for f in self.included_files]
 433         else:
 434             self.includes = []
 435
 436     def update_word_counts (self, s):
 437         s = update_word_count (s, self.filename, sum (self.word_count))
 438         for i in self.includes:
 439             s = i.update_word_counts (s)
 440         return s
 441
 442     def html_status (self, numbering=SectionNumber ()):
 443         if self.title == 'Untitled' or not self.translations:
 444             return ''
 445         if self.level[1] == 0: # if self is a master document
 446             s = '''<table align="center" border="2">
 447  <tr align="center">
 448   <th>%s</th>''' % self.print_title (numbering)
 449             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations])
 450             s += ' </tr>\n'
 451             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 452                 % sum (self.word_count)
 453
 454         else:  # if self is an included file
 455             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 456                 % (self.print_title (numbering), sum (self.word_count))
 457
 458         s += ''.join ([t.short_html_status ()
 459                        for t in self.translations.values ()])
 460         s += ' </tr>\n'
 461         s += ''.join ([i.html_status (numbering) for i in self.includes])
 462
 463         if self.level[1] == 0:  # if self is a master document
 464             s += '</table>\n<p></p>\n'
 465         return s
 466
 467     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 468         if self.title == 'Untitled' or not self.translations:
 469             return ''
 470
 471         s = ''
 472         if self.level[1] == 0: # if self is a master document
 473             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 474             s += ''.join (['%s'.ljust (colspec[1]) % l
 475                            for l in self.translations])
 476             s += '\n'
 477             s += ('Section titles (%d)' % \
 478                       sum (self.word_count)).ljust (colspec[0])
 479
 480         else:
 481             s = '%s (%d) ' \
 482                 % (self.print_title (numbering), sum (self.word_count))
 483             s = s.ljust (colspec[0])
 484
 485         s += ''.join ([t.text_status ().ljust(colspec[1])
 486                        for t in self.translations.values ()])
 487         s += '\n\n'
 488         s += ''.join ([i.text_status (numbering) for i in self.includes])
 489
 490         if self.level[1] == 0:
 491             s += '\n'
 492         return s
 493
 494
 495 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 496
 497 counts_re = re.compile (r'(?m)^(\d+) ')
 498
 499 def update_category_word_counts_sub (m):
 500     return '-' + m.group (1) + '-' + m.group (2) + \
 501         str (sum ([int (c)
 502                    for c in counts_re.findall (m.group (2))])).ljust (6) + \
 503         'total'
 504
 505
 506 progress ("Reading documents...")
 507
 508 tely_files = \
 509     buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 510 tely_files.sort ()
 511 master_docs = [MasterTelyDocument (os.path.normpath (filename))
 512                for filename in tely_files]
 513 master_docs = [doc for doc in master_docs if doc.translations]
 514
 515 main_status_page = open ('translations.template.html.in').read ()
 516
 517 enabled_languages = [l for l in langdefs.LANGDICT
 518                      if langdefs.LANGDICT[l].enabled
 519                      and l != 'en']
 520 lang_status_pages = \
 521     dict ([(l, open (os.path.join (l, 'translations.template.html.in')). read ())
 522            for l in enabled_languages])
 523
 524 progress ("Generating status pages...")
 525
 526 date_time = buildlib.read_pipe ('LANG= date -u')[0]
 527
 528 main_status_html = last_updated_string % date_time
 529 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 530
 531 html_re = re.compile ('<html>', re.I)
 532 end_body_re = re.compile ('</body>', re.I)
 533
 534 html_header = '''<html>
 535 <!-- This page is automatically generated by translation-status.py from
 536 translations.template.html.in; DO NOT EDIT !-->'''
 537
 538 main_status_page = html_re.sub (html_header, main_status_page)
 539
 540 main_status_page = end_body_re.sub (main_status_html + '\n</body>',
 541                                     main_status_page)
 542
 543 open ('translations.html.in', 'w').write (main_status_page)
 544
 545 for l in enabled_languages:
 546     date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
 547     lang_status_pages[l] = translation[l] (last_updated_string) % date_time + lang_status_pages[l]
 548     lang_status_page = html_re.sub (html_header, lang_status_pages[l])
 549     html_status = '\n'.join ([doc.translations[l].html_status ()
 550                               for doc in master_docs
 551                               if l in doc.translations])
 552     lang_status_page = end_body_re.sub (html_status + '\n</body>',
 553                                         lang_status_page)
 554     open (os.path.join (l, 'translations.html.in'), 'w').write (lang_status_page)
 555
 556 main_status_txt = '''Documentation translations status
 557 Generated %s
 558 NT = not translated
 559 FT = fully translated
 560
 561 ''' % date_time
 562
 563 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 564
 565 status_txt_file = 'out/translations-status.txt'
 566 progress ("Writing %s..." % status_txt_file)
 567 open (status_txt_file, 'w').write (main_status_txt)
 568
 569 translation_instructions_file = 'contributor/doc-translation-list.itexi'
 570 progress ("Updating %s..." % translation_instructions_file)
 571 translation_instructions = open (translation_instructions_file).read ()
 572
 573 for doc in master_docs:
 574     translation_instructions = doc.update_word_counts (translation_instructions)
 575
 576 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
 577                              translation_instructions):
 578     word_count = sgml_word_count (open (html_file).read ())
 579     translation_instructions = update_word_count (translation_instructions,
 580                                                   html_file,
 581                                                   word_count)
 582
 583 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
 584                            translation_instructions):
 585     word_count = po_word_count (open (po_file).read ())
 586     translation_instructions = update_word_count (translation_instructions,
 587                                                   po_file,
 588                                                   word_count)
 589
 590 translation_instructions = \
 591     update_category_word_counts_re.sub (update_category_word_counts_sub,
 592                                         translation_instructions)
 593
 594 open (translation_instructions_file, 'w').write (translation_instructions)
 595 sys.exit (exit_code)