scripts/auxiliar/translations-status.py

   1 #!/usr/bin/env python
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  Writes translations.html.in and for each LANG in LANGUAGES
LANG/translations.html.in
  Writes out/translations-status.txt
  Updates word counts in devel/doc-translation-list.itexi
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20
  21 import langdefs
  22 import buildlib
  23
  24 def progress (str):
  25     sys.stderr.write (str + '\n')
  26
  27 exit_code = 0
  28
  29 def error (str, update_status=1):
  30     global exit_code
  31     sys.stderr.write ('translations-status.py: %s\n' % str)
  32     exit_code = max (exit_code, update_status)
  33
  34 progress ("translations-status.py")
  35
  36 _doc = lambda s: s
  37
  38 # load gettext messages catalogs
  39 translation = langdefs.translation
  40
  41
  42 language_re = re.compile (r'^@documentlanguage (.+)', re.M)
  43 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  44 space_re = re.compile (r'\s+', re.M)
  45 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  46 node_re = re.compile ('^@node .*?$', re.M)
  47 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|' + \
  48 '(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  49 include_re = re.compile ('^@include (.*?)$', re.M)
  50
  51 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  52 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$',
  53                           re.M | re.I)
  54 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  55 post_gdp_re = re.compile ('post.GDP', re.I)
  56 untranslated_node_str = '@untranslated'
  57 skeleton_str = '-- SKELETON FILE --'
  58
  59 section_titles_string = _doc ('Section titles')
  60 last_updated_string = _doc (' <p><i>Last updated %s</i></p>\n')
  61 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
  62                          _doc ('Translated'), _doc ('Up to date'),
  63                          _doc ('Other info')]
  64 format_table = {
  65     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
  66                        'long':_doc ('not translated')},
  67     'partially translated': {'color':'dfef77',
  68                              'short':_doc ('partially (%(p)d %%)'),
  69                              'abbr':'%(p)d%%',
  70                              'long':_doc ('partially translated (%(p)d %%)')},
  71     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
  72                          'long': _doc ('translated')},
  73     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
  74                    'abbr':'100%%', 'vague':_doc ('up to date')},
  75     'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
  76                  'vague':_doc ('partially up to date')},
  77     'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
  78     'pre-GDP':_doc ('pre-GDP'),
  79     'post-GDP':_doc ('post-GDP')
  80 }
  81
  82 texi_level = {
  83 # (Unumbered/Numbered/Lettered, level)
  84     'top': ('u', 0),
  85     'unnumbered': ('u', 1),
  86     'unnumberedsec': ('u', 2),
  87     'unnumberedsubsec': ('u', 3),
  88     'chapter': ('n', 1),
  89     'section': ('n', 2),
  90     'subsection': ('n', 3),
  91     'appendix': ('l', 1)
  92 }
  93
  94 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  95                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  96
  97 class SectionNumber (object):
  98     def __init__ (self):
  99         self.__data = [[0,'u']]
 100
 101     def __increase_last_index (self):
 102         type = self.__data[-1][1]
 103         if type == 'l':
 104             self.__data[-1][0] = \
 105                 self.__data[-1][0].translate (appendix_number_trans)
 106         elif type == 'n':
 107             self.__data[-1][0] += 1
 108
 109     def format (self):
 110         if self.__data[-1][1] == 'u':
 111             return ''
 112         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 113
 114     def increase (self, (type, level)):
 115         if level == 0:
 116             self.__data = [[0,'u']]
 117         while level + 1 < len (self.__data):
 118             del self.__data[-1]
 119         if level + 1 > len (self.__data):
 120             self.__data.append ([0, type])
 121             if type == 'l':
 122                 self.__data[-1][0] = '@'
 123         if type == self.__data[-1][1]:
 124             self.__increase_last_index ()
 125         else:
 126             self.__data[-1] = ([0, type])
 127             if type == 'l':
 128                 self.__data[-1][0] = 'A'
 129             elif type == 'n':
 130                 self.__data[-1][0] = 1
 131         return self.format ()
 132
 133
 134 def percentage_color (percent):
 135     p = percent / 100.0
 136     if p < 0.33:
 137         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
 138              for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 139     elif p < 0.67:
 140         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
 141              for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 142     else:
 143         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
 144              for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 145     return ''.join (c)
 146
 147
 148 def update_word_count (text, filename, word_count):
 149     return re.sub (r'(?m)^(\d+) *' + filename,
 150                    str (word_count).ljust (6) + filename,
 151                    text)
 152
 153 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 154
 155 def po_word_count (po_content):
 156     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 157     return len (space_re.split (s))
 158
 159 sgml_tag_re = re.compile (r'<.*?>', re.S)
 160
 161 def sgml_word_count (sgml_doc):
 162     s = sgml_tag_re.sub ('', sgml_doc)
 163     return len (space_re.split (s))
 164
 165 def tely_word_count (tely_doc):
 166     '''
 167     Calculate word count of a Texinfo document node by node.
 168
 169     Take string tely_doc as an argument.
 170     Return a list of integers.
 171
 172     Texinfo comments and @lilypond blocks are not included in word counts.
 173     '''
 174     tely_doc = comments_re.sub ('', tely_doc)
 175     tely_doc = lilypond_re.sub ('', tely_doc)
 176     nodes = node_re.split (tely_doc)
 177     return [len (space_re.split (n)) for n in nodes]
 178
 179
 180 class TelyDocument (object):
 181     def __init__ (self, filename):
 182         self.filename = filename
 183         self.contents = open (filename).read ()
 184
 185         ## record title and sectionning level of first Texinfo section
 186         m = title_re.search (self.contents)
 187         if m:
 188             self.title = m.group (2)
 189             self.level = texi_level [m.group (1)]
 190         else:
 191             self.title = 'Untitled'
 192             self.level = ('u', 1)
 193
 194         m = language_re.search (self.contents)
 195         if m:
 196             self.language = m.group (1)
 197
 198         included_files = [os.path.join (os.path.dirname (filename), t)
 199                           for t in include_re.findall (self.contents)]
 200         self.included_files = [p for p in included_files if os.path.exists (p)]
 201
 202     def print_title (self, section_number):
 203         return section_number.increase (self.level) + self.title
 204
 205
 206 class TranslatedTelyDocument (TelyDocument):
 207     def __init__ (self, filename, masterdocument, parent_translation=None):
 208         TelyDocument.__init__ (self, filename)
 209
 210         self.masterdocument = masterdocument
 211         if not hasattr (self, 'language') \
 212                 and hasattr (parent_translation, 'language'):
 213             self.language = parent_translation.language
 214         if hasattr (self, 'language'):
 215             self.translation = translation[self.language]
 216         else:
 217             self.translation = lambda x: x
 218         self.title = self.translation (self.title)
 219
 220         ## record authoring information
 221         m = translators_re.search (self.contents)
 222         if m:
 223             self.translators = [n.strip () for n in m.group (1).split (',')]
 224         else:
 225             try:
 226                 self.translators = parent_translation.translators
 227             except:
 228                 error ('%s: no translator name found, \nplease \
 229 specify at least one in the master file as a line containing\n\
 230 @c Translators: FirstName1 LastName1, FirstName2 LastName2' % self.filename)
 231         m = checkers_re.search (self.contents)
 232         if m:
 233             self.checkers = [n.strip () for n in m.group (1).split (',')]
 234         elif isinstance (parent_translation, TranslatedTelyDocument):
 235             self.checkers = parent_translation.checkers
 236         else:
 237             self.checkers = []
 238
 239         ## check whether translation is pre- or post-GDP
 240         m = status_re.search (self.contents)
 241         if m:
 242             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 243         else:
 244             self.post_gdp = False
 245
 246         ## record which parts (nodes) of the file are actually translated
 247         self.partially_translated = not skeleton_str in self.contents
 248         nodes = node_re.split (self.contents)
 249         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 250
 251         ## calculate translation percentage
 252         master_total_word_count = sum (masterdocument.word_count)
 253         translation_word_count = \
 254             sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 255                   for k in range (min (len (masterdocument.word_count),
 256                                        len (self.translated_nodes)))])
 257         self.translation_percentage = \
 258             100 * translation_word_count / master_total_word_count
 259
 260         ## calculate how much the file is outdated
 261         (diff_string, git_error) = \
 262             buildlib.check_translated_doc (masterdocument.filename, self.filename, self.contents)
 263         if git_error:
 264             sys.stderr.write ('warning: %s: %s' % (self.filename, git_error))
 265             self.uptodate_percentage = None
 266         else:
 267             diff = diff_string.splitlines ()
 268             insertions = sum ([len (l) - 1 for l in diff
 269                                if l.startswith ('+')
 270                                and not l.startswith ('+++')])
 271             deletions = sum ([len (l) - 1 for l in diff
 272                               if l.startswith ('-')
 273                               and not l.startswith ('---')])
 274             outdateness_percentage = 50.0 * (deletions + insertions) / \
 275                 (masterdocument.size + 0.5 * (deletions - insertions))
 276             self.uptodate_percentage = 100 - int (outdateness_percentage)
 277             if self.uptodate_percentage > 100:
 278                 alternative = 50
 279                 progress ("%s: strange uptodateness percentage %d %%, \
 280 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 281                 self.uptodate_percentage = alternative
 282             elif self.uptodate_percentage < 1:
 283                 alternative = 1
 284                 progress ("%s: strange uptodateness percentage %d %%, \
 285 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 286                 self.uptodate_percentage = alternative
 287
 288     def completeness (self, formats=['long'], translated=False):
 289         if translated:
 290             translation = self.translation
 291         else:
 292             translation = lambda x: x
 293
 294         if isinstance (formats, str):
 295             formats = [formats]
 296         p = self.translation_percentage
 297         if p == 0:
 298             status = 'not translated'
 299         elif p == 100:
 300             status = 'fully translated'
 301         else:
 302             status = 'partially translated'
 303         return dict ([(f, translation (format_table[status][f]) % locals())
 304                       for f in formats])
 305
 306     def uptodateness (self, formats=['long'], translated=False):
 307         if translated:
 308             translation = self.translation
 309         else:
 310             translation = lambda x: x
 311
 312         if isinstance (formats, str):
 313             formats = [formats]
 314         p = self.uptodate_percentage
 315         if p == None:
 316             status = 'N/A'
 317         elif p == 100:
 318             status = 'up to date'
 319         else:
 320             status = 'outdated'
 321         l = {}
 322         for f in formats:
 323             if f == 'color' and p != None:
 324                 l['color'] = percentage_color (p)
 325             else:
 326                 l[f] = translation (format_table[status][f]) % locals ()
 327         return l
 328
 329     def gdp_status (self):
 330         if self.post_gdp:
 331             return self.translation (format_table['post-GDP'])
 332         else:
 333             return self.translation (format_table['pre-GDP'])
 334
 335     def short_html_status (self):
 336         s = '  <td>'
 337         if self.partially_translated:
 338             s += '<br>\n   '.join (self.translators) + '<br>\n'
 339             if self.checkers:
 340                 s += '   <small>' + \
 341                     '<br>\n   '.join (self.checkers) + '</small><br>\n'
 342
 343         c = self.completeness (['color', 'long'])
 344         s += '   <span style="background-color: #%(color)s">\
 345 %(long)s</span><br>\n' % c
 346
 347         if self.partially_translated:
 348             u = self.uptodateness (['vague', 'color'])
 349             s += '   <span style="background-color: #%(color)s">\
 350 %(vague)s</span><br>\n' % u
 351
 352         s += '  </td>\n'
 353         return s
 354
 355     def text_status (self):
 356         s = self.completeness ('abbr')['abbr'] + ' '
 357
 358         if self.partially_translated:
 359             s += self.uptodateness ('abbr')['abbr'] + ' '
 360         return s
 361
 362     def html_status (self, numbering=SectionNumber ()):
 363         if self.title == 'Untitled':
 364             return ''
 365
 366         if self.level[1] == 0: # if self is a master document
 367             s = '''<table align="center" border="2">
 368  <tr align="center">
 369   <th>%s</th>''' % self.print_title (numbering)
 370             s += ''.join (['  <th>%s</th>\n' % self.translation (h)
 371                            for h in detailed_status_heads])
 372             s += ' </tr>\n'
 373             s += ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 374                 % (self.translation (section_titles_string),
 375                    sum (self.masterdocument.word_count))
 376
 377         else:
 378             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 379                 % (self.print_title (numbering),
 380                    sum (self.masterdocument.word_count))
 381
 382         if self.partially_translated:
 383             s += '  <td>' + '<br>\n   '.join (self.translators) + '</td>\n'
 384             s += '  <td>' + '<br>\n   '.join (self.checkers) + '</td>\n'
 385         else:
 386             s += '  <td></td>\n' * 2
 387
 388         c = self.completeness (['color', 'short'], translated=True)
 389         s += '  <td><span style="background-color: #%(color)s">\
 390 %(short)s</span></td>\n' % {'color': c['color'],
 391                            'short': c['short']}
 392
 393         if self.partially_translated:
 394             u = self.uptodateness (['short', 'color'], translated=True)
 395             s += '  <td><span style="background-color: #%(color)s">\
 396 %(short)s</span></td>\n' % {'color': u['color'],
 397                            'short': u['short']}
 398         else:
 399             s += '  <td></td>\n'
 400
 401         s += '  <td>' + self.gdp_status () + '</td>\n </tr>\n'
 402         s += ''.join ([i.translations[self.language].html_status (numbering)
 403                        for i in self.masterdocument.includes
 404                        if self.language in i.translations])
 405
 406         if self.level[1] == 0:  # if self is a master document
 407             s += '</table>\n<p></p>\n'
 408         return s
 409
 410 class MasterTelyDocument (TelyDocument):
 411     def __init__ (self,
 412                   filename,
 413                   parent_translations=dict ([(lang, None)
 414                                              for lang in langdefs.LANGDICT])):
 415         TelyDocument.__init__ (self, filename)
 416         self.size = len (self.contents)
 417         self.word_count = tely_word_count (self.contents)
 418         translations = dict ([(lang, os.path.join (lang, filename))
 419                               for lang in langdefs.LANGDICT])
 420         self.translations = \
 421             dict ([(lang,
 422                     TranslatedTelyDocument (translations[lang],
 423                                             self, parent_translations.get (lang)))
 424                    for lang in langdefs.LANGDICT
 425                    if os.path.exists (translations[lang])])
 426         if self.translations:
 427             self.includes = [MasterTelyDocument (f, self.translations)
 428                              for f in self.included_files]
 429         else:
 430             self.includes = []
 431
 432     def update_word_counts (self, s):
 433         s = update_word_count (s, self.filename, sum (self.word_count))
 434         for i in self.includes:
 435             s = i.update_word_counts (s)
 436         return s
 437
 438     def html_status (self, numbering=SectionNumber ()):
 439         if self.title == 'Untitled' or not self.translations:
 440             return ''
 441         if self.level[1] == 0: # if self is a master document
 442             s = '''<table align="center" border="2">
 443  <tr align="center">
 444   <th>%s</th>''' % self.print_title (numbering)
 445             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations])
 446             s += ' </tr>\n'
 447             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 448                 % sum (self.word_count)
 449
 450         else:  # if self is an included file
 451             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 452                 % (self.print_title (numbering), sum (self.word_count))
 453
 454         s += ''.join ([t.short_html_status ()
 455                        for t in self.translations.values ()])
 456         s += ' </tr>\n'
 457         s += ''.join ([i.html_status (numbering) for i in self.includes])
 458
 459         if self.level[1] == 0:  # if self is a master document
 460             s += '</table>\n<p></p>\n'
 461         return s
 462
 463     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 464         if self.title == 'Untitled' or not self.translations:
 465             return ''
 466
 467         s = ''
 468         if self.level[1] == 0: # if self is a master document
 469             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 470             s += ''.join (['%s'.ljust (colspec[1]) % l
 471                            for l in self.translations])
 472             s += '\n'
 473             s += ('Section titles (%d)' % \
 474                       sum (self.word_count)).ljust (colspec[0])
 475
 476         else:
 477             s = '%s (%d) ' \
 478                 % (self.print_title (numbering), sum (self.word_count))
 479             s = s.ljust (colspec[0])
 480
 481         s += ''.join ([t.text_status ().ljust(colspec[1])
 482                        for t in self.translations.values ()])
 483         s += '\n\n'
 484         s += ''.join ([i.text_status (numbering) for i in self.includes])
 485
 486         if self.level[1] == 0:
 487             s += '\n'
 488         return s
 489
 490
 491 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 492
 493 counts_re = re.compile (r'(?m)^(\d+) ')
 494
 495 def update_category_word_counts_sub (m):
 496     return '-' + m.group (1) + '-' + m.group (2) + \
 497         str (sum ([int (c)
 498                    for c in counts_re.findall (m.group (2))])).ljust (6) + \
 499         'total'
 500
 501
 502 progress ("Reading documents...")
 503
 504 tely_files = \
 505     buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 506 tely_files.sort ()
 507 master_docs = [MasterTelyDocument (os.path.normpath (filename))
 508                for filename in tely_files]
 509 master_docs = [doc for doc in master_docs if doc.translations]
 510
 511 main_status_page = open ('translations.template.html.in').read ()
 512
 513 enabled_languages = [l for l in langdefs.LANGDICT
 514                      if langdefs.LANGDICT[l].enabled
 515                      and l != 'en']
 516 lang_status_pages = \
 517     dict ([(l, open (os.path.join (l, 'translations.template.html.in')). read ())
 518            for l in enabled_languages])
 519
 520 progress ("Generating status pages...")
 521
 522 date_time = buildlib.read_pipe ('LANG= date -u')[0]
 523
 524 main_status_html = last_updated_string % date_time
 525 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 526
 527 html_re = re.compile ('<html>', re.I)
 528 end_body_re = re.compile ('</body>', re.I)
 529
 530 html_header = '''<html>
 531 <!-- This page is automatically generated by translation-status.py from
 532 translations.template.html.in; DO NOT EDIT !-->'''
 533
 534 main_status_page = html_re.sub (html_header, main_status_page)
 535
 536 main_status_page = end_body_re.sub (main_status_html + '\n</body>',
 537                                     main_status_page)
 538
 539 open ('translations.html.in', 'w').write (main_status_page)
 540
 541 for l in enabled_languages:
 542     date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
 543     lang_status_pages[l] = translation[l] (last_updated_string) % date_time + lang_status_pages[l]
 544     lang_status_page = html_re.sub (html_header, lang_status_pages[l])
 545     html_status = '\n'.join ([doc.translations[l].html_status ()
 546                               for doc in master_docs
 547                               if l in doc.translations])
 548     lang_status_page = end_body_re.sub (html_status + '\n</body>',
 549                                         lang_status_page)
 550     open (os.path.join (l, 'translations.html.in'), 'w').write (lang_status_page)
 551
 552 main_status_txt = '''Documentation translations status
 553 Generated %s
 554 NT = not translated
 555 FT = fully translated
 556
 557 ''' % date_time
 558
 559 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 560
 561 status_txt_file = 'out/translations-status.txt'
 562 progress ("Writing %s..." % status_txt_file)
 563 open (status_txt_file, 'w').write (main_status_txt)
 564
 565 translation_instructions_file = 'devel/doc-translation-list.itexi'
 566 progress ("Updating %s..." % translation_instructions_file)
 567 translation_instructions = open (translation_instructions_file).read ()
 568
 569 for doc in master_docs:
 570     translation_instructions = doc.update_word_counts (translation_instructions)
 571
 572 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
 573                              translation_instructions):
 574     word_count = sgml_word_count (open (html_file).read ())
 575     translation_instructions = update_word_count (translation_instructions,
 576                                                   html_file,
 577                                                   word_count)
 578
 579 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
 580                            translation_instructions):
 581     word_count = po_word_count (open (po_file).read ())
 582     translation_instructions = update_word_count (translation_instructions,
 583                                                   po_file,
 584                                                   word_count)
 585
 586 translation_instructions = \
 587     update_category_word_counts_re.sub (update_category_word_counts_sub,
 588                                         translation_instructions)
 589
 590 open (translation_instructions_file, 'w').write (translation_instructions)
 591 sys.exit (exit_code)