buildscripts/translations-status.py

   1 #!@PYTHON@
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10   Writes translations.html.in and for each LANG in LANGUAGES
  11 translations.LANG.html.in
  12   Writes out/translations-status.txt
  13   Updates word counts in TRANSLATION
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20 import gettext
  21 import subprocess
  22
  23 def progress (str):
  24     sys.stderr.write (str + '\n')
  25
  26 progress ("translations-status.py")
  27
  28 buildscript_dir = sys.argv[1]
  29 localedir = sys.argv[2]
  30
  31 _doc = lambda s: s
  32
  33 sys.path.append (buildscript_dir)
  34 import langdefs
  35
  36 # load gettext messages catalogs
  37 translation = {}
  38 for l in langdefs.LANGUAGES:
  39     if l.enabled and l.code != 'en':
  40         translation[l.code] = gettext.translation('lilypond-doc', localedir, [l.code]).gettext
  41
  42 def read_pipe (command):
  43     child = subprocess.Popen (command,
  44                               stdout = subprocess.PIPE,
  45                               stderr = subprocess.PIPE,
  46                               shell = True)
  47     (output, error) = child.communicate ()
  48     code = str (child.wait ())
  49     if not child.stdout or child.stdout.close ():
  50         print "pipe failed: %(command)s" % locals ()
  51     if code != '0':
  52         error = code + ' ' + error
  53     return (output, error)
  54
  55 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  56 space_re = re.compile (r'\s+', re.M)
  57 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  58 node_re = re.compile ('^@node .*?$', re.M)
  59 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  60 include_re = re.compile ('^@include (.*?)$', re.M)
  61
  62 committish_re = re.compile ('GIT [Cc]ommittish: ([a-f0-9]+)')
  63 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  64 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$', re.M | re.I)
  65 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  66 post_gdp_re = re.compile ('post.GDP', re.I)
  67 untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
  68 skeleton_str = '-- SKELETON FILE --'
  69
  70 diff_cmd = 'git diff --no-color %(committish)s HEAD -- %(original)s | cat'
  71
  72 format_table = {
  73     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
  74                        'long':_doc ('not translated')},
  75     'partially translated': {'color':'dfef77', 'short':_doc ('partially (%(p)d %%)'),
  76                              'abbr':'%(p)d%%', 'long':_doc ('partially translated (%(p)d %%)')},
  77     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
  78                          'long': _doc ('translated')},
  79     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'), 'abbr':'100%%',
  80                    'vague':_doc ('up to date')},
  81     'outdated': {'short':_doc ('partially (%(p)d %%)'), 'abbr':'%(p)d%%',
  82                  'vague':_doc ('partially up to date')},
  83     'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
  84     'pre-GDP':_doc ('pre-GDP'),
  85     'post-GDP':_doc ('post-GDP')
  86 }
  87
  88 texi_level = {
  89 # (Unumbered/Numbered/Lettered, level)
  90     'top': ('u', 0),
  91     'unnumbered': ('u', 1),
  92     'unnumberedsec': ('u', 2),
  93     'unnumberedsubsec': ('u', 3),
  94     'chapter': ('n', 1),
  95     'section': ('n', 2),
  96     'subsection': ('n', 3),
  97     'appendix': ('l', 1)
  98 }
  99
 100 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
 101                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
 102
 103 class SectionNumber (object):
 104     def __init__ (self):
 105         self.__data = [[0,'u']]
 106
 107     def __increase_last_index (self):
 108         type = self.__data[-1][1]
 109         if type == 'l':
 110             self.__data[-1][0] = self.__data[-1][0].translate (appendix_number_trans)
 111         elif type == 'n':
 112             self.__data[-1][0] += 1
 113
 114     def format (self):
 115         if self.__data[-1][1] == 'u':
 116             return ''
 117         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 118
 119     def increase (self, (type, level)):
 120         if level == 0:
 121             self.__data = [[0,'u']]
 122         while level + 1 < len (self.__data):
 123             del self.__data[-1]
 124         if level + 1 > len (self.__data):
 125             self.__data.append ([0, type])
 126             if type == 'l':
 127                 self.__data[-1][0] = '@'
 128         if type == self.__data[-1][1]:
 129             self.__increase_last_index ()
 130         else:
 131             self.__data[-1] = ([0, type])
 132             if type == 'l':
 133                 self.__data[-1][0] = 'A'
 134             elif type == 'n':
 135                 self.__data[-1][0] = 1
 136         return self.format ()
 137
 138
 139 def percentage_color (percent):
 140     p = percent / 100.0
 141     if p < 0.33:
 142         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 143     elif p < 0.67:
 144         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 145     else:
 146         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:] for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 147     return ''.join (c)
 148
 149
 150 def update_word_count (text, filename, word_count):
 151     return re.sub (r'(?m)^(\d+) *' + filename,
 152                    str (word_count).ljust (6) + filename,
 153                    text)
 154
 155 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 156
 157 def po_word_count (po_content):
 158     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 159     return len (space_re.split (s))
 160
 161 sgml_tag_re = re.compile (r'<.*?>', re.S)
 162
 163 def sgml_word_count (sgml_doc):
 164     s = sgml_tag_re.sub ('', sgml_doc)
 165     return len (space_re.split (s))
 166
 167 def tely_word_count (tely_doc):
 168     '''
 169     Calculate word count of a Texinfo document node by node.
 170
 171     Take string tely_doc as an argument.
 172     Return a list of integers.
 173
 174     Texinfo comments and @lilypond blocks are not included in word counts.
 175     '''
 176     tely_doc = comments_re.sub ('', tely_doc)
 177     tely_doc = lilypond_re.sub ('', tely_doc)
 178     nodes = node_re.split (tely_doc)
 179     return [len (space_re.split (n)) for n in nodes]
 180
 181
 182 class TelyDocument (object):
 183     def __init__ (self, filename):
 184         self.filename = filename
 185         self.contents = open (filename).read ()
 186
 187         ## record title and sectionning level of first Texinfo section
 188         m = title_re.search (self.contents)
 189         if m:
 190             self.title = m.group (2)
 191             self.level = texi_level [m.group (1)]
 192         else:
 193             self.title = 'Untitled'
 194             self.level = ('u', 1)
 195
 196         included_files = [os.path.join (os.path.dirname (filename), t) for t in include_re.findall (self.contents)]
 197         self.included_files = [p for p in included_files if os.path.exists (p)]
 198
 199     def print_title (self, section_number):
 200         return section_number.increase (self.level) + self.title
 201
 202
 203 class TranslatedTelyDocument (TelyDocument):
 204     def __init__ (self, filename, masterdocument, parent_translation=None):
 205         TelyDocument.__init__ (self, filename)
 206
 207         self.masterdocument = masterdocument
 208
 209         ## record authoring information
 210         m = translators_re.search (self.contents)
 211         if m:
 212             self.translators = [n.strip () for n in m.group (1).split (',')]
 213         else:
 214             self.translators = parent_translation.translators
 215         m = checkers_re.search (self.contents)
 216         if m:
 217             self.checkers = [n.strip () for n in m.group (1).split (',')]
 218         elif isinstance (parent_translation, TranslatedTelyDocument):
 219             self.checkers = parent_translation.checkers
 220         else:
 221             self.checkers = []
 222
 223         ## check whether translation is pre- or post-GDP
 224         m = status_re.search (self.contents)
 225         if m:
 226             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 227         else:
 228             self.post_gdp = False
 229
 230         ## record which parts (nodes) of the file are actually translated
 231         self.partially_translated = not skeleton_str in self.contents
 232         nodes = node_re.split (self.contents)
 233         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 234
 235         ## calculate translation percentage
 236         master_total_word_count = sum (masterdocument.word_count)
 237         translation_word_count = sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 238                                        for k in range (min (len (masterdocument.word_count), len (self.translated_nodes)))])
 239         self.translation_percentage = 100 * translation_word_count / master_total_word_count
 240
 241         ## calculate how much the file is outdated
 242         m = committish_re.search (self.contents)
 243         if not m:
 244             sys.stderr.write ('error: ' + filename + \
 245                                   ": no 'GIT committish: <hash>' found.\nPlease check " + \
 246                                   'the whole file against the original in English, then ' + \
 247                                   'fill in HEAD committish in the header.\n')
 248             sys.exit (1)
 249         (diff_string, error) = read_pipe (diff_cmd % {'committish':m.group (1), 'original':masterdocument.filename})
 250         if error:
 251             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 252             self.uptodate_percentage = None
 253         else:
 254             diff = diff_string.splitlines ()
 255             insertions = sum ([len (l) - 1 for l in diff if l.startswith ('+') and not l.startswith ('+++')])
 256             deletions = sum ([len (l) - 1 for l in diff if l.startswith ('-') and not l.startswith ('---')])
 257             outdateness_percentage = 50.0 * (deletions + insertions) / (masterdocument.size + 0.5 * (deletions - insertions))
 258             self.uptodate_percentage = 100 - int (outdateness_percentage)
 259             if self.uptodate_percentage > 100:
 260                 alternative = 50
 261                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 262                               % (self.filename, self.uptodate_percentage, alternative))
 263                 self.uptodate_percentage = alternative
 264             elif self.uptodate_percentage < 1:
 265                 alternative = 1
 266                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 267                               % (self.filename, self.uptodate_percentage, alternative))
 268                 self.uptodate_percentage = alternative
 269
 270     def completeness (self, formats=['long']):
 271         if isinstance (formats, str):
 272             formats = [formats]
 273         p = self.translation_percentage
 274         if p == 0:
 275             status = 'not translated'
 276         elif p == 100:
 277             status = 'fully translated'
 278         else:
 279             status = 'partially translated'
 280         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 281
 282     def uptodateness (self, formats=['long']):
 283         if isinstance (formats, str):
 284             formats = [formats]
 285         p = self.uptodate_percentage
 286         if p == None:
 287             status = 'N/A'
 288         elif p == 100:
 289             status = 'up to date'
 290         else:
 291             status = 'outdated'
 292         l = {}
 293         for f in formats:
 294             if f == 'color' and p != None:
 295                 l['color'] = percentage_color (p)
 296             else:
 297                 l[f] = format_table[status][f] % locals ()
 298         return l
 299
 300     def gdp_status (self, translation=lambda s: s):
 301         if self.post_gdp:
 302             return translation (format-table['post-GDP'])
 303         else:
 304             return translation (format-table['pre-GDP'])
 305
 306     def short_html_status (self):
 307         s = '  <td>'
 308         if self.partially_translated:
 309             s += '<br>\n   '.join (self.translators) + '<br>\n'
 310             if self.checkers:
 311                 s += '   <small>' + '<br>\n   '.join (self.checkers) + '</small><br>\n'
 312
 313         c = self.completeness (['color', 'long'])
 314         s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % c
 315
 316         if self.partially_translated:
 317             u = self.uptodateness (['vague', 'color'])
 318             s += '   <span style="background-color: #%(color)s">%(vague)s</span><br>\n' % u
 319
 320         s += '  </td>\n'
 321         return s
 322
 323     def text_status (self):
 324         s = self.completeness ('abbr')['abbr'] + ' '
 325
 326         if self.partially_translated:
 327             s += self.uptodateness ('abbr')['abbr'] + ' '
 328         return s
 329
 330     def html_status (self):
 331         # TODO
 332         return ''
 333
 334 class MasterTelyDocument (TelyDocument):
 335     def __init__ (self, filename, parent_translations=dict ([(lang, None) for lang in langdefs.LANGDICT.keys()])):
 336         TelyDocument.__init__ (self, filename)
 337         self.size = len (self.contents)
 338         self.word_count = tely_word_count (self.contents)
 339         translations = dict ([(lang, os.path.join (lang, filename)) for lang in langdefs.LANGDICT.keys()])
 340         self.translations = dict ([(lang, TranslatedTelyDocument (translations[lang], self, parent_translations.get (lang)))
 341                                    for lang in langdefs.LANGDICT.keys() if os.path.exists (translations[lang])])
 342         if self.translations:
 343             self.includes = [MasterTelyDocument (f, self.translations) for f in self.included_files]
 344         else:
 345             self.includes = []
 346
 347     def update_word_counts (self, s):
 348         s = update_word_count (s, self.filename, sum (self.word_count))
 349         for i in self.includes:
 350             s = i.update_word_counts (s)
 351         return s
 352
 353     def html_status (self, numbering=SectionNumber ()):
 354         if self.title == 'Untitled' or not self.translations:
 355             return ''
 356         if self.level[1] == 0: # if self is a master document
 357             s = '''<table align="center" border="2">
 358  <tr align="center">
 359   <th>%s</th>''' % self.print_title (numbering)
 360             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations.keys ()])
 361             s += ' </tr>\n'
 362             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 363                 % sum (self.word_count)
 364
 365         else:
 366             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 367                 % (self.print_title (numbering), sum (self.word_count))
 368
 369         s += ''.join ([t.short_html_status () for t in self.translations.values ()])
 370         s += ' </tr>\n'
 371         s += ''.join ([i.html_status (numbering) for i in self.includes])
 372
 373         if self.level[1] == 0:
 374             s += '</table>\n<p></p>\n'
 375         return s
 376
 377     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 378         if self.title == 'Untitled' or not self.translations:
 379             return ''
 380
 381         s = ''
 382         if self.level[1] == 0: # if self is a master document
 383             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 384             s += ''.join (['%s'.ljust (colspec[1]) % l for l in self.translations.keys ()])
 385             s += '\n'
 386             s += ('Section titles (%d)' % sum (self.word_count)).ljust (colspec[0])
 387
 388         else:
 389             s = '%s (%d) ' \
 390                 % (self.print_title (numbering), sum (self.word_count))
 391             s = s.ljust (colspec[0])
 392
 393         s += ''.join ([t.text_status ().ljust(colspec[1]) for t in self.translations.values ()])
 394         s += '\n\n'
 395         s += ''.join ([i.text_status (numbering) for i in self.includes])
 396
 397         if self.level[1] == 0:
 398             s += '\n'
 399         return s
 400
 401
 402 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 403
 404 counts_re = re.compile (r'(?m)^(\d+) ')
 405
 406 def update_category_word_counts_sub (m):
 407     return '-' + m.group (1) + '-' + m.group (2) + \
 408         str (sum ([int (c) for c in counts_re.findall (m.group (2))])).ljust (6) + 'total'
 409
 410
 411 progress ("Reading documents...")
 412
 413 tely_files = read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 414 master_docs = [MasterTelyDocument (os.path.normpath (filename)) for filename in tely_files]
 415 master_docs = [doc for doc in master_docs if doc.translations]
 416
 417 main_status_page = open ('translations.template.html.in').read ()
 418
 419 ## TODO
 420 #per_lang_status_pages = dict ([(l, open (os.path.join (l, 'translations.template.html')). read ())
 421 #                               for l in langdefs.LANGDICT.keys ()
 422 #                               if langdefs.LANGDICT[l].enabled])
 423
 424 progress ("Generating status pages...")
 425
 426 date_time = read_pipe ('LANG= date -u')[0]
 427
 428 main_status_html = ' <p><i>Last updated %s</i></p>\n' % date_time
 429 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 430
 431 html_re = re.compile ('<html>', re.I)
 432 end_body_re = re.compile ('</body>', re.I)
 433
 434 main_status_page = html_re.sub ('''<html>
 435 <!-- This page is automatically generated by translation-status.py from
 436 translations.template.html.in; DO NOT EDIT !-->''', main_status_page)
 437
 438 main_status_page = end_body_re.sub (main_status_html + '\n</body>', main_status_page)
 439
 440 open ('translations.html.in', 'w').write (main_status_page)
 441
 442 main_status_txt = '''Documentation translations status
 443 Generated %s
 444 NT = not translated
 445 FT = fully translated
 446
 447 ''' % date_time
 448
 449 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 450
 451 status_txt_file = 'out/translations-status.txt'
 452 progress ("Writing %s..." % status_txt_file)
 453 open (status_txt_file, 'w').write (main_status_txt)
 454
 455 translation_instructions_file = 'TRANSLATION'
 456 progress ("Updating %s..." % translation_instructions_file)
 457 translation_instructions = open (translation_instructions_file).read ()
 458
 459 for doc in master_docs:
 460     translation_instructions = doc.update_word_counts (translation_instructions)
 461
 462 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)', translation_instructions):
 463     word_count = sgml_word_count (open (html_file).read ())
 464     translation_instructions = update_word_count (translation_instructions,
 465                                                   html_file,
 466                                                   word_count)
 467
 468 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)', translation_instructions):
 469     word_count = po_word_count (open (po_file).read ())
 470     translation_instructions = update_word_count (translation_instructions,
 471                                                   po_file,
 472                                                   word_count)
 473
 474 translation_instructions = update_category_word_counts_re.sub (update_category_word_counts_sub,
 475                                                                translation_instructions)
 476
 477 open (translation_instructions_file, 'w').write (translation_instructions)