buildscripts/translations-status.py

   1 #!@PYTHON@
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10
  11   Writes translations.html.in and for each LANG in LANGUAGES
  12 translations.LANG.html.in
  13 """
  14
  15 import sys
  16 import re
  17 import string
  18 import os
  19 import gettext
  20 import subprocess
  21
  22 def progress (str):
  23     sys.stderr.write (str + '\n')
  24
  25 progress ("translations-status.py")
  26
  27 buildscript_dir = sys.argv[1]
  28 localedir = sys.argv[2]
  29
  30 _doc = lambda s: s
  31
  32 sys.path.append (buildscript_dir)
  33 import langdefs
  34
  35 # load gettext messages catalogs
  36 translation = {}
  37 for l in langdefs.LANGUAGES:
  38     if l.enabled and l.code != 'en':
  39         translation[l.code] = gettext.translation('lilypond-doc', localedir, [l.code]).gettext
  40
  41 def read_pipe (command):
  42     child = subprocess.Popen (command,
  43                               stdout = subprocess.PIPE,
  44                               stderr = subprocess.PIPE,
  45                               shell = True)
  46     (output, error) = child.communicate ()
  47     code = str (child.wait ())
  48     if not child.stdout or child.stdout.close ():
  49         print "pipe failed: %(command)s" % locals ()
  50     if code != '0':
  51         error = code + ' ' + error
  52     return (output, error)
  53
  54 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  55 space_re = re.compile (r'\s+', re.M)
  56 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  57 node_re = re.compile ('^@node .*?$', re.M)
  58 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  59 include_re = re.compile ('^@include (.*?)$', re.M)
  60
  61 committish_re = re.compile ('GIT [Cc]ommittish: ([a-f0-9]+)')
  62 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  63 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$', re.M | re.I)
  64 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  65 post_gdp_re = re.compile ('post.GDP', re.I)
  66 untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
  67 skeleton_str = '-- SKELETON FILE --'
  68
  69 diff_cmd = 'git diff --no-color %(committish)s HEAD -- %(original)s | cat'
  70
  71 format_table = {
  72     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
  73                        'long':_doc ('not translated')},
  74     'partially translated': {'color':'dfef77', 'short':_doc ('partially (%(p)d %%)'),
  75                              'abbr':'%(p)d%%', 'long':_doc ('partially translated (%(p)d %%)')},
  76     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
  77                          'long': _doc ('translated')},
  78     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'), 'abbr':'100%%',
  79                    'vague':_doc ('up to date')},
  80     'outdated': {'short':_doc ('partially (%(p)d %%)'), 'abbr':'%(p)d%%',
  81                  'vague':_doc ('partially up to date')},
  82     'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
  83     'pre-GDP':_doc ('pre-GDP'),
  84     'post-GDP':_doc ('post-GDP')
  85 }
  86
  87 texi_level = {
  88 # (Unumbered/Numbered/Lettered, level)
  89     'top': ('u', 0),
  90     'unnumbered': ('u', 1),
  91     'unnumberedsec': ('u', 2),
  92     'unnumberedsubsec': ('u', 3),
  93     'chapter': ('n', 1),
  94     'section': ('n', 2),
  95     'subsection': ('n', 3),
  96     'appendix': ('l', 1)
  97 }
  98
  99 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
 100                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
 101
 102 class SectionNumber (object):
 103     def __init__ (self):
 104         self.__data = [[0,'u']]
 105
 106     def __increase_last_index (self):
 107         type = self.__data[-1][1]
 108         if type == 'l':
 109             self.__data[-1][0] = self.__data[-1][0].translate (appendix_number_trans)
 110         elif type == 'n':
 111             self.__data[-1][0] += 1
 112
 113     def format (self):
 114         if self.__data[-1][1] == 'u':
 115             return ''
 116         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 117
 118     def increase (self, (type, level)):
 119         if level == 0:
 120             self.__data = [[0,'u']]
 121         while level + 1 < len (self.__data):
 122             del self.__data[-1]
 123         if level + 1 > len (self.__data):
 124             self.__data.append ([0, type])
 125             if type == 'l':
 126                 self.__data[-1][0] = '@'
 127         if type == self.__data[-1][1]:
 128             self.__increase_last_index ()
 129         else:
 130             self.__data[-1] = ([0, type])
 131             if type == 'l':
 132                 self.__data[-1][0] = 'A'
 133             elif type == 'n':
 134                 self.__data[-1][0] = 1
 135         return self.format ()
 136
 137
 138 def percentage_color (percent):
 139     p = percent / 100.0
 140     if p < 0.33:
 141         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 142     elif p < 0.67:
 143         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 144     else:
 145         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:] for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 146     return ''.join (c)
 147
 148 def tely_word_count (tely_doc):
 149     '''
 150     Calculate word count of a Texinfo document node by node.
 151
 152     Take string tely_doc as an argument.
 153     Return a list of integers.
 154
 155     Texinfo comments and @lilypond blocks are not included in word counts.
 156     '''
 157     tely_doc = comments_re.sub ('', tely_doc)
 158     tely_doc = lilypond_re.sub ('', tely_doc)
 159     nodes = node_re.split (tely_doc)
 160     return [len (space_re.split (n)) for n in nodes]
 161
 162
 163 class TelyDocument (object):
 164     def __init__ (self, filename):
 165         self.filename = filename
 166         self.contents = open (filename).read ()
 167
 168         ## record title and sectionning level of first Texinfo section
 169         m = title_re.search (self.contents)
 170         if m:
 171             self.title = m.group (2)
 172             self.level = texi_level [m.group (1)]
 173         else:
 174             self.title = 'Untitled'
 175             self.level = ('u', 1)
 176
 177         included_files = [os.path.join (os.path.dirname (filename), t) for t in include_re.findall (self.contents)]
 178         self.included_files = [p for p in included_files if os.path.exists (p)]
 179
 180     def print_title (self, section_number):
 181         return section_number.increase (self.level) + self.title
 182
 183
 184 class TranslatedTelyDocument (TelyDocument):
 185     def __init__ (self, filename, masterdocument, parent_translation=None):
 186         TelyDocument.__init__ (self, filename)
 187
 188         self.masterdocument = masterdocument
 189
 190         ## record authoring information
 191         m = translators_re.search (self.contents)
 192         if m:
 193             self.translators = [n.strip () for n in m.group (1).split (',')]
 194         else:
 195             self.translators = parent_translation.translators
 196         m = checkers_re.search (self.contents)
 197         if m:
 198             self.checkers = [n.strip () for n in m.group (1).split (',')]
 199         elif isinstance (parent_translation, TranslatedTelyDocument):
 200             self.checkers = parent_translation.checkers
 201         else:
 202             self.checkers = []
 203
 204         ## check whether translation is pre- or post-GDP
 205         m = status_re.search (self.contents)
 206         if m:
 207             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 208         else:
 209             self.post_gdp = False
 210
 211         ## record which parts (nodes) of the file are actually translated
 212         self.partially_translated = not skeleton_str in self.contents
 213         nodes = node_re.split (self.contents)
 214         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 215
 216         ## calculate translation percentage
 217         master_total_word_count = sum (masterdocument.word_count)
 218         translation_word_count = sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 219                                        for k in range (min (len (masterdocument.word_count), len (self.translated_nodes)))])
 220         self.translation_percentage = 100 * translation_word_count / master_total_word_count
 221
 222         ## calculate how much the file is outdated
 223         m = committish_re.search (self.contents)
 224         if not m:
 225             sys.stderr.write ('error: ' + filename + \
 226                                   ": no 'GIT committish: <hash>' found.\nPlease check " + \
 227                                   'the whole file against the original in English, then ' + \
 228                                   'fill in HEAD committish in the header.\n')
 229             sys.exit (1)
 230         (diff_string, error) = read_pipe (diff_cmd % {'committish':m.group (1), 'original':masterdocument.filename})
 231         if error:
 232             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 233             self.uptodate_percentage = None
 234         else:
 235             diff = diff_string.splitlines ()
 236             insertions = sum ([len (l) - 1 for l in diff if l.startswith ('+') and not l.startswith ('+++')])
 237             deletions = sum ([len (l) - 1 for l in diff if l.startswith ('-') and not l.startswith ('---')])
 238             outdateness_percentage = 50.0 * (deletions + insertions) / (masterdocument.size + 0.5 * (deletions - insertions))
 239             self.uptodate_percentage = 100 - int (outdateness_percentage)
 240             if self.uptodate_percentage > 100:
 241                 alternative = 50
 242                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 243                               % (self.filename, self.uptodate_percentage, alternative))
 244                 self.uptodate_percentage = alternative
 245             elif self.uptodate_percentage < 1:
 246                 alternative = 1
 247                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 248                               % (self.filename, self.uptodate_percentage, alternative))
 249                 self.uptodate_percentage = alternative
 250
 251     def completeness (self, formats=['long']):
 252         if isinstance (formats, str):
 253             formats = [formats]
 254         p = self.translation_percentage
 255         if p == 0:
 256             status = 'not translated'
 257         elif p == 100:
 258             status = 'fully translated'
 259         else:
 260             status = 'partially translated'
 261         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 262
 263     def uptodateness (self, formats=['long']):
 264         if isinstance (formats, str):
 265             formats = [formats]
 266         p = self.uptodate_percentage
 267         if p == None:
 268             status = 'N/A'
 269         elif p == 100:
 270             status = 'up to date'
 271         else:
 272             status = 'outdated'
 273         l = {}
 274         for f in formats:
 275             if f == 'color' and p != None:
 276                 l['color'] = percentage_color (p)
 277             else:
 278                 l[f] = format_table[status][f] % locals ()
 279         return l
 280
 281     def gdp_status (self, translation=lambda s: s):
 282         if self.post_gdp:
 283             return translation (format-table['post-GDP'])
 284         else:
 285             return translation (format-table['pre-GDP'])
 286
 287     def short_html_status (self):
 288         s = '  <td>'
 289         if self.partially_translated:
 290             s += '<br>\n   '.join (self.translators) + '<br>\n'
 291             if self.checkers:
 292                 s += '   <small>' + '<br>\n   '.join (self.checkers) + '</small><br>\n'
 293
 294         c = self.completeness (['color', 'long'])
 295         s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % c
 296
 297         if self.partially_translated:
 298             u = self.uptodateness (['vague', 'color'])
 299             s += '   <span style="background-color: #%(color)s">%(vague)s</span><br>\n' % u
 300
 301         s += '  </td>\n'
 302         return s
 303
 304     def text_status (self):
 305         s = self.completeness ('abbr')['abbr'] + ' '
 306
 307         if self.partially_translated:
 308             s += self.uptodateness ('abbr')['abbr'] + ' '
 309         return s
 310
 311     def html_status (self):
 312         # TODO
 313         return ''
 314
 315 class MasterTelyDocument (TelyDocument):
 316     def __init__ (self, filename, parent_translations=dict ([(lang, None) for lang in langdefs.LANGDICT.keys()])):
 317         TelyDocument.__init__ (self, filename)
 318         self.size = len (self.contents)
 319         self.word_count = tely_word_count (self.contents)
 320         translations = dict ([(lang, os.path.join (lang, filename)) for lang in langdefs.LANGDICT.keys()])
 321         self.translations = dict ([(lang, TranslatedTelyDocument (translations[lang], self, parent_translations.get (lang)))
 322                                    for lang in langdefs.LANGDICT.keys() if os.path.exists (translations[lang])])
 323         if self.translations:
 324             self.includes = [MasterTelyDocument (f, self.translations) for f in self.included_files]
 325         else:
 326             self.includes = []
 327
 328     # TODO
 329     def print_wc_priority (self):
 330         return
 331
 332     def html_status (self, numbering=SectionNumber ()):
 333         if self.title == 'Untitled' or not self.translations:
 334             return ''
 335         if self.level[1] == 0: # if self is a master document
 336             s = '''<table align="center" border="2">
 337  <tr align="center">
 338   <th>%s</th>''' % self.print_title (numbering)
 339             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations.keys ()])
 340             s += ' </tr>\n'
 341             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 342                 % sum (self.word_count)
 343
 344         else:
 345             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 346                 % (self.print_title (numbering), sum (self.word_count))
 347
 348         s += ''.join ([t.short_html_status () for t in self.translations.values ()])
 349         s += ' </tr>\n'
 350         s += ''.join ([i.html_status (numbering) for i in self.includes])
 351
 352         if self.level[1] == 0:
 353             s += '</table>\n<p></p>\n'
 354         return s
 355
 356     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 357         if self.title == 'Untitled' or not self.translations:
 358             return ''
 359
 360         s = ''
 361         if self.level[1] == 0: # if self is a master document
 362             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 363             s += ''.join (['%s'.ljust (colspec[1]) % l for l in self.translations.keys ()])
 364             s += '\n'
 365             s += ('Section titles (%d)' % sum (self.word_count)).ljust (colspec[0])
 366
 367         else:
 368             s = '%s (%d) ' \
 369                 % (self.print_title (numbering), sum (self.word_count))
 370             s = s.ljust (colspec[0])
 371
 372         s += ''.join ([t.text_status ().ljust(colspec[1]) for t in self.translations.values ()])
 373         s += '\n\n'
 374         s += ''.join ([i.text_status (numbering) for i in self.includes])
 375
 376         if self.level[1] == 0:
 377             s += '\n'
 378         return s
 379
 380
 381 progress ("Reading documents...")
 382
 383 tely_files = read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 384 master_docs = [MasterTelyDocument (os.path.normpath (filename)) for filename in tely_files]
 385 master_docs = [doc for doc in master_docs if doc.translations]
 386
 387 main_status_page = open ('translations.template.html.in').read ()
 388
 389 ## TODO
 390 #per_lang_status_pages = dict ([(l, open (os.path.join (l, 'translations.template.html')). read ())
 391 #                               for l in langdefs.LANGDICT.keys ()
 392 #                               if langdefs.LANGDICT[l].enabled])
 393
 394 progress ("Generating status pages...")
 395
 396 date_time = read_pipe ('LANG= date -u')[0]
 397
 398 main_status_html = ' <p><i>Last updated %s</i></p>\n' % date_time
 399 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 400
 401 html_re = re.compile ('<html>', re.I)
 402 end_body_re = re.compile ('</body>', re.I)
 403
 404 main_status_page = html_re.sub ('''<html>
 405 <!-- This page is automatically generated by translation-status.py from
 406 translations.template.html.in; DO NOT EDIT !-->''', main_status_page)
 407
 408 main_status_page = end_body_re.sub (main_status_html + '\n</body>', main_status_page)
 409
 410 open ('translations.html.in', 'w').write (main_status_page)
 411
 412 main_status_txt = '''Documentation translations status
 413 Generated %s
 414 NT = not translated
 415 FT = fully translated
 416
 417 ''' % date_time
 418
 419 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 420
 421 status_txt_file = 'out/translations-status.txt'
 422 progress ("Writing %s..." % status_txt_file)
 423 open (status_txt_file, 'w').write (main_status_txt)