buildscripts/translations-status.py

   1 #!@PYTHON@
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html
   9 and for each LANG in LANGUAGES LANG/translations.template.html
  10
  11   Writes translations.html.in and for each LANG in LANGUAGES
  12 translations.LANG.html.in
  13 """
  14
  15 import sys
  16 import re
  17 import string
  18 import os
  19 import gettext
  20
  21 def progress (str):
  22     sys.stderr.write (str + '\n')
  23
  24 progress ("translations-status.py")
  25
  26 buildscript_dir = sys.argv[1]
  27 localedir = sys.argv[2]
  28
  29 _doc = lambda s: s
  30
  31 sys.path.append (buildscript_dir)
  32 import langdefs
  33
  34 # load gettext messages catalogs
  35 translation = {}
  36 for l in langdefs.LANGUAGES:
  37     if l.enabled and l.code != 'en':
  38         translation[l.code] = gettext.translation('lilypond-doc', localedir, [l.code]).gettext
  39
  40 def read_pipe (command):
  41     pipe = os.popen (command)
  42     output = pipe.read ()
  43     if pipe.close ():
  44         print "pipe failed: %(command)s" % locals ()
  45     return output
  46
  47 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
  48 space_re = re.compile (r'\s+', re.M)
  49 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
  50 node_re = re.compile ('^@node .*?$', re.M)
  51 title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
  52 include_re = re.compile ('^@include (.*?)$', re.M)
  53
  54 committish_re = re.compile ('GIT [Cc]ommittish: ([a-f0-9]+)')
  55 translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
  56 checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$', re.M | re.I)
  57 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
  58 post_gdp_re = re.compile ('post.GDP', re.I)
  59 untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
  60 skeleton_str = '-- SKELETON FILE --'
  61
  62 diff_cmd = 'git diff --no-color %(committish)s HEAD -- %(original)s | cat'
  63
  64 format_table = {
  65     'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'long':_doc ('not translated')},
  66     'partially translated': {'color':'dfef77', 'short':_doc ('partially (%(p)d %%)'),
  67                              'long':_doc ('partially translated (%(p)d %%)')},
  68     'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'long': _doc ('translated')},
  69     'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date')},
  70     'outdated': {'short':_doc ('partially (%(p)d %%)'), 'long':_doc ('partially up-to-date (%(p)d %%)')},
  71     'pre-GDP':_doc ('pre-GDP'),
  72     'post-GDP':_doc ('post-GDP')
  73 }
  74
  75 texi_level = {
  76 # (Unumbered/Numbered/Lettered, level)
  77     'top': ('u', 0),
  78     'unnumbered': ('u', 1),
  79     'unnumberedsec': ('u', 2),
  80     'unnumberedsubsec': ('u', 3),
  81     'chapter': ('n', 1),
  82     'section': ('n', 2),
  83     'subsection': ('n', 3),
  84     'appendix': ('l', 1)
  85 }
  86
  87 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY','ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  88
  89 class SectionNumber (object):
  90     def __init__ (self):
  91         self.__current_number = [0]
  92         self.__type = 'n'
  93
  94     def __increase_last_index (self):
  95         if isinstance (self.__current_number[-1], str):
  96             self.__current_number[-1] = self.__current_number[-1].translate (appendix_number_trans)
  97         else:
  98             self.__current_number[-1] += 1
  99
 100     # ugh, current implementation is too naive:
 101     # unnumbered stuff is really printed without number for @top only
 102     def format (self):
 103         if self.__current_number == [0] or self.__type == 'u':
 104             return ''
 105         return '.'.join ([str (i) for i in self.__current_number[1:]]) + ' '
 106
 107     def increase (self, (type, level)):
 108         if level == 0:
 109             self.__current_number = [0]
 110         while level + 1 < len (self.__current_number):
 111             del self.__current_number[-1]
 112         if level + 1 > len (self.__current_number):
 113             self.__type = type
 114             if type == 'l':
 115                 self.__current_number.append ('@')
 116             else:
 117                 self.__current_number.append (0)
 118
 119         if type == self.__type:
 120             self.__increase_last_index ()
 121         elif type == 'l' and level == 1:
 122             self.__current_number[-1] = 'A'
 123         else:
 124             self.__current_number[-1] = 1
 125         return self.format ()
 126
 127
 128 def percentage_color (percent):
 129     p = percent / 100.0
 130     if p < 0.33:
 131         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 132     elif p < 0.67:
 133         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 134     else:
 135         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:] for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 136     return ''.join (c)
 137
 138 def line_word_count (tely_line):
 139     if tely_line.startswith ('@'):
 140         return 0
 141     tely_line = comments_re.sub ('', tely_line)
 142     return len (space_re.split (tely_line))
 143
 144 def tely_word_count (tely_doc):
 145     '''
 146     Calculate word count of a Texinfo document node by node.
 147
 148     Take string tely_doc as an argument.
 149     Return a list of integers.
 150
 151     Texinfo comments and @lilypond blocks are not included in word counts.
 152     '''
 153     tely_doc = comments_re.sub ('', tely_doc)
 154     tely_doc = lilypond_re.sub ('', tely_doc)
 155     nodes = node_re.split (tely_doc)
 156     return [len (space_re.split (n)) for n in nodes]
 157
 158
 159 class TelyDocument (object):
 160     def __init__ (self, filename):
 161         self.filename = filename
 162         self.contents = open (filename).read ()
 163
 164         ## record title and sectionning level of first Texinfo section
 165         m = title_re.search (self.contents)
 166         if m:
 167             self.title = m.group (2)
 168             self.level = texi_level [m.group (1)]
 169         else:
 170             self.title = 'Untitled'
 171             self.level = ('u', 1)
 172
 173         included_files = [os.path.join (os.path.dirname (filename), t) for t in include_re.findall (self.contents)]
 174         self.included_files = [p for p in included_files if os.path.exists (p)]
 175
 176     def print_title (self, section_number):
 177         return section_number.increase (self.level) + self.title
 178
 179
 180 class TranslatedTelyDocument (TelyDocument):
 181     def __init__ (self, filename, masterdocument, parent_translation=None):
 182         TelyDocument.__init__ (self, filename)
 183
 184         self.masterdocument = masterdocument
 185
 186         ## record authoring information
 187         m = translators_re.search (self.contents)
 188         if m:
 189             self.translators = [n.strip () for n in m.group (1).split (',')]
 190         else:
 191             self.translators = parent_translation.translators
 192         m = checkers_re.search (self.contents)
 193         if m:
 194             self.checkers = [n.strip () for n in m.group (1).split (',')]
 195         elif isinstance (parent_translation, TranslatedTelyDocument):
 196             self.checkers = parent_translation.checkers
 197         else:
 198             self.checkers = []
 199
 200         ## check whether translation is pre- or post-GDP
 201         m = status_re.search (self.contents)
 202         if m:
 203             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 204         else:
 205             self.post_gdp = False
 206
 207         ## record which parts (nodes) of the file are actually translated
 208         self.partially_translated = not skeleton_str in self.contents
 209         nodes = node_re.split (self.contents)
 210         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 211
 212         ## calculate translation percentage
 213         master_total_word_count = sum (masterdocument.word_count)
 214         translation_word_count = sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 215                                        for k in range (min (len (masterdocument.word_count), len (self.translated_nodes)))])
 216         self.translation_percentage = 100 * translation_word_count / master_total_word_count
 217
 218         ## calculate how much the file is outdated
 219         m = committish_re.search (self.contents)
 220         if not m:
 221             sys.stderr.write ('error: ' + filename + \
 222                                   ": no 'GIT committish: <hash>' found.\nPlease check " + \
 223                                   'the whole file against the original in English, then ' + \
 224                                   'fill in HEAD committish in the header.\n')
 225             sys.exit (1)
 226         diff = read_pipe (diff_cmd % {'committish':m.group (1), 'original':masterdocument.filename}).splitlines ()
 227         insertions = sum ([line_word_count (l[1:]) for l in diff if l.startswith ('+') and not l.startswith ('+++')])
 228         deletions = sum ([line_word_count (l[1:]) for l in diff if l.startswith ('-') and not l.startswith ('---')])
 229         outdateness_percentage = 50.0 * (deletions + insertions) / (master_total_word_count + 0.5 * (deletions - insertions))
 230         self.uptodate_percentage = 100 - int (outdateness_percentage)
 231         if self.uptodate_percentage > 100:
 232             alternative = 50
 233             progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 234                           % (self.filename, self.uptodate_percentage, alternative))
 235             self.uptodate_percentage = alternative
 236         elif self.uptodate_percentage < 1:
 237             alternative = 1
 238             progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 239                           % (self.filename, self.uptodate_percentage, alternative))
 240             self.uptodate_percentage = alternative
 241
 242     def completeness (self, formats=['long']):
 243         if isinstance (formats, str):
 244             formats = [formats]
 245         p = self.translation_percentage
 246         if p == 0:
 247             status = 'not translated'
 248         elif p == 100:
 249             status = 'fully translated'
 250         else:
 251             status = 'partially translated'
 252         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 253
 254     def uptodateness (self, formats=['long']):
 255         if isinstance (formats, str):
 256             formats = [formats]
 257         p = self.uptodate_percentage
 258         if p == 100:
 259             status = 'up to date'
 260         else:
 261             status = 'outdated'
 262         l = {}
 263         for f in formats:
 264             if f == 'color':
 265                 l['color'] = percentage_color (p)
 266             else:
 267                 l[f] = format_table[status][f] % locals ()
 268         return l
 269
 270     def gdp_status (self, translation=lambda s: s):
 271         if self.post_gdp:
 272             return translation (format-table['post-GDP'])
 273         else:
 274             return translation (format-table['pre-GDP'])
 275
 276     def short_html_status (self):
 277         s = '  <td>'
 278         if self.partially_translated:
 279             s += '<br>\n   '.join (self.translators) + '<br>\n'
 280             if self.checkers:
 281                 s += '   <small>' + '<br>\n   '.join (self.checkers) + '</small><br>\n'
 282
 283         c = self.completeness (['long', 'color'])
 284         s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % c
 285
 286         if self.partially_translated:
 287             u = self.uptodateness (['long', 'color'])
 288             s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % u
 289
 290         s += '  </td>\n'
 291         return s
 292
 293     def html_status (self):
 294         # TODO
 295         return ''
 296
 297 class MasterTelyDocument (TelyDocument):
 298     def __init__ (self, filename, parent_translations=dict ([(lang, None) for lang in langdefs.LANGDICT.keys()])):
 299         #print "init MasterTelyDocument %s" % filename
 300         TelyDocument.__init__ (self, filename)
 301         self.word_count = tely_word_count (self.contents)
 302         translations = dict ([(lang, os.path.join (lang, filename)) for lang in langdefs.LANGDICT.keys()])
 303         #print translations
 304         self.translations = dict ([(lang, TranslatedTelyDocument (translations[lang], self, parent_translations.get (lang)))
 305                                    for lang in langdefs.LANGDICT.keys() if os.path.exists (translations[lang])])
 306         if self.translations:
 307             self.includes = [MasterTelyDocument (f, self.translations) for f in self.included_files]
 308         else:
 309             self.includes = []
 310
 311     # TODO
 312     def print_wc_priority (self):
 313         return
 314
 315     def html_status (self, numbering=SectionNumber ()):
 316         if self.title == 'Untitled' or not self.translations:
 317             return ''
 318         if self.level[1] == 0: # if self is a master document
 319             s = '''<table align="center" border="2">
 320  <tr align="center">
 321   <th>%s</th>''' % self.print_title (numbering)
 322             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations.keys ()])
 323             s += ' </tr>\n'
 324             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 325                 % sum (self.word_count)
 326
 327         else:
 328             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 329                 % (self.print_title (numbering), sum (self.word_count))
 330
 331         s += ''.join ([t.short_html_status () for t in self.translations.values ()])
 332         s += ' </tr>\n'
 333         s += ''.join ([i.html_status (numbering) for i in self.includes])
 334
 335         if self.level[1] == 0:
 336             s += '</table>\n<p></p>\n'
 337         return s
 338
 339 progress ("Reading documents...")
 340
 341 tely_files = read_pipe ("find -maxdepth 2 -name '*.tely'").splitlines ()
 342 master_docs = [MasterTelyDocument (filename) for filename in tely_files]
 343 master_docs = [doc for doc in master_docs if doc.translations]
 344
 345 main_status_page = open ('translations.template.html').read ()
 346
 347 ## TODO
 348 #per_lang_status_pages = dict ([(l, open (os.path.join (l, 'translations.template.html')). read ())
 349 #                               for l in langdefs.LANGDICT.keys ()
 350 #                               if langdefs.LANGDICT[l].enabled])
 351
 352 progress ("Generating status pages...")
 353
 354 main_status_html = ' <p><i>Last updated %s</i></p>\n' % read_pipe ('LANG= date -u')
 355 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 356
 357 html_re = re.compile ('<html>', re.I)
 358 end_body_re = re.compile ('</body>', re.I)
 359
 360 main_status_page = html_re.sub ('''<html>
 361 <!-- This page is automatically generated by translation-status.py from
 362 translations.template.html; DO NOT EDIT !-->''', main_status_page)
 363
 364 main_status_page = end_body_re.sub (main_status_html + '\n</body>', main_status_page)
 365
 366 open ('translations.html.in', 'w').write (main_status_page)