buildscripts/translations-status.py

   1 #!@PYTHON@
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10   Writes translations.html.in and for each LANG in LANGUAGES
  11 translations.LANG.html.in
  12   Writes out/translations-status.txt
  13   Updates word counts in TRANSLATION
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20 import gettext
  21
  22 def progress (str):
  23     sys.stderr.write (str + '\n')
  24
progress ("translations-status.py")

# Positional command-line arguments, in the order given by USAGE in the
# module docstring: BUILDSCRIPT-DIR, then LOCALEDIR.
buildscript_dir = sys.argv[1]
localedir = sys.argv[2]

# Identity function wrapped around the user-visible strings of format_table.
# NOTE(review): presumably a marker that lets a string-extraction tool
# collect those strings for translation -- confirm.
_doc = lambda s: s

# buildscript_dir has to be on the module search path before the two
# imports below (presumably langdefs and buildlib live there -- confirm).
sys.path.append (buildscript_dir)
import langdefs
import buildlib
  35
  36 # load gettext messages catalogs
  37 translation = {}
  38 for l in langdefs.LANGUAGES:
  39     if l.enabled and l.code != 'en':
  40         translation[l.code] = gettext.translation('lilypond-doc', localedir, [l.code]).gettext
  41
  42
# An '@ignore' ... '@end ignore' block (both markers on lines of their own),
# or an '@c ' comment up to the end of its line.
comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
# A run of whitespace; used as the word separator by the word counters.
space_re = re.compile (r'\s+', re.M)
# Either '@lilypond{...}' on one line, or everything from '@lilypond' up to
# the next line consisting of '@end lilypond'.
lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
# A whole '@node ...' line; documents are split into nodes on these lines.
node_re = re.compile ('^@node .*?$', re.M)
# A sectioning command line: group 1 is the command name (without '@'),
# group 2 is the title text that follows it.
title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
# An '@include FILE' line; group 1 is FILE.
include_re = re.compile ('^@include (.*?)$', re.M)

# Case-insensitive '@c Translators: ...', '@c Translation checkers: ...' and
# '@c Translation status: ...' comment lines; group 1 is the text after the
# colon.
translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$', re.M | re.I)
status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
# 'post', any single character, then 'GDP' (case-insensitive); searched for
# in the text captured by status_re.
post_gdp_re = re.compile ('post.GDP', re.I)
# Marker strings looked for in translated files: the first flags a node as
# not translated, the second flags the whole file as a skeleton.
untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
skeleton_str = '-- SKELETON FILE --'
  56
# Display strings for each translation status, keyed by status name and then
# by format name ('color', 'short', 'abbr', 'long', 'vague').
# - Values are '%'-format templates: '%(p)d' is replaced by a percentage and
#   '%%' yields a literal percent sign.
# - 'color' values are six hex digits, inserted after '#' in a CSS
#   background-color by the HTML output code.
# - Not every status defines every format key (for instance 'outdated' has
#   no 'long' and no 'color' entry), so asking for a missing one raises
#   KeyError.
# - Unlike the other entries, 'pre-GDP' and 'post-GDP' map directly to a
#   string instead of a dictionary.
format_table = {
    'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
                       'long':_doc ('not translated')},
    'partially translated': {'color':'dfef77', 'short':_doc ('partially (%(p)d %%)'),
                             'abbr':'%(p)d%%', 'long':_doc ('partially translated (%(p)d %%)')},
    'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
                         'long': _doc ('translated')},
    'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'), 'abbr':'100%%',
                   'vague':_doc ('up to date')},
    'outdated': {'short':_doc ('partially (%(p)d %%)'), 'abbr':'%(p)d%%',
                 'vague':_doc ('partially up to date')},
    'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
    'pre-GDP':_doc ('pre-GDP'),
    'post-GDP':_doc ('post-GDP')
}
  72
  73 texi_level = {
  74 # (Unumbered/Numbered/Lettered, level)
  75     'top': ('u', 0),
  76     'unnumbered': ('u', 1),
  77     'unnumberedsec': ('u', 2),
  78     'unnumberedsubsec': ('u', 3),
  79     'chapter': ('n', 1),
  80     'section': ('n', 2),
  81     'subsection': ('n', 3),
  82     'appendix': ('l', 1)
  83 }
  84
  85 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  86                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  87
  88 class SectionNumber (object):
  89     def __init__ (self):
  90         self.__data = [[0,'u']]
  91
  92     def __increase_last_index (self):
  93         type = self.__data[-1][1]
  94         if type == 'l':
  95             self.__data[-1][0] = self.__data[-1][0].translate (appendix_number_trans)
  96         elif type == 'n':
  97             self.__data[-1][0] += 1
  98
  99     def format (self):
 100         if self.__data[-1][1] == 'u':
 101             return ''
 102         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 103
 104     def increase (self, (type, level)):
 105         if level == 0:
 106             self.__data = [[0,'u']]
 107         while level + 1 < len (self.__data):
 108             del self.__data[-1]
 109         if level + 1 > len (self.__data):
 110             self.__data.append ([0, type])
 111             if type == 'l':
 112                 self.__data[-1][0] = '@'
 113         if type == self.__data[-1][1]:
 114             self.__increase_last_index ()
 115         else:
 116             self.__data[-1] = ([0, type])
 117             if type == 'l':
 118                 self.__data[-1][0] = 'A'
 119             elif type == 'n':
 120                 self.__data[-1][0] = 1
 121         return self.format ()
 122
 123
 124 def percentage_color (percent):
 125     p = percent / 100.0
 126     if p < 0.33:
 127         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 128     elif p < 0.67:
 129         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 130     else:
 131         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:] for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 132     return ''.join (c)
 133
 134
 135 def update_word_count (text, filename, word_count):
 136     return re.sub (r'(?m)^(\d+) *' + filename,
 137                    str (word_count).ljust (6) + filename,
 138                    text)
 139
 140 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 141
 142 def po_word_count (po_content):
 143     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 144     return len (space_re.split (s))
 145
 146 sgml_tag_re = re.compile (r'<.*?>', re.S)
 147
 148 def sgml_word_count (sgml_doc):
 149     s = sgml_tag_re.sub ('', sgml_doc)
 150     return len (space_re.split (s))
 151
 152 def tely_word_count (tely_doc):
 153     '''
 154     Calculate word count of a Texinfo document node by node.
 155
 156     Take string tely_doc as an argument.
 157     Return a list of integers.
 158
 159     Texinfo comments and @lilypond blocks are not included in word counts.
 160     '''
 161     tely_doc = comments_re.sub ('', tely_doc)
 162     tely_doc = lilypond_re.sub ('', tely_doc)
 163     nodes = node_re.split (tely_doc)
 164     return [len (space_re.split (n)) for n in nodes]
 165
 166
 167 class TelyDocument (object):
 168     def __init__ (self, filename):
 169         self.filename = filename
 170         self.contents = open (filename).read ()
 171
 172         ## record title and sectionning level of first Texinfo section
 173         m = title_re.search (self.contents)
 174         if m:
 175             self.title = m.group (2)
 176             self.level = texi_level [m.group (1)]
 177         else:
 178             self.title = 'Untitled'
 179             self.level = ('u', 1)
 180
 181         included_files = [os.path.join (os.path.dirname (filename), t) for t in include_re.findall (self.contents)]
 182         self.included_files = [p for p in included_files if os.path.exists (p)]
 183
 184     def print_title (self, section_number):
 185         return section_number.increase (self.level) + self.title
 186
 187
 188 class TranslatedTelyDocument (TelyDocument):
 189     def __init__ (self, filename, masterdocument, parent_translation=None):
 190         TelyDocument.__init__ (self, filename)
 191
 192         self.masterdocument = masterdocument
 193
 194         ## record authoring information
 195         m = translators_re.search (self.contents)
 196         if m:
 197             self.translators = [n.strip () for n in m.group (1).split (',')]
 198         else:
 199             self.translators = parent_translation.translators
 200         m = checkers_re.search (self.contents)
 201         if m:
 202             self.checkers = [n.strip () for n in m.group (1).split (',')]
 203         elif isinstance (parent_translation, TranslatedTelyDocument):
 204             self.checkers = parent_translation.checkers
 205         else:
 206             self.checkers = []
 207
 208         ## check whether translation is pre- or post-GDP
 209         m = status_re.search (self.contents)
 210         if m:
 211             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 212         else:
 213             self.post_gdp = False
 214
 215         ## record which parts (nodes) of the file are actually translated
 216         self.partially_translated = not skeleton_str in self.contents
 217         nodes = node_re.split (self.contents)
 218         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 219
 220         ## calculate translation percentage
 221         master_total_word_count = sum (masterdocument.word_count)
 222         translation_word_count = sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 223                                        for k in range (min (len (masterdocument.word_count), len (self.translated_nodes)))])
 224         self.translation_percentage = 100 * translation_word_count / master_total_word_count
 225
 226         ## calculate how much the file is outdated
 227         (diff_string, error) = buildlib.check_translated_doc (masterdocument.filename, self.contents)
 228         if error:
 229             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 230             self.uptodate_percentage = None
 231         else:
 232             diff = diff_string.splitlines ()
 233             insertions = sum ([len (l) - 1 for l in diff if l.startswith ('+') and not l.startswith ('+++')])
 234             deletions = sum ([len (l) - 1 for l in diff if l.startswith ('-') and not l.startswith ('---')])
 235             outdateness_percentage = 50.0 * (deletions + insertions) / (masterdocument.size + 0.5 * (deletions - insertions))
 236             self.uptodate_percentage = 100 - int (outdateness_percentage)
 237             if self.uptodate_percentage > 100:
 238                 alternative = 50
 239                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 240                               % (self.filename, self.uptodate_percentage, alternative))
 241                 self.uptodate_percentage = alternative
 242             elif self.uptodate_percentage < 1:
 243                 alternative = 1
 244                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 245                               % (self.filename, self.uptodate_percentage, alternative))
 246                 self.uptodate_percentage = alternative
 247
 248     def completeness (self, formats=['long']):
 249         if isinstance (formats, str):
 250             formats = [formats]
 251         p = self.translation_percentage
 252         if p == 0:
 253             status = 'not translated'
 254         elif p == 100:
 255             status = 'fully translated'
 256         else:
 257             status = 'partially translated'
 258         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 259
 260     def uptodateness (self, formats=['long']):
 261         if isinstance (formats, str):
 262             formats = [formats]
 263         p = self.uptodate_percentage
 264         if p == None:
 265             status = 'N/A'
 266         elif p == 100:
 267             status = 'up to date'
 268         else:
 269             status = 'outdated'
 270         l = {}
 271         for f in formats:
 272             if f == 'color' and p != None:
 273                 l['color'] = percentage_color (p)
 274             else:
 275                 l[f] = format_table[status][f] % locals ()
 276         return l
 277
 278     def gdp_status (self, translation=lambda s: s):
 279         if self.post_gdp:
 280             return translation (format-table['post-GDP'])
 281         else:
 282             return translation (format-table['pre-GDP'])
 283
 284     def short_html_status (self):
 285         s = '  <td>'
 286         if self.partially_translated:
 287             s += '<br>\n   '.join (self.translators) + '<br>\n'
 288             if self.checkers:
 289                 s += '   <small>' + '<br>\n   '.join (self.checkers) + '</small><br>\n'
 290
 291         c = self.completeness (['color', 'long'])
 292         s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % c
 293
 294         if self.partially_translated:
 295             u = self.uptodateness (['vague', 'color'])
 296             s += '   <span style="background-color: #%(color)s">%(vague)s</span><br>\n' % u
 297
 298         s += '  </td>\n'
 299         return s
 300
 301     def text_status (self):
 302         s = self.completeness ('abbr')['abbr'] + ' '
 303
 304         if self.partially_translated:
 305             s += self.uptodateness ('abbr')['abbr'] + ' '
 306         return s
 307
 308     def html_status (self):
 309         # TODO
 310         return ''
 311
 312 class MasterTelyDocument (TelyDocument):
 313     def __init__ (self, filename, parent_translations=dict ([(lang, None) for lang in langdefs.LANGDICT.keys()])):
 314         TelyDocument.__init__ (self, filename)
 315         self.size = len (self.contents)
 316         self.word_count = tely_word_count (self.contents)
 317         translations = dict ([(lang, os.path.join (lang, filename)) for lang in langdefs.LANGDICT.keys()])
 318         self.translations = dict ([(lang, TranslatedTelyDocument (translations[lang], self, parent_translations.get (lang)))
 319                                    for lang in langdefs.LANGDICT.keys() if os.path.exists (translations[lang])])
 320         if self.translations:
 321             self.includes = [MasterTelyDocument (f, self.translations) for f in self.included_files]
 322         else:
 323             self.includes = []
 324
 325     def update_word_counts (self, s):
 326         s = update_word_count (s, self.filename, sum (self.word_count))
 327         for i in self.includes:
 328             s = i.update_word_counts (s)
 329         return s
 330
 331     def html_status (self, numbering=SectionNumber ()):
 332         if self.title == 'Untitled' or not self.translations:
 333             return ''
 334         if self.level[1] == 0: # if self is a master document
 335             s = '''<table align="center" border="2">
 336  <tr align="center">
 337   <th>%s</th>''' % self.print_title (numbering)
 338             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations.keys ()])
 339             s += ' </tr>\n'
 340             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 341                 % sum (self.word_count)
 342
 343         else:
 344             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 345                 % (self.print_title (numbering), sum (self.word_count))
 346
 347         s += ''.join ([t.short_html_status () for t in self.translations.values ()])
 348         s += ' </tr>\n'
 349         s += ''.join ([i.html_status (numbering) for i in self.includes])
 350
 351         if self.level[1] == 0:
 352             s += '</table>\n<p></p>\n'
 353         return s
 354
 355     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 356         if self.title == 'Untitled' or not self.translations:
 357             return ''
 358
 359         s = ''
 360         if self.level[1] == 0: # if self is a master document
 361             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 362             s += ''.join (['%s'.ljust (colspec[1]) % l for l in self.translations.keys ()])
 363             s += '\n'
 364             s += ('Section titles (%d)' % sum (self.word_count)).ljust (colspec[0])
 365
 366         else:
 367             s = '%s (%d) ' \
 368                 % (self.print_title (numbering), sum (self.word_count))
 369             s = s.ljust (colspec[0])
 370
 371         s += ''.join ([t.text_status ().ljust(colspec[1]) for t in self.translations.values ()])
 372         s += '\n\n'
 373         s += ''.join ([i.text_status (numbering) for i in self.includes])
 374
 375         if self.level[1] == 0:
 376             s += '\n'
 377         return s
 378
 379
 380 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 381
 382 counts_re = re.compile (r'(?m)^(\d+) ')
 383
 384 def update_category_word_counts_sub (m):
 385     return '-' + m.group (1) + '-' + m.group (2) + \
 386         str (sum ([int (c) for c in counts_re.findall (m.group (2))])).ljust (6) + 'total'
 387
 388
progress ("Reading documents...")

# Run `find` for the *.tely files at most two directory levels deep,
# starting from the current directory.
# NOTE(review): element [0] of read_pipe's result is presumably the
# command's standard output -- confirm against buildlib.
tely_files = buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
master_docs = [MasterTelyDocument (os.path.normpath (filename)) for filename in tely_files]
# Keep only the documents that have at least one translation.
master_docs = [doc for doc in master_docs if doc.translations]
 394
 395 main_status_page = open ('translations.template.html.in').read ()
 396
 397 ## TODO
 398 #per_lang_status_pages = dict ([(l, open (os.path.join (l, 'translations.template.html')). read ())
 399 #                               for l in langdefs.LANGDICT.keys ()
 400 #                               if langdefs.LANGDICT[l].enabled])
 401
 402 progress ("Generating status pages...")
 403
 404 date_time = buildlib.read_pipe ('LANG= date -u')[0]
 405
 406 main_status_html = ' <p><i>Last updated %s</i></p>\n' % date_time
 407 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 408
 409 html_re = re.compile ('<html>', re.I)
 410 end_body_re = re.compile ('</body>', re.I)
 411
 412 main_status_page = html_re.sub ('''<html>
 413 <!-- This page is automatically generated by translation-status.py from
 414 translations.template.html.in; DO NOT EDIT !-->''', main_status_page)
 415
 416 main_status_page = end_body_re.sub (main_status_html + '\n</body>', main_status_page)
 417
 418 open ('translations.html.in', 'w').write (main_status_page)
 419
 420 main_status_txt = '''Documentation translations status
 421 Generated %s
 422 NT = not translated
 423 FT = fully translated
 424
 425 ''' % date_time
 426
 427 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 428
 429 status_txt_file = 'out/translations-status.txt'
 430 progress ("Writing %s..." % status_txt_file)
 431 open (status_txt_file, 'w').write (main_status_txt)
 432
 433 translation_instructions_file = 'TRANSLATION'
 434 progress ("Updating %s..." % translation_instructions_file)
 435 translation_instructions = open (translation_instructions_file).read ()
 436
 437 for doc in master_docs:
 438     translation_instructions = doc.update_word_counts (translation_instructions)
 439
 440 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)', translation_instructions):
 441     word_count = sgml_word_count (open (html_file).read ())
 442     translation_instructions = update_word_count (translation_instructions,
 443                                                   html_file,
 444                                                   word_count)
 445
 446 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)', translation_instructions):
 447     word_count = po_word_count (open (po_file).read ())
 448     translation_instructions = update_word_count (translation_instructions,
 449                                                   po_file,
 450                                                   word_count)
 451
 452 translation_instructions = update_category_word_counts_re.sub (update_category_word_counts_sub,
 453                                                                translation_instructions)
 454
 455 open (translation_instructions_file, 'w').write (translation_instructions)