buildscripts/translations-status.py

   1 #!/usr/bin/env python
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10   Writes translations.html.in and for each LANG in LANGUAGES
  11 translations.LANG.html.in
  12   Writes out/translations-status.txt
  13   Updates word counts in TRANSLATION
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20 import gettext
  21
  22 import langdefs
  23
  24 def progress (str):
  25     sys.stderr.write (str + '\n')
  26
# Announce the script name on stderr before doing any work.
progress ("translations-status.py")

# First command-line argument: directory holding the build scripts.
buildscript_dir = sys.argv[1]

# Identity function wrapped around the user-visible strings of format_table.
# NOTE(review): presumably a marker for message extraction -- confirm.
_doc = lambda s: s

# buildlib can only be imported once buildscript_dir has been appended to
# the module search path, so the order of the next two statements matters.
sys.path.append (buildscript_dir)
import buildlib

# load gettext messages catalogs
translation = langdefs.translation
  38
  39
# Texinfo comments: whole "@ignore ... @end ignore" blocks, or "@c " up to
# the end of the line.
comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
# Any run of whitespace; used to split text into words.
space_re = re.compile (r'\s+', re.M)
# LilyPond snippets: inline "@lilypond{...}" or a block closed by
# "@end lilypond".
lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
# A whole "@node ..." line; used to split a document into nodes.
node_re = re.compile ('^@node .*?$', re.M)
# A sectioning command line: group 1 is the command name (without "@"),
# group 2 is the title text.
title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
# An "@include FILE" line: group 1 is the included file name.
include_re = re.compile ('^@include (.*?)$', re.M)

# Header comments of translated files; group 1 is the text after the colon.
# All three are matched case-insensitively.
translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$', re.M | re.I)
status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
# Searched inside the translation status text; "." accepts any single
# character between "post" and "GDP".
post_gdp_re = re.compile ('post.GDP', re.I)
# A node containing this string is counted as not translated.
untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
# A file containing this string is not considered partially translated.
skeleton_str = '-- SKELETON FILE --'
  53
# Display strings for each translation status, keyed by status name and then
# by output format ('short', 'long', 'abbr', 'vague', 'color').
# Entries are %-formatted with a mapping providing 'p' (a percentage), hence
# the doubled '%%' for literal percent signs.
# 'color' values are hexadecimal RGB triplets without the leading '#'.
# 'pre-GDP' and 'post-GDP' map directly to a string, not to a dictionary.
format_table = {
    'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
                       'long':_doc ('not translated')},
    'partially translated': {'color':'dfef77', 'short':_doc ('partially (%(p)d %%)'),
                             'abbr':'%(p)d%%', 'long':_doc ('partially translated (%(p)d %%)')},
    'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
                         'long': _doc ('translated')},
    'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'), 'abbr':'100%%',
                   'vague':_doc ('up to date')},
    'outdated': {'short':_doc ('partially (%(p)d %%)'), 'abbr':'%(p)d%%',
                 'vague':_doc ('partially up to date')},
    'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
    'pre-GDP':_doc ('pre-GDP'),
    'post-GDP':_doc ('post-GDP')
}
  69
# Maps a Texinfo sectioning command to the (type, level) pair consumed by
# SectionNumber.increase.
# NOTE(review): title_re also matches commands that have no entry here
# (e.g. subsubsection, unnumberedsubsubsec, appendixsec); a file whose first
# title uses one of them raises KeyError in TelyDocument.__init__.
texi_level = {
# (Unnumbered/Numbered/Lettered, level)
    'top': ('u', 0),
    'unnumbered': ('u', 1),
    'unnumberedsec': ('u', 2),
    'unnumberedsubsec': ('u', 3),
    'chapter': ('n', 1),
    'section': ('n', 2),
    'subsection': ('n', 3),
    'appendix': ('l', 1)
}
  81
  82 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  83                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  84
  85 class SectionNumber (object):
  86     def __init__ (self):
  87         self.__data = [[0,'u']]
  88
  89     def __increase_last_index (self):
  90         type = self.__data[-1][1]
  91         if type == 'l':
  92             self.__data[-1][0] = self.__data[-1][0].translate (appendix_number_trans)
  93         elif type == 'n':
  94             self.__data[-1][0] += 1
  95
  96     def format (self):
  97         if self.__data[-1][1] == 'u':
  98             return ''
  99         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 100
 101     def increase (self, (type, level)):
 102         if level == 0:
 103             self.__data = [[0,'u']]
 104         while level + 1 < len (self.__data):
 105             del self.__data[-1]
 106         if level + 1 > len (self.__data):
 107             self.__data.append ([0, type])
 108             if type == 'l':
 109                 self.__data[-1][0] = '@'
 110         if type == self.__data[-1][1]:
 111             self.__increase_last_index ()
 112         else:
 113             self.__data[-1] = ([0, type])
 114             if type == 'l':
 115                 self.__data[-1][0] = 'A'
 116             elif type == 'n':
 117                 self.__data[-1][0] = 1
 118         return self.format ()
 119
 120
 121 def percentage_color (percent):
 122     p = percent / 100.0
 123     if p < 0.33:
 124         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 125     elif p < 0.67:
 126         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:] for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 127     else:
 128         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:] for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 129     return ''.join (c)
 130
 131
 132 def update_word_count (text, filename, word_count):
 133     return re.sub (r'(?m)^(\d+) *' + filename,
 134                    str (word_count).ljust (6) + filename,
 135                    text)
 136
 137 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 138
 139 def po_word_count (po_content):
 140     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 141     return len (space_re.split (s))
 142
 143 sgml_tag_re = re.compile (r'<.*?>', re.S)
 144
 145 def sgml_word_count (sgml_doc):
 146     s = sgml_tag_re.sub ('', sgml_doc)
 147     return len (space_re.split (s))
 148
 149 def tely_word_count (tely_doc):
 150     '''
 151     Calculate word count of a Texinfo document node by node.
 152
 153     Take string tely_doc as an argument.
 154     Return a list of integers.
 155
 156     Texinfo comments and @lilypond blocks are not included in word counts.
 157     '''
 158     tely_doc = comments_re.sub ('', tely_doc)
 159     tely_doc = lilypond_re.sub ('', tely_doc)
 160     nodes = node_re.split (tely_doc)
 161     return [len (space_re.split (n)) for n in nodes]
 162
 163
 164 class TelyDocument (object):
 165     def __init__ (self, filename):
 166         self.filename = filename
 167         self.contents = open (filename).read ()
 168
 169         ## record title and sectionning level of first Texinfo section
 170         m = title_re.search (self.contents)
 171         if m:
 172             self.title = m.group (2)
 173             self.level = texi_level [m.group (1)]
 174         else:
 175             self.title = 'Untitled'
 176             self.level = ('u', 1)
 177
 178         included_files = [os.path.join (os.path.dirname (filename), t) for t in include_re.findall (self.contents)]
 179         self.included_files = [p for p in included_files if os.path.exists (p)]
 180
 181     def print_title (self, section_number):
 182         return section_number.increase (self.level) + self.title
 183
 184
 185 class TranslatedTelyDocument (TelyDocument):
 186     def __init__ (self, filename, masterdocument, parent_translation=None):
 187         TelyDocument.__init__ (self, filename)
 188
 189         self.masterdocument = masterdocument
 190
 191         ## record authoring information
 192         m = translators_re.search (self.contents)
 193         if m:
 194             self.translators = [n.strip () for n in m.group (1).split (',')]
 195         else:
 196             self.translators = parent_translation.translators
 197         m = checkers_re.search (self.contents)
 198         if m:
 199             self.checkers = [n.strip () for n in m.group (1).split (',')]
 200         elif isinstance (parent_translation, TranslatedTelyDocument):
 201             self.checkers = parent_translation.checkers
 202         else:
 203             self.checkers = []
 204
 205         ## check whether translation is pre- or post-GDP
 206         m = status_re.search (self.contents)
 207         if m:
 208             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 209         else:
 210             self.post_gdp = False
 211
 212         ## record which parts (nodes) of the file are actually translated
 213         self.partially_translated = not skeleton_str in self.contents
 214         nodes = node_re.split (self.contents)
 215         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 216
 217         ## calculate translation percentage
 218         master_total_word_count = sum (masterdocument.word_count)
 219         translation_word_count = sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 220                                        for k in range (min (len (masterdocument.word_count), len (self.translated_nodes)))])
 221         self.translation_percentage = 100 * translation_word_count / master_total_word_count
 222
 223         ## calculate how much the file is outdated
 224         (diff_string, error) = buildlib.check_translated_doc (masterdocument.filename, self.contents)
 225         if error:
 226             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 227             self.uptodate_percentage = None
 228         else:
 229             diff = diff_string.splitlines ()
 230             insertions = sum ([len (l) - 1 for l in diff if l.startswith ('+') and not l.startswith ('+++')])
 231             deletions = sum ([len (l) - 1 for l in diff if l.startswith ('-') and not l.startswith ('---')])
 232             outdateness_percentage = 50.0 * (deletions + insertions) / (masterdocument.size + 0.5 * (deletions - insertions))
 233             self.uptodate_percentage = 100 - int (outdateness_percentage)
 234             if self.uptodate_percentage > 100:
 235                 alternative = 50
 236                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 237                               % (self.filename, self.uptodate_percentage, alternative))
 238                 self.uptodate_percentage = alternative
 239             elif self.uptodate_percentage < 1:
 240                 alternative = 1
 241                 progress ("%s: strange uptodateness percentage %d %%, setting to %d %%" \
 242                               % (self.filename, self.uptodate_percentage, alternative))
 243                 self.uptodate_percentage = alternative
 244
 245     def completeness (self, formats=['long']):
 246         if isinstance (formats, str):
 247             formats = [formats]
 248         p = self.translation_percentage
 249         if p == 0:
 250             status = 'not translated'
 251         elif p == 100:
 252             status = 'fully translated'
 253         else:
 254             status = 'partially translated'
 255         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 256
 257     def uptodateness (self, formats=['long']):
 258         if isinstance (formats, str):
 259             formats = [formats]
 260         p = self.uptodate_percentage
 261         if p == None:
 262             status = 'N/A'
 263         elif p == 100:
 264             status = 'up to date'
 265         else:
 266             status = 'outdated'
 267         l = {}
 268         for f in formats:
 269             if f == 'color' and p != None:
 270                 l['color'] = percentage_color (p)
 271             else:
 272                 l[f] = format_table[status][f] % locals ()
 273         return l
 274
 275     def gdp_status (self, translation=lambda s: s):
 276         if self.post_gdp:
 277             return translation (format-table['post-GDP'])
 278         else:
 279             return translation (format-table['pre-GDP'])
 280
 281     def short_html_status (self):
 282         s = '  <td>'
 283         if self.partially_translated:
 284             s += '<br>\n   '.join (self.translators) + '<br>\n'
 285             if self.checkers:
 286                 s += '   <small>' + '<br>\n   '.join (self.checkers) + '</small><br>\n'
 287
 288         c = self.completeness (['color', 'long'])
 289         s += '   <span style="background-color: #%(color)s">%(long)s</span><br>\n' % c
 290
 291         if self.partially_translated:
 292             u = self.uptodateness (['vague', 'color'])
 293             s += '   <span style="background-color: #%(color)s">%(vague)s</span><br>\n' % u
 294
 295         s += '  </td>\n'
 296         return s
 297
 298     def text_status (self):
 299         s = self.completeness ('abbr')['abbr'] + ' '
 300
 301         if self.partially_translated:
 302             s += self.uptodateness ('abbr')['abbr'] + ' '
 303         return s
 304
 305     def html_status (self):
 306         # TODO
 307         return ''
 308
 309 class MasterTelyDocument (TelyDocument):
 310     def __init__ (self, filename, parent_translations=dict ([(lang, None) for lang in langdefs.LANGDICT])):
 311         TelyDocument.__init__ (self, filename)
 312         self.size = len (self.contents)
 313         self.word_count = tely_word_count (self.contents)
 314         translations = dict ([(lang, os.path.join (lang, filename)) for lang in langdefs.LANGDICT])
 315         self.translations = dict ([(lang, TranslatedTelyDocument (translations[lang], self, parent_translations.get (lang)))
 316                                    for lang in langdefs.LANGDICT if os.path.exists (translations[lang])])
 317         if self.translations:
 318             self.includes = [MasterTelyDocument (f, self.translations) for f in self.included_files]
 319         else:
 320             self.includes = []
 321
 322     def update_word_counts (self, s):
 323         s = update_word_count (s, self.filename, sum (self.word_count))
 324         for i in self.includes:
 325             s = i.update_word_counts (s)
 326         return s
 327
 328     def html_status (self, numbering=SectionNumber ()):
 329         if self.title == 'Untitled' or not self.translations:
 330             return ''
 331         if self.level[1] == 0: # if self is a master document
 332             s = '''<table align="center" border="2">
 333  <tr align="center">
 334   <th>%s</th>''' % self.print_title (numbering)
 335             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations])
 336             s += ' </tr>\n'
 337             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 338                 % sum (self.word_count)
 339
 340         else:
 341             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 342                 % (self.print_title (numbering), sum (self.word_count))
 343
 344         s += ''.join ([t.short_html_status () for t in self.translations.values ()])
 345         s += ' </tr>\n'
 346         s += ''.join ([i.html_status (numbering) for i in self.includes])
 347
 348         if self.level[1] == 0:
 349             s += '</table>\n<p></p>\n'
 350         return s
 351
 352     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 353         if self.title == 'Untitled' or not self.translations:
 354             return ''
 355
 356         s = ''
 357         if self.level[1] == 0: # if self is a master document
 358             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 359             s += ''.join (['%s'.ljust (colspec[1]) % l for l in self.translations])
 360             s += '\n'
 361             s += ('Section titles (%d)' % sum (self.word_count)).ljust (colspec[0])
 362
 363         else:
 364             s = '%s (%d) ' \
 365                 % (self.print_title (numbering), sum (self.word_count))
 366             s = s.ljust (colspec[0])
 367
 368         s += ''.join ([t.text_status ().ljust(colspec[1]) for t in self.translations.values ()])
 369         s += '\n\n'
 370         s += ''.join ([i.text_status (numbering) for i in self.includes])
 371
 372         if self.level[1] == 0:
 373             s += '\n'
 374         return s
 375
 376
 377 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 378
 379 counts_re = re.compile (r'(?m)^(\d+) ')
 380
 381 def update_category_word_counts_sub (m):
 382     return '-' + m.group (1) + '-' + m.group (2) + \
 383         str (sum ([int (c) for c in counts_re.findall (m.group (2))])).ljust (6) + 'total'
 384
 385
 386 progress ("Reading documents...")
 387
 388 tely_files = buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
 389 master_docs = [MasterTelyDocument (os.path.normpath (filename)) for filename in tely_files]
 390 master_docs = [doc for doc in master_docs if doc.translations]
 391
 392 main_status_page = open ('translations.template.html.in').read ()
 393
 394 ## TODO
 395 #per_lang_status_pages = dict ([(l, open (os.path.join (l, 'translations.template.html')). read ())
 396 #                               for l in langdefs.LANGDICT
 397 #                               if langdefs.LANGDICT[l].enabled])
 398
 399 progress ("Generating status pages...")
 400
 401 date_time = buildlib.read_pipe ('LANG= date -u')[0]
 402
 403 main_status_html = ' <p><i>Last updated %s</i></p>\n' % date_time
 404 main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])
 405
 406 html_re = re.compile ('<html>', re.I)
 407 end_body_re = re.compile ('</body>', re.I)
 408
 409 main_status_page = html_re.sub ('''<html>
 410 <!-- This page is automatically generated by translation-status.py from
 411 translations.template.html.in; DO NOT EDIT !-->''', main_status_page)
 412
 413 main_status_page = end_body_re.sub (main_status_html + '\n</body>', main_status_page)
 414
 415 open ('translations.html.in', 'w').write (main_status_page)
 416
 417 main_status_txt = '''Documentation translations status
 418 Generated %s
 419 NT = not translated
 420 FT = fully translated
 421
 422 ''' % date_time
 423
 424 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
 425
 426 status_txt_file = 'out/translations-status.txt'
 427 progress ("Writing %s..." % status_txt_file)
 428 open (status_txt_file, 'w').write (main_status_txt)
 429
 430 translation_instructions_file = 'TRANSLATION'
 431 progress ("Updating %s..." % translation_instructions_file)
 432 translation_instructions = open (translation_instructions_file).read ()
 433
 434 for doc in master_docs:
 435     translation_instructions = doc.update_word_counts (translation_instructions)
 436
 437 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)', translation_instructions):
 438     word_count = sgml_word_count (open (html_file).read ())
 439     translation_instructions = update_word_count (translation_instructions,
 440                                                   html_file,
 441                                                   word_count)
 442
 443 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)', translation_instructions):
 444     word_count = po_word_count (open (po_file).read ())
 445     translation_instructions = update_word_count (translation_instructions,
 446                                                   po_file,
 447                                                   word_count)
 448
 449 translation_instructions = update_category_word_counts_re.sub (update_category_word_counts_sub,
 450                                                                translation_instructions)
 451
 452 open (translation_instructions_file, 'w').write (translation_instructions)