buildscripts/translations-status.py

   1 #!/usr/bin/env python
   2
   3 """
   4 USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR
   5
   6   This script must be run from Documentation/
   7
   8   Reads template files translations.template.html.in
   9 and for each LANG in LANGUAGES LANG/translations.template.html.in
  10   Writes translations.html.in and for each LANG in LANGUAGES
  11 translations.LANG.html.in
  12   Writes out/translations-status.txt
  13   Updates word counts in TRANSLATION
  14 """
  15
  16 import sys
  17 import re
  18 import string
  19 import os
  20 import gettext
  21
  22 import langdefs
  23
  24 def progress (str):
  25     sys.stderr.write (str + '\n')
  26
# Announce the script name on stderr.
progress ("translations-status.py")

# First command-line argument: directory that is added to the module
# search path below so that buildlib can be imported.
buildscript_dir = sys.argv[1]

# Identity function wrapped around the user-visible strings of format_table.
# NOTE(review): presumably a marker for message extraction -- confirm.
_doc = lambda s: s

# buildlib can only be imported once buildscript_dir is on sys.path,
# so the order of the next two statements matters.
sys.path.append (buildscript_dir)
import buildlib

# load gettext messages catalogs
translation = langdefs.translation
  38
  39
# Texinfo comments: whole '@ignore' ... '@end ignore' blocks and '@c ' lines.
comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
# Any run of whitespace; used to split text into words.
space_re = re.compile (r'\s+', re.M)
# '@lilypond{...}' in braces or an '@lilypond' ... '@end lilypond' block.
lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
# A whole '@node' line; used to split a document into nodes.
node_re = re.compile ('^@node .*?$', re.M)
# A sectioning command line: group 1 is the command name (looked up in
# texi_level), group 2 is the title text.
title_re = re.compile ('^@(top|chapter|(?:sub){0,2}section|' + \
'(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
# '@include FILE' lines; group 1 is the file name.
include_re = re.compile ('^@include (.*?)$', re.M)

# Case-insensitive '@c Translators: ...' comment; group 1 is the list text.
translators_re = re.compile (r'^@c\s+Translators\s*:\s*(.*?)$', re.M | re.I)
# Case-insensitive '@c Translation checkers: ...' comment.
checkers_re = re.compile (r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$',
                          re.M | re.I)
# Case-insensitive '@c Translation status: ...' comment.
status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
# Searched for inside the status text to detect a post-GDP translation.
post_gdp_re = re.compile ('post.GDP', re.I)
# A node containing this marker is counted as not translated.
untranslated_node_str = 'UNTRANSLATED NODE: IGNORE ME'
# A file containing this marker is not considered partially translated.
skeleton_str = '-- SKELETON FILE --'
  55
# Display strings for each translation status.  The dictionary entries are
# indexed by format name ('color', 'short', 'abbr', 'long', 'vague') and are
# %-formatted by completeness () and uptodateness () with a mapping that
# provides the percentage as 'p'; hence the doubled '%%' for a literal
# percent sign.  'up to date' and 'outdated' have no 'color' entry:
# uptodateness () computes that color with percentage_color () instead.
# 'pre-GDP' and 'post-GDP' map directly to a string, not to a dictionary.
format_table = {
    'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
                       'long':_doc ('not translated')},
    'partially translated': {'color':'dfef77',
                             'short':_doc ('partially (%(p)d %%)'),
                             'abbr':'%(p)d%%',
                             'long':_doc ('partially translated (%(p)d %%)')},
    'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
                         'long': _doc ('translated')},
    'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
                   'abbr':'100%%', 'vague':_doc ('up to date')},
    'outdated': {'short':_doc ('partially (%(p)d %%)'), 'abbr':'%(p)d%%',
                 'vague':_doc ('partially up to date')},
    'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
    'pre-GDP':_doc ('pre-GDP'),
    'post-GDP':_doc ('post-GDP')
}
  73
  74 texi_level = {
  75 # (Unumbered/Numbered/Lettered, level)
  76     'top': ('u', 0),
  77     'unnumbered': ('u', 1),
  78     'unnumberedsec': ('u', 2),
  79     'unnumberedsubsec': ('u', 3),
  80     'chapter': ('n', 1),
  81     'section': ('n', 2),
  82     'subsection': ('n', 3),
  83     'appendix': ('l', 1)
  84 }
  85
  86 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
  87                                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
  88
  89 class SectionNumber (object):
  90     def __init__ (self):
  91         self.__data = [[0,'u']]
  92
  93     def __increase_last_index (self):
  94         type = self.__data[-1][1]
  95         if type == 'l':
  96             self.__data[-1][0] = \
  97                 self.__data[-1][0].translate (appendix_number_trans)
  98         elif type == 'n':
  99             self.__data[-1][0] += 1
 100
 101     def format (self):
 102         if self.__data[-1][1] == 'u':
 103             return ''
 104         return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
 105
 106     def increase (self, (type, level)):
 107         if level == 0:
 108             self.__data = [[0,'u']]
 109         while level + 1 < len (self.__data):
 110             del self.__data[-1]
 111         if level + 1 > len (self.__data):
 112             self.__data.append ([0, type])
 113             if type == 'l':
 114                 self.__data[-1][0] = '@'
 115         if type == self.__data[-1][1]:
 116             self.__increase_last_index ()
 117         else:
 118             self.__data[-1] = ([0, type])
 119             if type == 'l':
 120                 self.__data[-1][0] = 'A'
 121             elif type == 'n':
 122                 self.__data[-1][0] = 1
 123         return self.format ()
 124
 125
 126 def percentage_color (percent):
 127     p = percent / 100.0
 128     if p < 0.33:
 129         c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
 130              for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
 131     elif p < 0.67:
 132         c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
 133              for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
 134     else:
 135         c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
 136              for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
 137     return ''.join (c)
 138
 139
 140 def update_word_count (text, filename, word_count):
 141     return re.sub (r'(?m)^(\d+) *' + filename,
 142                    str (word_count).ljust (6) + filename,
 143                    text)
 144
 145 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
 146
 147 def po_word_count (po_content):
 148     s = ' '.join ([''.join (t) for t in po_msgid_re.findall (po_content)])
 149     return len (space_re.split (s))
 150
 151 sgml_tag_re = re.compile (r'<.*?>', re.S)
 152
 153 def sgml_word_count (sgml_doc):
 154     s = sgml_tag_re.sub ('', sgml_doc)
 155     return len (space_re.split (s))
 156
 157 def tely_word_count (tely_doc):
 158     '''
 159     Calculate word count of a Texinfo document node by node.
 160
 161     Take string tely_doc as an argument.
 162     Return a list of integers.
 163
 164     Texinfo comments and @lilypond blocks are not included in word counts.
 165     '''
 166     tely_doc = comments_re.sub ('', tely_doc)
 167     tely_doc = lilypond_re.sub ('', tely_doc)
 168     nodes = node_re.split (tely_doc)
 169     return [len (space_re.split (n)) for n in nodes]
 170
 171
 172 class TelyDocument (object):
 173     def __init__ (self, filename):
 174         self.filename = filename
 175         self.contents = open (filename).read ()
 176
 177         ## record title and sectionning level of first Texinfo section
 178         m = title_re.search (self.contents)
 179         if m:
 180             self.title = m.group (2)
 181             self.level = texi_level [m.group (1)]
 182         else:
 183             self.title = 'Untitled'
 184             self.level = ('u', 1)
 185
 186         included_files = [os.path.join (os.path.dirname (filename), t)
 187                           for t in include_re.findall (self.contents)]
 188         self.included_files = [p for p in included_files if os.path.exists (p)]
 189
 190     def print_title (self, section_number):
 191         return section_number.increase (self.level) + self.title
 192
 193
 194 class TranslatedTelyDocument (TelyDocument):
 195     def __init__ (self, filename, masterdocument, parent_translation=None):
 196         TelyDocument.__init__ (self, filename)
 197
 198         self.masterdocument = masterdocument
 199
 200         ## record authoring information
 201         m = translators_re.search (self.contents)
 202         if m:
 203             self.translators = [n.strip () for n in m.group (1).split (',')]
 204         else:
 205             self.translators = parent_translation.translators
 206         m = checkers_re.search (self.contents)
 207         if m:
 208             self.checkers = [n.strip () for n in m.group (1).split (',')]
 209         elif isinstance (parent_translation, TranslatedTelyDocument):
 210             self.checkers = parent_translation.checkers
 211         else:
 212             self.checkers = []
 213
 214         ## check whether translation is pre- or post-GDP
 215         m = status_re.search (self.contents)
 216         if m:
 217             self.post_gdp = bool (post_gdp_re.search (m.group (1)))
 218         else:
 219             self.post_gdp = False
 220
 221         ## record which parts (nodes) of the file are actually translated
 222         self.partially_translated = not skeleton_str in self.contents
 223         nodes = node_re.split (self.contents)
 224         self.translated_nodes = [not untranslated_node_str in n for n in nodes]
 225
 226         ## calculate translation percentage
 227         master_total_word_count = sum (masterdocument.word_count)
 228         translation_word_count = \
 229             sum ([masterdocument.word_count[k] * self.translated_nodes[k]
 230                   for k in range (min (len (masterdocument.word_count),
 231                                        len (self.translated_nodes)))])
 232         self.translation_percentage = \
 233             100 * translation_word_count / master_total_word_count
 234
 235         ## calculate how much the file is outdated
 236         (diff_string, error) = \
 237             buildlib.check_translated_doc (masterdocument.filename, self.contents)
 238         if error:
 239             sys.stderr.write ('warning: %s: %s' % (self.filename, error))
 240             self.uptodate_percentage = None
 241         else:
 242             diff = diff_string.splitlines ()
 243             insertions = sum ([len (l) - 1 for l in diff
 244                                if l.startswith ('+')
 245                                and not l.startswith ('+++')])
 246             deletions = sum ([len (l) - 1 for l in diff
 247                               if l.startswith ('-')
 248                               and not l.startswith ('---')])
 249             outdateness_percentage = 50.0 * (deletions + insertions) / \
 250                 (masterdocument.size + 0.5 * (deletions - insertions))
 251             self.uptodate_percentage = 100 - int (outdateness_percentage)
 252             if self.uptodate_percentage > 100:
 253                 alternative = 50
 254                 progress ("%s: strange uptodateness percentage %d %%, \
 255 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 256                 self.uptodate_percentage = alternative
 257             elif self.uptodate_percentage < 1:
 258                 alternative = 1
 259                 progress ("%s: strange uptodateness percentage %d %%, \
 260 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
 261                 self.uptodate_percentage = alternative
 262
 263     def completeness (self, formats=['long']):
 264         if isinstance (formats, str):
 265             formats = [formats]
 266         p = self.translation_percentage
 267         if p == 0:
 268             status = 'not translated'
 269         elif p == 100:
 270             status = 'fully translated'
 271         else:
 272             status = 'partially translated'
 273         return dict ([(f, format_table[status][f] % locals()) for f in formats])
 274
 275     def uptodateness (self, formats=['long']):
 276         if isinstance (formats, str):
 277             formats = [formats]
 278         p = self.uptodate_percentage
 279         if p == None:
 280             status = 'N/A'
 281         elif p == 100:
 282             status = 'up to date'
 283         else:
 284             status = 'outdated'
 285         l = {}
 286         for f in formats:
 287             if f == 'color' and p != None:
 288                 l['color'] = percentage_color (p)
 289             else:
 290                 l[f] = format_table[status][f] % locals ()
 291         return l
 292
 293     def gdp_status (self, translation=lambda s: s):
 294         if self.post_gdp:
 295             return translation (format-table['post-GDP'])
 296         else:
 297             return translation (format-table['pre-GDP'])
 298
 299     def short_html_status (self):
 300         s = '  <td>'
 301         if self.partially_translated:
 302             s += '<br>\n   '.join (self.translators) + '<br>\n'
 303             if self.checkers:
 304                 s += '   <small>' + \
 305                     '<br>\n   '.join (self.checkers) + '</small><br>\n'
 306
 307         c = self.completeness (['color', 'long'])
 308         s += '   <span style="background-color: #%(color)s">\
 309 %(long)s</span><br>\n' % c
 310
 311         if self.partially_translated:
 312             u = self.uptodateness (['vague', 'color'])
 313             s += '   <span style="background-color: #%(color)s">\
 314 %(vague)s</span><br>\n' % u
 315
 316         s += '  </td>\n'
 317         return s
 318
 319     def text_status (self):
 320         s = self.completeness ('abbr')['abbr'] + ' '
 321
 322         if self.partially_translated:
 323             s += self.uptodateness ('abbr')['abbr'] + ' '
 324         return s
 325
 326     def html_status (self):
 327         # TODO
 328         return ''
 329
 330 class MasterTelyDocument (TelyDocument):
 331     def __init__ (self,
 332                   filename,
 333                   parent_translations=dict ([(lang, None)
 334                                              for lang in langdefs.LANGDICT])):
 335         TelyDocument.__init__ (self, filename)
 336         self.size = len (self.contents)
 337         self.word_count = tely_word_count (self.contents)
 338         translations = dict ([(lang, os.path.join (lang, filename))
 339                               for lang in langdefs.LANGDICT])
 340         self.translations = \
 341             dict ([(lang,
 342                     TranslatedTelyDocument (translations[lang],
 343                                             self, parent_translations.get (lang)))
 344                    for lang in langdefs.LANGDICT
 345                    if os.path.exists (translations[lang])])
 346         if self.translations:
 347             self.includes = [MasterTelyDocument (f, self.translations)
 348                              for f in self.included_files]
 349         else:
 350             self.includes = []
 351
 352     def update_word_counts (self, s):
 353         s = update_word_count (s, self.filename, sum (self.word_count))
 354         for i in self.includes:
 355             s = i.update_word_counts (s)
 356         return s
 357
 358     def html_status (self, numbering=SectionNumber ()):
 359         if self.title == 'Untitled' or not self.translations:
 360             return ''
 361         if self.level[1] == 0: # if self is a master document
 362             s = '''<table align="center" border="2">
 363  <tr align="center">
 364   <th>%s</th>''' % self.print_title (numbering)
 365             s += ''.join (['  <th>%s</th>\n' % l for l in self.translations])
 366             s += ' </tr>\n'
 367             s += ' <tr align="left">\n  <td>Section titles<br>(%d)</td>\n' \
 368                 % sum (self.word_count)
 369
 370         else:
 371             s = ' <tr align="left">\n  <td>%s<br>(%d)</td>\n' \
 372                 % (self.print_title (numbering), sum (self.word_count))
 373
 374         s += ''.join ([t.short_html_status ()
 375                        for t in self.translations.values ()])
 376         s += ' </tr>\n'
 377         s += ''.join ([i.html_status (numbering) for i in self.includes])
 378
 379         if self.level[1] == 0:
 380             s += '</table>\n<p></p>\n'
 381         return s
 382
 383     def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
 384         if self.title == 'Untitled' or not self.translations:
 385             return ''
 386
 387         s = ''
 388         if self.level[1] == 0: # if self is a master document
 389             s += (self.print_title (numbering) + ' ').ljust (colspec[0])
 390             s += ''.join (['%s'.ljust (colspec[1]) % l
 391                            for l in self.translations])
 392             s += '\n'
 393             s += ('Section titles (%d)' % \
 394                       sum (self.word_count)).ljust (colspec[0])
 395
 396         else:
 397             s = '%s (%d) ' \
 398                 % (self.print_title (numbering), sum (self.word_count))
 399             s = s.ljust (colspec[0])
 400
 401         s += ''.join ([t.text_status ().ljust(colspec[1])
 402                        for t in self.translations.values ()])
 403         s += '\n\n'
 404         s += ''.join ([i.text_status (numbering) for i in self.includes])
 405
 406         if self.level[1] == 0:
 407             s += '\n'
 408         return s
 409
 410
 411 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
 412
 413 counts_re = re.compile (r'(?m)^(\d+) ')
 414
 415 def update_category_word_counts_sub (m):
 416     return '-' + m.group (1) + '-' + m.group (2) + \
 417         str (sum ([int (c)
 418                    for c in counts_re.findall (m.group (2))])).ljust (6) + \
 419         'total'
 420
 421
progress ("Reading documents...")

# Collect the *.tely files found by 'find' (at most two directory levels
# deep), build a MasterTelyDocument for each, and keep only those that
# have at least one translation.
tely_files = \
    buildlib.read_pipe ("find -maxdepth 2 -name '*.tely'")[0].splitlines ()
master_docs = [MasterTelyDocument (os.path.normpath (filename))
               for filename in tely_files]
master_docs = [doc for doc in master_docs if doc.translations]

# Template into which the generated status tables are inserted.
main_status_page = open ('translations.template.html.in').read ()

## TODO
#per_lang_status_pages = \
#    dict ([(l, open (os.path.join (l, 'translations.template.html')). read ())
#           for l in langdefs.LANGDICT
#           if langdefs.LANGDICT[l].enabled])

progress ("Generating status pages...")

# Output of the 'date -u' command, used as the generation time stamp.
date_time = buildlib.read_pipe ('LANG= date -u')[0]

main_status_html = ' <p><i>Last updated %s</i></p>\n' % date_time
main_status_html += '\n'.join ([doc.html_status () for doc in master_docs])

html_re = re.compile ('<html>', re.I)
end_body_re = re.compile ('</body>', re.I)

# Add a "generated file" notice right after the opening <html> tag.
main_status_page = html_re.sub ('''<html>
<!-- This page is automatically generated by translation-status.py from
translations.template.html.in; DO NOT EDIT !-->''', main_status_page)

# Insert the status tables just before the closing </body> tag.
main_status_page = end_body_re.sub (main_status_html + '\n</body>',
                                    main_status_page)

open ('translations.html.in', 'w').write (main_status_page)

# Plain-text report: a short legend followed by one table per document.
main_status_txt = '''Documentation translations status
Generated %s
NT = not translated
FT = fully translated

''' % date_time

main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])

status_txt_file = 'out/translations-status.txt'
progress ("Writing %s..." % status_txt_file)
open (status_txt_file, 'w').write (main_status_txt)

# Refresh the word counts listed in the TRANSLATION file: first the
# Texinfo documents, then the HTML and PO files named in the file itself,
# and finally the per-category totals.
translation_instructions_file = 'TRANSLATION'
progress ("Updating %s..." % translation_instructions_file)
translation_instructions = open (translation_instructions_file).read ()

for doc in master_docs:
    translation_instructions = doc.update_word_counts (translation_instructions)

# Lines of the form 'NUMBER name.html...' name an HTML file to count.
for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
                             translation_instructions):
    word_count = sgml_word_count (open (html_file).read ())
    translation_instructions = update_word_count (translation_instructions,
                                                  html_file,
                                                  word_count)

# Lines of the form 'NUMBER name.po...' name a PO file to count.
for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
                           translation_instructions):
    word_count = po_word_count (open (po_file).read ())
    translation_instructions = update_word_count (translation_instructions,
                                                  po_file,
                                                  word_count)

translation_instructions = \
    update_category_word_counts_re.sub (update_category_word_counts_sub,
                                        translation_instructions)

open (translation_instructions_file, 'w').write (translation_instructions)
 494
 495 open (translation_instructions_file, 'w').write (translation_instructions)