4 USAGE: cd Documentation && translations-status.py
7 * layout tweaks for TexiMarkup
8 - set html table border
9 - collapse first column
10 * switch to using TexiMarkup (see #markup = ..)
14 <LANG>/translations.itexi
15 out/translations-status.txt
17 Update word counts in:
18 contributor/doc-translation-list.itexi
31 sys.stderr.write (str + '\n')
def error (str, update_status=1):
    """Report an error on stderr and raise the pending exit status.

    str is the message text; it is written to stderr prefixed with the
    script name.  update_status is the exit status this error calls for.
    The module-level exit_code is only ever raised, never lowered, so the
    highest status reported so far is the one that is kept.
    """
    # exit_code is rebound below, so it must be declared global; without
    # the declaration the max () call would read an unbound local name.
    global exit_code
    sys.stderr.write ('translations-status.py: %s\n' % str)
    exit_code = max (exit_code, update_status)
40 progress ("translations-status.py")
44 # load gettext messages catalogs
45 translation = langdefs.translation
48 language_re = re.compile (r'^@documentlanguage (.+)', re.M)
49 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
50 space_re = re.compile (r'\s+', re.M)
51 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
52 node_re = re.compile ('^@node .*?$', re.M)
53 title_re = re.compile ('^@(settitle|chapter|top|(?:sub){0,2}section|'
54 '(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
55 include_re = re.compile ('^@include (.*?)$', re.M)
57 # allow multiple lines
58 translators_re = re.compile (r'^@c[ ]+[Tt]ranslators?[ ]*:[ ]*(.*?)$', re.M)
59 checkers_re = re.compile (r'^@c[ ]+[Tt]ranslation[ ]*[Cc]heckers?[ ]*:[ ]*(.*?)$', re.M)
60 status_re = re.compile (r'^@c[ ]+[Tt]ranslation[ ]*[Ss]tatus[ ]*:[ ]*(.*?)$', re.M)
61 post_gdp_re = re.compile ('post.GDP', re.I)
62 untranslated_node_str = '@untranslated'
63 skeleton_str = '-- SKELETON FILE --'
65 section_titles_string = _doc ('Section titles')
66 last_updated_string = _doc ('Last updated %s')
67 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
68 _doc ('Translated'), _doc ('Up to date'),
71 'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
72 'long':_doc ('not translated')},
73 'partially translated': {'color':'dfef77',
74 'short':_doc ('partially (%(p)d %%)'),
76 'long':_doc ('partially translated (%(p)d %%)')},
77 'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
78 'long': _doc ('translated')},
79 'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
80 'abbr':'100%%', 'vague':_doc ('up to date')},
81 'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
82 'vague':_doc ('partially up to date')},
83 'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
84 'pre-GDP':_doc ('pre-GDP'),
85 'post-GDP':_doc ('post-GDP')
89 # (Unnumbered/Numbered/Lettered, level)
91 'unnumbered': ('u', 1),
92 'unnumberedsec': ('u', 2),
93 'unnumberedsubsec': ('u', 3),
96 'subsection': ('n', 3),
98 'appendixsec': ('l', 2),
101 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
102 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
104 class SectionNumber (object):
106 self.__data = [[0,'u']]
108 def __increase_last_index (self):
109 type = self.__data[-1][1]
111 self.__data[-1][0] = \
112 self.__data[-1][0].translate (appendix_number_trans)
114 self.__data[-1][0] += 1
117 if self.__data[-1][1] == 'u':
119 return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
121 def increase (self, (type, level)):
123 self.__data = [[0,'u']]
124 while level + 1 < len (self.__data):
126 if level + 1 > len (self.__data):
127 self.__data.append ([0, type])
129 self.__data[-1][0] = '@'
130 if type == self.__data[-1][1]:
131 self.__increase_last_index ()
133 self.__data[-1] = ([0, type])
135 self.__data[-1][0] = 'A'
137 self.__data[-1][0] = 1
138 return self.format ()
141 def percentage_color (percent):
144 c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
145 for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
147 c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
148 for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
150 c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
151 for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
155 def update_word_count (text, filename, word_count):
156 return re.sub (r'(?m)^(\d+) *' + filename,
157 str (word_count).ljust (6) + filename,
160 po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)
def po_word_count (po_content):
    """Return the word count of all msgid strings in a PO file.

    po_content is the text of the PO file.  Every msgid entry matched by
    po_msgid_re contributes the concatenation of its quoted lines; the
    entries are joined with single spaces and the result is split on
    whitespace with space_re.  As before, empty input yields 1 because
    splitting an empty string gives one (empty) field.
    """
    msgids = []
    for m in po_msgid_re.finditer (po_content):
        # findall () reports only the LAST repetition of the continuation
        # group, which silently drops the middle lines of a msgid that
        # spans three or more lines.  The whole match is 'msgid "..."'
        # followed by zero or more '"..."' lines, so peel the quotes off
        # every line of it instead.
        quoted_lines = m.group (0)[len ('msgid '):].split ('\n')
        msgids.append (''.join ([q[1:-1] for q in quoted_lines]))
    return len (space_re.split (' '.join (msgids)))
166 sgml_tag_re = re.compile (r'<.*?>', re.S)
def sgml_word_count (sgml_doc):
    """Return the number of whitespace-separated fields in sgml_doc.

    Everything matched by sgml_tag_re is removed first, then the
    remaining text is split with space_re and the pieces are counted.
    """
    text_only = sgml_tag_re.sub ('', sgml_doc)
    fields = space_re.split (text_only)
    return len (fields)
def tely_word_count (tely_doc):
    '''
    Count the words of a Texinfo document, one figure per node.

    tely_doc is the document text as a string.
    The result is a list of integers: the text is cut at every line
    matched by node_re, so the first figure covers whatever precedes
    the first @node line.

    Texinfo comments and @lilypond blocks are removed before counting.
    '''
    without_comments = comments_re.sub ('', tely_doc)
    without_music = lilypond_re.sub ('', without_comments)
    counts = []
    for node_text in node_re.split (without_music):
        counts.append (len (space_re.split (node_text)))
    return counts
class HTMLMarkup (object):
    """Render the pieces of a status table as HTML fragments.

    Every method returns a string.  Where a method takes attributes, it
    is a sequence of (name, value) pairs that end up as name="value" on
    the opening tag.  The default attribute lists are never modified.
    """

    def entity (self, name, string='', attributes=[]):
        """Return string enclosed in a <name ...> ... </name> element."""
        attr_list = ''.join ([' %s="%s"' % x for x in attributes])
        return '<%(name)s%(attr_list)s>%(string)s</%(name)s>' % locals ()

    def paragraph (self, string=''):
        """Return string as a <p> element."""
        return self.entity ('p', string)

    def table (self, string):
        """Return string as a centered <table> with a border of 2."""
        return self.entity ('table', string, [('align', 'center'), ('border', '2')])

    def row (self, string, attributes=[]):
        """Return string as a <tr> element."""
        return self.entity ('tr', string, attributes)

    def headrow (self, string, attributes=[]):
        """Return a heading row; in HTML it is an ordinary row.

        TexiMarkup has a separate headrow (), so this entry point has to
        exist here as well for the two markup classes to be swappable.
        """
        return self.row (string, attributes)

    def headcell (self, string, attributes=[]):
        """Return string as a <th> element."""
        return self.entity ('th', string, attributes)

    def cell (self, string='', attributes=[]):
        """Return string as a <td> element."""
        return self.entity ('td', string, attributes)

    def newline (self, attributes=[]):
        """Return a lone <br ...> tag."""
        # <br> takes no closing tag: cut off the '</br>' entity () appends.
        return self.entity ('br', '', attributes)[:-len ('</br>')]

    def span (self, string, attributes=[]):
        """Return string as a <span> element."""
        return self.entity ('span', string, attributes)

    def small (self, string, attributes=[]):
        """Return string as a <small> element."""
        return self.entity ('small', string, attributes)

    def emph (self, string, attributes=[]):
        """Return string as an <em> element."""
        return self.entity ('em', string, attributes)
210 class TexiMarkup (HTMLMarkup):
211 def entity (self, name, string='', attributes=[]):
215 @end %(name)s''' % locals ()
216 def paragraph (self, string=''):
218 %(string)s''' % locals ()
def table (self, string):
    """Return string as a @multitable with ten columns of width .1."""
    # Ugh, makeinfo is fine without @columnfractions
    # but texi2html 1.82 barfs: `empty multicolumn'
    opening = '@multitable @columnfractions' + ' .1' * 10
    multitable = self.entity ('multitable', string)
    return multitable.replace ('@multitable', opening)
225 def headrow (self, string, attributes=[]):
227 @headitem ''' + string
228 def row (self, string, attributes=[]):
231 def cell (self, string='', attributes=[]):
def html (self, string):
    """Return string inside an html entity nested in an ifhtml entity."""
    raw_block = self.entity ('html', string)
    return self.entity ('ifhtml', raw_block)
def span (self, string, attributes=[]):
    """Return an HTML span of string, wrapped by html ()."""
    html_span = HTMLMarkup ().span (string, attributes)
    return self.html (html_span)
def small (self, string, attributes=[]):
    """Return an HTML small element of string, wrapped by html ()."""
    html_small = HTMLMarkup ().small (string, attributes)
    return self.html (html_small)
def command (self, name, string):
    """Return the brace-form Texinfo command @name{string}."""
    return '@%s{%s}' % (name, string)
def emph (self, string, attributes=[]):
    """Return string as an @emph command; attributes is not used."""
    emphasized = self.command ('emph', string)
    return emphasized
249 class TelyDocument (object):
250 def __init__ (self, filename):
251 self.filename = filename
252 self.contents = 'GIT committish: 0'
253 if os.path.exists (filename):
254 self.contents = open (filename).read ()
255 ## record title and sectioning level of first Texinfo section
256 self.sectioning = 'unnumbered'
257 self.title = 'Untitled'
258 m = title_re.search (self.contents)
260 self.sectioning = m.group (1)
261 self.title = m.group (2)
263 if not hasattr (self, 'language'):
265 m = language_re.search (self.contents)
267 self.language = m.group (1)
269 dir = os.path.dirname (filename).split ('/')[0]
274 included_files = [dir + t
275 for t in include_re.findall (self.contents)]
276 self.included_files = [p for p in included_files if os.path.exists (p)]
def get_level (self):
    """Return the texi_level entry for this document's sectioning command."""
    sectioning_command = self.sectioning
    return texi_level[sectioning_command]
def print_title (self, section_number):
    """Return the title preceded by its section number.

    section_number.increase () is called with this document's level and
    its result is put in front of self.title.  The level is obtained from
    get_level () on first use and then kept in self.level.
    """
    try:
        level = self.level
    except AttributeError:
        level = self.level = self.get_level ()
    return section_number.increase (level) + self.title
287 class TranslatedTelyDocument (TelyDocument):
288 def __init__ (self, filename, masterdocument, parent_translation=None):
289 TelyDocument.__init__ (self, filename)
290 self.masterdocument = masterdocument
291 if not hasattr (self, 'language'):
293 if not self.language and parent_translation:
294 self.language = parent_translation.__dict__.get ('language', '')
295 if self.language == 'en':
296 print filename + ': language en specified: set @documentlanguage', self.filename[:2]
298 if not self.language and filename[2] == '/':
299 print filename + ': no language specified: add @documentlanguage', self.filename[:2]
300 self.language = filename[:2]
302 self.translation = translation[self.language]
304 self.translation = lambda x: x
305 self.title = self.translation (self.title)
307 ## record authoring information
308 self.translators = ['']
309 if parent_translation:
310 self.translators = parent_translation.__dict__.get ('translators', [''])
311 m = translators_re.findall (self.contents)
313 self.translators = [n.strip () for n in
314 reduce (operator.add, [n.split (',') for n in m])]
315 if self.language != self.filename[:2]:
316 print 'Barf:', self.filename
318 if (not isinstance (self, UntranslatedTelyDocument)
319 and (not self.translators or not self.translators[0])
320 and not 'macros.itexi' in self.filename):
321 error (self.filename + ''': error: no translator name found
322 please specify one ore more lines in the master file
323 @c Translator: FirstName LastName[, FirstName LastName]..''')
325 m = checkers_re.findall (self.contents)
327 self.checkers = [n.strip () for n in
328 reduce (operator.add, [n.split (',') for n in m])]
329 if not self.checkers and isinstance (parent_translation, TranslatedTelyDocument):
330 self.checkers = parent_translation.checkers
332 ## check whether translation is pre- or post-GDP
333 m = status_re.search (self.contents)
335 self.post_gdp = bool (post_gdp_re.search (m.group (1)))
337 self.post_gdp = False
339 ## record which parts (nodes) of the file are actually translated
340 self.partially_translated = not skeleton_str in self.contents
341 nodes = node_re.split (self.contents)
342 self.translated_nodes = [not untranslated_node_str in n for n in nodes]
344 ## calculate translation percentage
345 master_total_word_count = sum (masterdocument.word_count)
346 translation_word_count = \
347 sum ([masterdocument.word_count[k] * self.translated_nodes[k]
348 for k in range (min (len (masterdocument.word_count),
349 len (self.translated_nodes)))])
350 self.translation_percentage = \
351 100 * translation_word_count / master_total_word_count
353 ## calculate how much the file is outdated
354 (diff_string, git_error) = \
355 buildlib.check_translated_doc (masterdocument.filename, self.filename, self.contents)
357 sys.stderr.write ('warning: %s: %s' % (self.filename, git_error))
358 self.uptodate_percentage = None
360 diff = diff_string.splitlines ()
361 insertions = sum ([len (l) - 1 for l in diff
362 if l.startswith ('+')
363 and not l.startswith ('+++')])
364 deletions = sum ([len (l) - 1 for l in diff
365 if l.startswith ('-')
366 and not l.startswith ('---')])
367 outdateness_percentage = 50.0 * (deletions + insertions) / \
368 (masterdocument.size + 0.5 * (deletions - insertions))
369 self.uptodate_percentage = 100 - int (outdateness_percentage)
370 if self.uptodate_percentage > 100:
372 progress ("%s: strange uptodateness percentage %d %%, \
373 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
374 self.uptodate_percentage = alternative
375 elif self.uptodate_percentage < 1:
377 progress ("%s: strange uptodateness percentage %d %%, \
378 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
379 self.uptodate_percentage = alternative
def get_level (self):
    """Return the texi_level entry for 'top', whatever the sectioning."""
    top_level = texi_level['top']
    return top_level
384 def completeness (self, formats=['long'], translated=False):
386 translation = self.translation
388 translation = lambda x: x
390 if isinstance (formats, str):
392 p = self.translation_percentage
394 status = 'not translated'
396 status = 'fully translated'
398 status = 'partially translated'
399 return dict ([(f, translation (format_table[status][f]) % locals())
402 def uptodateness (self, formats=['long'], translated=False):
404 translation = self.translation
406 translation = lambda x: x
408 if isinstance (formats, str):
410 p = self.uptodate_percentage
414 status = 'up to date'
419 if f == 'color' and p != None:
420 l['color'] = percentage_color (p)
422 l[f] = translation (format_table[status][f]) % locals ()
425 def gdp_status (self):
427 return self.translation (format_table['post-GDP'])
429 return self.translation (format_table['pre-GDP'])
431 def short_texi_status (self, markup):
433 if self.partially_translated:
434 s += markup.newline ().join (self.translators + [''])
436 s += markup.newline ().join ([markup.small (x) for x in self.checkers + ['']])
437 c = self.completeness (['color', 'long'])
438 s += markup.span ('%(long)s' % c, [('style', 'background-color: #%(color)s' % c)])
439 s += markup.newline ()
440 if self.partially_translated:
441 u = self.uptodateness (['vague', 'color'])
442 s += markup.span ('%(vague)s' % u, [('style', 'background-color: #%(color)s' % u)])
443 return markup.cell (s, [('title', filename)])
445 def text_status (self):
446 s = self.completeness ('abbr')['abbr'] + ' '
447 if self.partially_translated:
448 s += self.uptodateness ('abbr')['abbr'] + ' '
451 def texi_status (self, markup, numbering=SectionNumber ()):
452 return (markup.table (
454 (markup.headcell (self.print_title (numbering))
455 + ''.join ([markup.headcell (self.translation (h))
456 for h in detailed_status_heads])),
457 [('align', 'center')])
459 (markup.cell ((self.translation (section_titles_string)
461 + '%d' % sum (self.masterdocument.word_count)),
462 [('title',filename)])
463 + self.texi_body (markup, numbering)),
465 + self.texi_translations (markup, numbering))
466 ) + markup.paragraph ()
def texi_body (self, markup, numbering):
    """Return the status cells of this document's table row.

    The cells are, in order: translators, completeness, up-to-dateness
    and GDP status.  numbering is accepted but not used here.
    """
    cells = [
        self.texi_translators (markup),
        self.texi_completeness (markup),
        self.texi_uptodateness (markup),
        self.texi_gdp (markup),
    ]
    return ''.join (cells)
def texi_translators (self, markup):
    """Return two cells: the translators and the translation checkers.

    Names are separated by markup.newline ().  Both cells are empty when
    the document is not flagged as partially translated.
    """
    if not self.partially_translated:
        return markup.cell () + markup.cell ()
    translators_cell = markup.cell (markup.newline ().join (self.translators))
    checkers_cell = markup.cell (markup.newline ().join (self.checkers))
    return translators_cell + checkers_cell
def texi_completeness (self, markup):
    """Return a cell with the translated short completeness label.

    The label sits in a span whose background colour is the 'color'
    value reported by completeness ().
    """
    info = self.completeness (['color', 'short'], translated=True)
    style = [('style', 'background-color: #' + info['color'])]
    return markup.cell (markup.span (info['short'], style))
def texi_uptodateness (self, markup):
    """Return a cell with the translated short up-to-dateness label.

    The cell is empty when the document is not flagged as partially
    translated; otherwise the label sits in a span whose background
    colour is the 'color' value reported by uptodateness ().
    """
    if not self.partially_translated:
        return markup.cell ()
    info = self.uptodateness (['short', 'color'], translated=True)
    style = [('style', 'background-color: #' + info['color'])]
    return markup.cell (markup.span (info['short'], style))
def texi_gdp (self, markup):
    """Return a cell holding the result of gdp_status ()."""
    status = self.gdp_status ()
    return markup.cell (status)
def texi_translations (self, markup, numbering):
    """Return the concatenated status of the included files' translations.

    Walks the includes of the master document and, for each one that has
    a translation in this document's language, appends that translation's
    texi_status ().  Includes without such a translation are skipped.
    """
    pieces = []
    for included in self.masterdocument.includes:
        if self.language in included.translations:
            translated = included.translations[self.language]
            pieces.append (translated.texi_status (markup, numbering))
    return ''.join (pieces)
500 class IncludedTranslatedTelyDocument (TranslatedTelyDocument):
501 get_level = TelyDocument.get_level
502 def texi_status (self, markup, numbering=SectionNumber ()):
503 if self.title != 'Untitled':
506 self.print_title (numbering)
508 + '%d' % sum (self.masterdocument.word_count)),
509 [('title',filename)])
510 + self.texi_body (markup, numbering)),
512 + self.texi_translations (markup, numbering))
class UntranslatedTelyDocument (TranslatedTelyDocument):
    """A TranslatedTelyDocument whose language is preset from its path."""

    def __init__ (self, filename, masterdocument, parent_translation=None):
        """Set the language from the path, then run the base constructor.

        When the third character of filename is '/', the first two
        characters are stored as self.language before
        TranslatedTelyDocument.__init__ is called.
        """
        third_character = filename[2]
        if third_character == '/':
            self.language = filename[:2]
        TranslatedTelyDocument.__init__ (self, filename, masterdocument,
                                         parent_translation)
class IncludedUntranslatedTelyDocument (UntranslatedTelyDocument,
                                        IncludedTranslatedTelyDocument):
    """Combination of the untranslated and the included document classes."""

    # Use the sectioning-based level lookup of TelyDocument.
    get_level = TelyDocument.get_level
524 class MasterTelyDocument (TelyDocument):
527 parent_translations=dict ([(lang, None)
528 for lang in langdefs.LANGDICT])):
529 TelyDocument.__init__ (self, filename)
530 self.size = len (self.contents)
531 self.word_count = tely_word_count (self.contents)
532 self.translations = {}
534 if not self.language or self.language == 'en':
535 languages = [x for x in parent_translations.keys () if x != 'en']
536 self.translations = dict ([x for x in
537 [(lang, self.translated_factory (os.path.join (lang, self.filename),
538 parent_translations.get (lang)))
539 for lang in languages]
541 if self.translations:
542 self.includes = [IncludedMasterTelyDocument (f, self.translations)
543 for f in self.included_files]
def get_level (self):
    """Return the texi_level entry for 'top', whatever the sectioning."""
    top_level = texi_level['top']
    return top_level
548 def translated_factory (self, filename, parent):
549 if os.path.exists (filename):
550 return TranslatedTelyDocument (filename, self, parent)
553 def update_word_counts (self, s):
554 s = update_word_count (s, self.filename, sum (self.word_count))
555 for i in self.includes:
556 s = i.update_word_counts (s)
559 def texi_status (self, markup, numbering=SectionNumber ()):
560 return markup.table (
562 (markup.headcell (self.print_title (numbering))
563 + ''.join ([markup.headcell (l) for l in sorted (self.translations.keys ())])),
564 [('align','center')])
566 (markup.cell (('Section titles'
568 + '(%d)' % sum (self.word_count)),
569 [('title',filename)])
570 + self.texi_body (markup, numbering)),
572 + self.texi_includes (markup, numbering)
573 )) + markup.paragraph ()
def texi_includes (self, markup, numbering):
    """Return the concatenated texi_status () of every included document."""
    rendered = []
    for included in self.includes:
        rendered.append (included.texi_status (markup, numbering))
    return ''.join (rendered)
def texi_body (self, markup, numbering):
    """Return one short status cell per translation, sorted by language.

    numbering is accepted but not used here.
    """
    cells = []
    for language in sorted (self.translations.keys ()):
        cells.append (self.translations[language].short_texi_status (markup))
    return ''.join (cells)
582 def text_status (self, markup, numbering=SectionNumber (), colspec=[48,12]):
583 s = (self.print_title (numbering) + ' ').ljust (colspec[0])
584 s += ''.join (['%s'.ljust (colspec[1]) % l
585 for l in sorted (self.translations.keys ())])
587 s += ('Section titles (%d)' % \
588 sum (self.word_count)).ljust (colspec[0])
589 s += self.text_body (markup, numbering, colspec)
593 def text_body (self, markup, numbering, colspec):
594 return (''.join ([self.translations[k].text_status ().ljust(colspec[1])
595 for k in sorted (self.translations.keys ())])
597 + ''.join ([i.text_status (markup, numbering) for i in self.includes]))
599 class IncludedMasterTelyDocument (MasterTelyDocument):
600 get_level = TelyDocument.get_level
def translated_factory (self, filename, parent):
    """Return the translation document object for an included file.

    An existing file gives an IncludedTranslatedTelyDocument, a missing
    one an IncludedUntranslatedTelyDocument.  Either way the object is
    built from filename, this master document and parent.
    """
    if os.path.exists (filename):
        document_class = IncludedTranslatedTelyDocument
    else:
        document_class = IncludedUntranslatedTelyDocument
    return document_class (filename, self, parent)
607 def texi_status (self, markup, numbering=SectionNumber ()):
608 if self.title != 'Untitled':
610 (markup.cell ((self.print_title (numbering)
612 + '(%d)' % sum (self.word_count)),
613 [('title',filename)])
614 + self.texi_body (markup, numbering)),
616 + self.texi_includes (markup, numbering))
619 def text_status (self, markup, numbering=SectionNumber (), colspec=[48,12]):
620 if self.title != 'Untitled':
621 return (self.print_title (numbering)
622 + '(%d)' % sum (self.word_count)
623 + self.text_body (markup, numbering, colspec)
628 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
630 counts_re = re.compile (r'(?m)^(\d+) ')
632 def update_category_word_counts_sub (m):
633 return ('-' + m.group (1) + '-' + m.group (2)
635 for c in counts_re.findall (m.group (2))])).ljust (6)
639 # main () starts here-abouts
641 progress ("Reading documents...")
644 buildlib.read_pipe ("git ls-files | grep -E '[^/]*/?[^/]*[.](tely|texi)$'")[0].splitlines ()
646 master_docs = [MasterTelyDocument (os.path.normpath (filename))
647 for filename in master_files]
648 master_docs = [doc for doc in master_docs if doc.translations]
650 enabled_languages = [l for l in langdefs.LANGDICT
651 if langdefs.LANGDICT[l].enabled
654 progress ("Generating status pages...")
656 date_time = buildlib.read_pipe ('LANG= date -u')[0]
658 # TEXI output sort of works
659 # TODO: table border, td-titles :-)
660 #markup = HTMLMarkup ()
661 markup = TexiMarkup ()
663 main_status_body = markup.paragraph (markup.emph (last_updated_string % date_time))
664 main_status_body += '\n'.join ([doc.texi_status (markup) for doc in master_docs])
666 texi_header = '''@c -*- coding: utf-8; mode: texinfo; -*-
667 @c This file was generated by translation-status.py -- DO NOT EDIT!
669 Translation of GIT committish: 0
674 if not isinstance (markup, TexiMarkup):
677 Translation status currently only available in HTML.
686 if not isinstance (markup, TexiMarkup):
692 main_status_page = texi_header % locals () + main_status_body + texi_footer
694 open ('translations.itexi', 'w').write (main_status_page)
696 for l in enabled_languages:
697 date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
698 updated = markup.paragraph (markup.emph (translation[l] (last_updated_string) % date_time))
699 texi_status = '\n'.join ([doc.translations[l].texi_status (markup)
700 for doc in master_docs
701 if l in doc.translations])
702 lang_status_page = texi_header + updated + texi_status + texi_footer
703 open (os.path.join (l, 'translations.itexi'), 'w').write (lang_status_page)
705 main_status_txt = '''Documentation translations status
708 FT = fully translated
712 main_status_txt += '\n'.join ([doc.text_status (markup) for doc in master_docs])
714 status_txt_file = 'out/translations-status.txt'
715 progress ("Writing %s..." % status_txt_file)
716 open (status_txt_file, 'w').write (main_status_txt)
718 translation_instructions_file = 'contributor/doc-translation-list.itexi'
719 progress ("Updating %s..." % translation_instructions_file)
720 translation_instructions = open (translation_instructions_file).read ()
722 for doc in master_docs:
723 translation_instructions = doc.update_word_counts (translation_instructions)
725 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
726 translation_instructions):
727 word_count = sgml_word_count (open (html_file).read ())
728 translation_instructions = update_word_count (translation_instructions,
732 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
733 translation_instructions):
734 word_count = po_word_count (open (po_file).read ())
735 translation_instructions = update_word_count (translation_instructions,
739 translation_instructions = \
740 update_category_word_counts_re.sub (update_category_word_counts_sub,
741 translation_instructions)
743 open (translation_instructions_file, 'w').write (translation_instructions)