4 USAGE: cd Documentation && translations-status.py
8 <LANG>/translations.itexi
9 out/translations-status.txt
11 Update word counts in:
12 contributor/doc-translation-list.itexi
15 * decide if we really want this in printed output:
16 - the PDF output of TexiMarkup () is useless
17 - the INFO output of TexiMarkup() is useless
18 - using markup = HTMLMarkup (), we get nice <td title="FILENAME">
19 popups -- do we want that with texi output? -- how?
20 or possibly links to the git archive?
34 sys.stderr.write (str + '\n')
38 def error (str, update_status=1):
40 sys.stderr.write ('translations-status.py: %s\n' % str)
41 exit_code = max (exit_code, update_status)
43 progress ("translations-status.py")
47 # load gettext messages catalogs
48 translation = langdefs.translation
# --- Texinfo structure -------------------------------------------------

# "@documentlanguage LANG" at the start of a line; group 1 is LANG.
language_re = re.compile (r'^@documentlanguage (.+)', re.MULTILINE)

# Either a whole "@ignore" ... "@end ignore" block, or the text from
# "@c " up to the end of that line.
comments_re = re.compile (
    r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$',
    re.MULTILINE)

# Any run of whitespace.
space_re = re.compile (r'\s+', re.MULTILINE)

# "@lilypond{...}" on a single line, or everything from "@lilypond" up
# to the next line that ends with "@end lilypond".
lilypond_re = re.compile (
    r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)',
    re.MULTILINE)

# A complete "@node ..." line.
node_re = re.compile ('^@node .*?$', re.MULTILINE)

# A titling/sectioning command line; group 1 is the command name
# (without "@"), group 2 is the rest of the line.
title_re = re.compile (
    '^@('
    'settitle|chapter|top'
    '|(?:sub){0,2}section'
    '|(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?'
    ') (.*?)$',
    re.MULTILINE)

# "@include FILE" at the start of a line; group 1 is FILE.
include_re = re.compile ('^@include (.*?)$', re.MULTILINE)

# --- Translation bookkeeping comments ---------------------------------

# Each of the next three patterns matches one "@c <keyword>: <value>"
# line and captures <value>; several such lines may be present
# (allow multiple lines).
translators_re = re.compile (
    r'^@c[ ]+[Tt]ranslators?[ ]*:[ ]*(.*?)$',
    re.MULTILINE)
checkers_re = re.compile (
    r'^@c[ ]+[Tt]ranslation[ ]*[Cc]heckers?[ ]*:[ ]*(.*?)$',
    re.MULTILINE)
status_re = re.compile (
    r'^@c[ ]+[Tt]ranslation[ ]*[Ss]tatus[ ]*:[ ]*(.*?)$',
    re.MULTILINE)

# "post", any single character, then "GDP", case-insensitively.
post_gdp_re = re.compile ('post.GDP', re.IGNORECASE)

# --- Literal markers looked for in translated files --------------------
untranslated_node_str = '@untranslated'
skeleton_str = '-- SKELETON FILE --'
68 section_titles_string = _doc ('Section titles')
69 last_updated_string = _doc ('Last updated %s')
70 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
71 _doc ('Translated'), _doc ('Up to date'),
74 'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
75 'long':_doc ('not translated')},
76 'partially translated': {'color':'dfef77',
77 'short':_doc ('partially (%(p)d %%)'),
79 'long':_doc ('partially translated (%(p)d %%)')},
80 'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
81 'long': _doc ('translated')},
82 'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
83 'abbr':'100%%', 'vague':_doc ('up to date')},
84 'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
85 'vague':_doc ('partially up to date')},
86 'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
87 'pre-GDP':_doc ('pre-GDP'),
88 'post-GDP':_doc ('post-GDP')
# (Unnumbered/Numbered/Lettered, level)
94 'unnumbered': ('u', 1),
95 'unnumberedsec': ('u', 2),
96 'unnumberedsubsec': ('u', 3),
99 'subsection': ('n', 3),
100 'appendix': ('l', 1),
101 'appendixsec': ('l', 2),
104 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
105 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
107 class SectionNumber (object):
109 self.__data = [[0,'u']]
111 def __increase_last_index (self):
112 type = self.__data[-1][1]
114 self.__data[-1][0] = \
115 self.__data[-1][0].translate (appendix_number_trans)
117 self.__data[-1][0] += 1
120 if self.__data[-1][1] == 'u':
122 return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
124 def increase (self, (type, level)):
126 self.__data = [[0,'u']]
127 while level + 1 < len (self.__data):
129 if level + 1 > len (self.__data):
130 self.__data.append ([0, type])
132 self.__data[-1][0] = '@'
133 if type == self.__data[-1][1]:
134 self.__increase_last_index ()
136 self.__data[-1] = ([0, type])
138 self.__data[-1][0] = 'A'
140 self.__data[-1][0] = 1
141 return self.format ()
144 def percentage_color (percent):
147 c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
148 for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
150 c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
151 for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
153 c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
154 for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
def update_word_count (text, filename, word_count):
    '''
    Replace the word count written in front of filename in text.

    Every line of text that starts with a decimal number, optional
    spaces and then filename has the number and spaces replaced by
    word_count, left-justified in a 6-character field.  Anything that
    follows filename on the line is left untouched.

    filename is matched literally: regular expression metacharacters
    in it (typically the dot before the extension) have no special
    meaning.

    Return the updated text; text is returned unchanged when no line
    matches.
    '''
    entry_re = re.compile (r'^(\d+) *' + re.escape (filename), re.M)
    replacement = str (word_count).ljust (6) + filename
    # A callable replacement is inserted verbatim, so a backslash in
    # filename cannot be misread as a group reference or escape.
    return entry_re.sub (lambda match: replacement, text)
# Kept unchanged for any other user of this name; po_word_count no
# longer relies on it because its repeated group only retains the last
# continuation line of a multi-line msgid.
po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)

# One double-quoted PO string; group 1 is its content.  A backslash
# escapes the following character, so \" does not end the string.
po_quoted_re = re.compile (r'"((?:[^"\\\n]|\\.)*)"')

# A msgid keyword with its quoted string and all quoted continuation
# lines that follow; group 1 is that whole run of quoted strings.
po_msgid_block_re = re.compile (
    r'^msgid[ \t]+("(?:[^"\\\n]|\\.)*"(?:[ \t]*\n[ \t]*"(?:[^"\\\n]|\\.)*")*)',
    re.M)

def po_word_count (po_content):
    '''
    Count the words of all msgid strings in a PO file.

    Take the PO file contents po_content as a string.  The quoted
    pieces of one msgid (first line and continuation lines) are
    concatenated as they are, different msgids are separated by a
    space, and the result is split on whitespace.  msgstr and
    msgid_plural entries are not counted.

    Return the number of words as an integer; 0 if there is no msgid
    text at all.
    '''
    msgids = [''.join (po_quoted_re.findall (block))
              for block in po_msgid_block_re.findall (po_content)]
    # str.split without arguments drops empty pieces, so empty msgids
    # and leading/trailing blanks do not count as words.
    return len (' '.join (msgids).split ())
# Anything between "<" and the next ">", including tags that span
# several lines.
sgml_tag_re = re.compile (r'<.*?>', re.S)

def sgml_word_count (sgml_doc):
    '''
    Count the words of an SGML/HTML document, ignoring its tags.

    Take the document source sgml_doc as a string.  Tags are deleted
    (not replaced by a blank) and the remaining text is split on
    whitespace.

    Return the number of words as an integer; 0 for a document with
    no text outside of tags.
    '''
    text = sgml_tag_re.sub ('', sgml_doc)
    # str.split without arguments drops empty pieces, so whitespace at
    # the start or end of the text does not add phantom words.
    return len (text.split ())
def tely_word_count (tely_doc):
    '''
    Count the words of a Texinfo document, one count per node.

    Take the document source tely_doc as a string.  Everything matched
    by comments_re and by lilypond_re is removed first; the remaining
    text is cut at every line matched by node_re and each piece is
    counted on its own.

    Return a list of integers, one per piece.  A count is the number
    of items space_re.split yields for the piece, so it is never less
    than 1.
    '''
    stripped = lilypond_re.sub ('', comments_re.sub ('', tely_doc))
    counts = []
    for piece in node_re.split (stripped):
        counts.append (len (space_re.split (piece)))
    return counts
189 class HTMLMarkup (object):
190 texi_header = '''@c -*- coding: utf-8; mode: texinfo; -*-
191 @c This file was generated by translation-status.py -- DO NOT EDIT!
193 Translation of GIT committish: 0
199 def texi (self, string):
200 return (self.texi_header
203 Translation status currently only available in HTML.
208 def entity (self, name, string='', attributes=[]):
209 attr_list = ''.join ([' %s="%s"' % x for x in attributes])
210 return '<%(name)s%(attr_list)s>%(string)s</%(name)s>' % locals ()
211 def paragraph (self, string=''):
212 return self.entity ('p', string)
213 def table (self, string):
214 return self.entity ('table', string, [('align', 'center'), ('border', '2')])
215 def row (self, string, attributes=[]):
216 return self.entity ('tr', string, attributes)
218 def headcell (self, string, attributes=[]):
219 return self.entity ('th', string, attributes)
220 def cell (self, string='', attributes=[]):
221 return self.entity ('td', string, attributes)
222 def newline (self, attributes=[]):
223 return self.entity ('br', '', attributes)[:-5]
224 def span (self, string, attributes=[]):
225 return self.entity ('span', string, attributes)
226 def small (self, string, attributes=[]):
227 return self.entity ('small', string, attributes)
228 def emph (self, string, attributes=[]):
229 return self.entity ('em', string, attributes)
231 class TexiMarkup (HTMLMarkup):
232 def texi (self, string):
233 return (self.texi_header
235 <style type="text/css"><!--
236 th { border: 1px solid black; text-align: center; }
237 td { border: 1px solid black; text-align: center; }
241 .replace ('''item \n@tab ''', '''item
243 .replace ('@multitable',
244 '@multitable @columnfractions' + ' 0' * 10))
246 def entity (self, name, string='', attributes=[]):
250 @end %(name)s''' % locals ()
251 def paragraph (self, string=''):
253 %(string)s''' % locals ()
254 def table (self, string):
255 # Ugh, makeinfo is fine without @columnfractions
256 # but texi2html 1.82 barfs: `empty multicolumn'
257 return (self.entity ('multitable', string))
258 def headrow (self, string, attributes=[]):
260 @headitem ''' + string
261 def row (self, string, attributes=[]):
264 def cell (self, string='', attributes=[]):
272 def html (self, string):
273 return self.entity ('ifhtml', self.entity ('html', string))
274 def nothtml (self, string):
275 return self.entity ('ifnothtml', string)
276 def span (self, string, attributes=[]):
277 return (self.html (HTMLMarkup ().span (string, attributes))
278 + self.nothtml (string))
279 def small (self, string, attributes=[]):
280 return (self.html (HTMLMarkup ().small (string, attributes))
281 + self.nothtml (string))
282 def command (self, name, string):
283 return '@%(name)s{%(string)s}' % locals ()
284 def emph (self, string, attributes=[]):
285 return self.command ('emph', string)
287 class TelyDocument (object):
288 def __init__ (self, filename):
289 self.filename = filename
290 self.contents = 'GIT committish: 0'
291 if os.path.exists (filename):
292 self.contents = open (filename).read ()
## record title and sectioning level of first Texinfo section
294 self.sectioning = 'unnumbered'
295 self.title = 'Untitled'
296 m = title_re.search (self.contents)
298 self.sectioning = m.group (1)
299 self.title = m.group (2)
301 if not hasattr (self, 'language'):
303 m = language_re.search (self.contents)
305 self.language = m.group (1)
307 dir = os.path.dirname (filename).split ('/')[0]
312 included_files = [dir + t
313 for t in include_re.findall (self.contents)]
314 self.included_files = [p for p in included_files if os.path.exists (p)]
316 def get_level (self):
317 return texi_level [self.sectioning]
319 def print_title (self, section_number):
320 if not hasattr (self, 'level'):
321 self.level = self.get_level ()
322 return section_number.increase (self.level) + self.title
325 class TranslatedTelyDocument (TelyDocument):
326 def __init__ (self, filename, masterdocument, parent_translation=None):
327 TelyDocument.__init__ (self, filename)
328 self.masterdocument = masterdocument
329 if not hasattr (self, 'language'):
331 if not self.language and parent_translation:
332 self.language = parent_translation.__dict__.get ('language', '')
333 if self.language == 'en':
334 print filename + ': language en specified: set @documentlanguage', self.filename[:2]
336 if not self.language and filename[2] == '/':
337 print filename + ': no language specified: add @documentlanguage', self.filename[:2]
338 self.language = filename[:2]
340 self.translation = translation[self.language]
342 self.translation = lambda x: x
343 self.title = self.translation (self.title)
345 ## record authoring information
346 self.translators = ['']
347 if parent_translation:
348 self.translators = parent_translation.__dict__.get ('translators', [''])
349 m = translators_re.findall (self.contents)
351 self.translators = [n.strip () for n in
352 reduce (operator.add, [n.split (',') for n in m])]
353 if self.language != self.filename[:2]:
354 print 'Barf:', self.filename
356 if (not isinstance (self, UntranslatedTelyDocument)
357 and (not self.translators or not self.translators[0])
358 and not 'macros.itexi' in self.filename):
359 error (self.filename + ''': error: no translator name found
360 please specify one ore more lines in the master file
361 @c Translator: FirstName LastName[, FirstName LastName]..''')
363 m = checkers_re.findall (self.contents)
365 self.checkers = [n.strip () for n in
366 reduce (operator.add, [n.split (',') for n in m])]
367 if not self.checkers and isinstance (parent_translation, TranslatedTelyDocument):
368 self.checkers = parent_translation.checkers
370 ## check whether translation is pre- or post-GDP
371 m = status_re.search (self.contents)
373 self.post_gdp = bool (post_gdp_re.search (m.group (1)))
375 self.post_gdp = False
377 ## record which parts (nodes) of the file are actually translated
378 self.partially_translated = not skeleton_str in self.contents
379 nodes = node_re.split (self.contents)
380 self.translated_nodes = [not untranslated_node_str in n for n in nodes]
382 ## calculate translation percentage
383 master_total_word_count = sum (masterdocument.word_count)
384 translation_word_count = \
385 sum ([masterdocument.word_count[k] * self.translated_nodes[k]
386 for k in range (min (len (masterdocument.word_count),
387 len (self.translated_nodes)))])
388 self.translation_percentage = \
389 100 * translation_word_count / master_total_word_count
391 ## calculate how much the file is outdated
392 (diff_string, git_error) = \
393 buildlib.check_translated_doc (masterdocument.filename, self.filename, self.contents)
395 sys.stderr.write ('warning: %s: %s' % (self.filename, git_error))
396 self.uptodate_percentage = None
398 diff = diff_string.splitlines ()
399 insertions = sum ([len (l) - 1 for l in diff
400 if l.startswith ('+')
401 and not l.startswith ('+++')])
402 deletions = sum ([len (l) - 1 for l in diff
403 if l.startswith ('-')
404 and not l.startswith ('---')])
405 outdateness_percentage = 50.0 * (deletions + insertions) / \
406 (masterdocument.size + 0.5 * (deletions - insertions))
407 self.uptodate_percentage = 100 - int (outdateness_percentage)
408 if self.uptodate_percentage > 100:
410 progress ("%s: strange uptodateness percentage %d %%, \
411 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
412 self.uptodate_percentage = alternative
413 elif self.uptodate_percentage < 1:
415 progress ("%s: strange uptodateness percentage %d %%, \
416 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
417 self.uptodate_percentage = alternative
419 def get_level (self):
420 return texi_level ['top']
422 def completeness (self, formats=['long'], translated=False):
424 translation = self.translation
426 translation = lambda x: x
428 if isinstance (formats, str):
430 p = self.translation_percentage
432 status = 'not translated'
434 status = 'fully translated'
436 status = 'partially translated'
437 return dict ([(f, translation (format_table[status][f]) % locals())
440 def uptodateness (self, formats=['long'], translated=False):
442 translation = self.translation
444 translation = lambda x: x
446 if isinstance (formats, str):
448 p = self.uptodate_percentage
452 status = 'up to date'
457 if f == 'color' and p != None:
458 l['color'] = percentage_color (p)
460 l[f] = translation (format_table[status][f]) % locals ()
463 def gdp_status (self):
465 return self.translation (format_table['post-GDP'])
467 return self.translation (format_table['pre-GDP'])
469 def short_texi_status (self, markup):
471 if self.partially_translated:
472 s += markup.newline ().join (self.translators + [''])
474 s += markup.newline ().join ([markup.small (x) for x in self.checkers + ['']])
475 c = self.completeness (['color', 'long'])
476 s += markup.span ('%(long)s' % c, [('style', 'background-color: #%(color)s' % c)])
477 s += markup.newline ()
478 if self.partially_translated:
479 u = self.uptodateness (['vague', 'color'])
480 s += markup.span ('%(vague)s' % u, [('style', 'background-color: #%(color)s' % u)])
481 return markup.cell (s, [('title', filename)])
483 def text_status (self):
484 s = self.completeness ('abbr')['abbr'] + ' '
485 if self.partially_translated:
486 s += self.uptodateness ('abbr')['abbr'] + ' '
489 def texi_status (self, markup, numbering=SectionNumber ()):
490 return (markup.table (
492 (markup.headcell (self.print_title (numbering))
493 + ''.join ([markup.headcell (self.translation (h))
494 for h in detailed_status_heads])),
495 [('align', 'center')])
497 (markup.cell ((self.translation (section_titles_string)
499 + '%d' % sum (self.masterdocument.word_count)),
500 [('title',filename)])
501 + self.texi_body (markup, numbering)),
503 + self.texi_translations (markup, numbering))
504 ) + markup.paragraph ()
506 def texi_body (self, markup, numbering):
507 return (self.texi_translators (markup)
508 + self.texi_completeness (markup)
509 + self.texi_uptodateness (markup)
510 + self.texi_gdp (markup))
512 def texi_translators (self, markup):
513 if self.partially_translated:
514 return (markup.cell (markup.newline ().join (self.translators))
515 + markup.cell (markup.newline ().join (self.checkers)))
516 return markup.cell () + markup.cell ()
518 def texi_completeness (self, markup):
519 c = self.completeness (['color', 'short'], translated=True)
520 return markup.cell (markup.span (c['short'],
521 [('style', 'background-color: #' + c['color'])]))
523 def texi_uptodateness (self, markup):
524 if self.partially_translated:
525 u = self.uptodateness (['short', 'color'], translated=True)
526 return markup.cell (markup.span (u['short'],
527 [('style', 'background-color: #' + u['color'])]))
528 return markup.cell ()
530 def texi_gdp (self, markup):
531 return markup.cell (self.gdp_status ())
533 def texi_translations (self, markup, numbering):
534 return ''.join ([i.translations[self.language].texi_status (markup, numbering)
535 for i in self.masterdocument.includes
536 if self.language in i.translations])
538 class IncludedTranslatedTelyDocument (TranslatedTelyDocument):
539 get_level = TelyDocument.get_level
540 def texi_status (self, markup, numbering=SectionNumber ()):
541 if self.title != 'Untitled':
544 self.print_title (numbering)
546 + '%d' % sum (self.masterdocument.word_count)),
547 [('title',filename)])
548 + self.texi_body (markup, numbering)),
550 + self.texi_translations (markup, numbering))
class UntranslatedTelyDocument (TranslatedTelyDocument):
    '''
    A TranslatedTelyDocument whose language is taken from its path.

    When the third character of filename is "/", the first two
    characters are stored as self.language before the base class is
    initialised.
    '''
    def __init__ (self, filename, masterdocument, parent_translation=None):
        language_dir = filename[:2]
        separator = filename[2]
        if separator == '/':
            # Set this first: the base initialisers test
            # hasattr (self, 'language') before looking for a language
            # themselves.
            self.language = language_dir
        TranslatedTelyDocument.__init__ (
            self, filename, masterdocument, parent_translation)
class IncludedUntranslatedTelyDocument (UntranslatedTelyDocument, IncludedTranslatedTelyDocument):
    # Combines the two bases without adding behaviour of its own.
    # get_level is taken from TelyDocument, which looks the level up
    # from self.sectioning, rather than from TranslatedTelyDocument,
    # whose version always returns the level of 'top'.
    get_level = TelyDocument.get_level
562 class MasterTelyDocument (TelyDocument):
565 parent_translations=dict ([(lang, None)
566 for lang in langdefs.LANGDICT])):
567 TelyDocument.__init__ (self, filename)
568 self.size = len (self.contents)
569 self.word_count = tely_word_count (self.contents)
570 self.translations = {}
572 if not self.language or self.language == 'en':
573 languages = [x for x in parent_translations.keys () if x != 'en']
574 self.translations = dict ([x for x in
575 [(lang, self.translated_factory (os.path.join (lang, self.filename),
576 parent_translations.get (lang)))
577 for lang in languages]
579 if self.translations:
580 self.includes = [IncludedMasterTelyDocument (f, self.translations)
581 for f in self.included_files]
583 def get_level (self):
584 return texi_level ['top']
586 def translated_factory (self, filename, parent):
587 if os.path.exists (filename):
588 return TranslatedTelyDocument (filename, self, parent)
591 def update_word_counts (self, s):
592 s = update_word_count (s, self.filename, sum (self.word_count))
593 for i in self.includes:
594 s = i.update_word_counts (s)
597 def texi_status (self, markup, numbering=SectionNumber ()):
598 return markup.table (
600 (markup.headcell (self.print_title (numbering))
601 + ''.join ([markup.headcell (l) for l in sorted (self.translations.keys ())])),
602 [('align','center')])
604 (markup.cell (('Section titles'
606 + '(%d)' % sum (self.word_count)),
607 [('title',filename)])
608 + self.texi_body (markup, numbering)),
610 + self.texi_includes (markup, numbering)
611 )) + markup.paragraph ()
613 def texi_includes (self, markup, numbering):
614 return ''.join ([i.texi_status (markup, numbering) for i in self.includes])
616 def texi_body (self, markup, numbering):
617 return ''.join ([self.translations[k].short_texi_status (markup)
618 for k in sorted (self.translations.keys ())])
620 def text_status (self, markup, numbering=SectionNumber (), colspec=[48,12]):
621 s = (self.print_title (numbering) + ' ').ljust (colspec[0])
622 s += ''.join (['%s'.ljust (colspec[1]) % l
623 for l in sorted (self.translations.keys ())])
625 s += ('Section titles (%d)' % \
626 sum (self.word_count)).ljust (colspec[0])
627 s += self.text_body (markup, numbering, colspec)
631 def text_body (self, markup, numbering, colspec):
632 return (''.join ([self.translations[k].text_status ().ljust(colspec[1])
633 for k in sorted (self.translations.keys ())])
635 + ''.join ([i.text_status (markup, numbering) for i in self.includes]))
637 class IncludedMasterTelyDocument (MasterTelyDocument):
638 get_level = TelyDocument.get_level
640 def translated_factory (self, filename, parent):
641 if os.path.exists (filename):
642 return IncludedTranslatedTelyDocument (filename, self, parent)
643 return IncludedUntranslatedTelyDocument (filename, self, parent)
645 def texi_status (self, markup, numbering=SectionNumber ()):
646 if self.title != 'Untitled':
648 (markup.cell ((self.print_title (numbering)
650 + '(%d)' % sum (self.word_count)),
651 [('title',filename)])
652 + self.texi_body (markup, numbering)),
654 + self.texi_includes (markup, numbering))
657 def text_status (self, markup, numbering=SectionNumber (), colspec=[48,12]):
658 if self.title != 'Untitled':
659 return (self.print_title (numbering)
660 + '(%d)' % sum (self.word_count)
661 + self.text_body (markup, numbering, colspec)
666 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
668 counts_re = re.compile (r'(?m)^(\d+) ')
670 def update_category_word_counts_sub (m):
671 return ('-' + m.group (1) + '-' + m.group (2)
673 for c in counts_re.findall (m.group (2))])).ljust (6)
677 # main () starts here-abouts
679 progress ("Reading documents...")
682 buildlib.read_pipe ("git ls-files | grep -E '[^/]*/?[^/]*[.](tely|texi)$'")[0].splitlines ()
684 master_docs = [MasterTelyDocument (os.path.normpath (filename))
685 for filename in master_files]
686 master_docs = [doc for doc in master_docs if doc.translations]
688 enabled_languages = [l for l in langdefs.LANGDICT
689 if langdefs.LANGDICT[l].enabled
692 progress ("Generating status pages...")
694 date_time = buildlib.read_pipe ('LANG= date -u')[0]
696 # TEXI output sort of works
697 # TODO: table border, td-titles :-)
698 # markup = HTMLMarkup ()
699 markup = TexiMarkup ()
701 main_status_body = markup.paragraph (markup.emph (last_updated_string % date_time))
702 main_status_body += '\n'.join ([doc.texi_status (markup) for doc in master_docs])
703 main_status_page = markup.texi (main_status_body)
705 open ('translations.itexi', 'w').write (main_status_page)
707 for l in enabled_languages:
708 date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
709 updated = markup.paragraph (markup.emph (translation[l] (last_updated_string) % date_time))
710 texi_status = '\n'.join ([doc.translations[l].texi_status (markup)
711 for doc in master_docs
712 if l in doc.translations])
713 lang_status_page = markup.texi (updated + texi_status)
714 open (os.path.join (l, 'translations.itexi'), 'w').write (lang_status_page)
716 main_status_txt = '''Documentation translations status
719 FT = fully translated
723 main_status_txt += '\n'.join ([doc.text_status (markup) for doc in master_docs])
725 status_txt_file = 'out/translations-status.txt'
726 progress ("Writing %s..." % status_txt_file)
727 open (status_txt_file, 'w').write (main_status_txt)
729 translation_instructions_file = 'contributor/doc-translation-list.itexi'
730 progress ("Updating %s..." % translation_instructions_file)
731 translation_instructions = open (translation_instructions_file).read ()
733 for doc in master_docs:
734 translation_instructions = doc.update_word_counts (translation_instructions)
736 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
737 translation_instructions):
738 word_count = sgml_word_count (open (html_file).read ())
739 translation_instructions = update_word_count (translation_instructions,
743 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
744 translation_instructions):
745 word_count = po_word_count (open (po_file).read ())
746 translation_instructions = update_word_count (translation_instructions,
750 translation_instructions = \
751 update_category_word_counts_re.sub (update_category_word_counts_sub,
752 translation_instructions)
754 open (translation_instructions_file, 'w').write (translation_instructions)