4 USAGE: cd Documentation && translations-status.py
8 <LANG>/translations.itexi
9 out/translations-status.txt
11 Update word counts in:
12 contributor/doc-translation-list.itexi
25 sys.stderr.write (str + '\n')
29 def error (str, update_status=1):
31 sys.stderr.write ('translations-status.py: %s\n' % str)
32 exit_code = max (exit_code, update_status)
34 progress ("translations-status.py")
38 # load gettext messages catalogs
39 translation = langdefs.translation
42 language_re = re.compile (r'^@documentlanguage (.+)', re.M)
43 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
44 space_re = re.compile (r'\s+', re.M)
45 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
46 node_re = re.compile ('^@node .*?$', re.M)
47 title_re = re.compile ('^@(settitle|chapter|top|(?:sub){0,2}section|'
48 '(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
49 include_re = re.compile ('^@include (.*?)$', re.M)
51 # allow multiple lines
52 translators_re = re.compile (r'^@c[ ]+[Tt]ranslators?[ ]*:[ ]*(.*?)$', re.M)
53 checkers_re = re.compile (r'^@c[ ]+[Tt]ranslation[ ]*[Cc]heckers?[ ]*:[ ]*(.*?)$', re.M)
54 status_re = re.compile (r'^@c[ ]+[Tt]ranslation[ ]*[Ss]tatus[ ]*:[ ]*(.*?)$', re.M)
55 post_gdp_re = re.compile ('post.GDP', re.I)
56 untranslated_node_str = '@untranslated'
57 skeleton_str = '-- SKELETON FILE --'
59 section_titles_string = _doc ('Section titles')
60 last_updated_string = _doc (' <p><i>Last updated %s</i></p>\n')
61 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
62 _doc ('Translated'), _doc ('Up to date'),
65 'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
66 'long':_doc ('not translated')},
67 'partially translated': {'color':'dfef77',
68 'short':_doc ('partially (%(p)d %%)'),
70 'long':_doc ('partially translated (%(p)d %%)')},
71 'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
72 'long': _doc ('translated')},
73 'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
74 'abbr':'100%%', 'vague':_doc ('up to date')},
75 'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
76 'vague':_doc ('partially up to date')},
77 'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
78 'pre-GDP':_doc ('pre-GDP'),
79 'post-GDP':_doc ('post-GDP')
83 # (Unnumbered/Numbered/Lettered, level)
85 'unnumbered': ('u', 1),
86 'unnumberedsec': ('u', 2),
87 'unnumberedsubsec': ('u', 3),
90 'subsection': ('n', 3),
92 'appendixsec': ('l', 2),
95 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
96 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
98 class SectionNumber (object):
100 self.__data = [[0,'u']]
102 def __increase_last_index (self):
103 type = self.__data[-1][1]
105 self.__data[-1][0] = \
106 self.__data[-1][0].translate (appendix_number_trans)
108 self.__data[-1][0] += 1
111 if self.__data[-1][1] == 'u':
113 return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
115 def increase (self, (type, level)):
117 self.__data = [[0,'u']]
118 while level + 1 < len (self.__data):
120 if level + 1 > len (self.__data):
121 self.__data.append ([0, type])
123 self.__data[-1][0] = '@'
124 if type == self.__data[-1][1]:
125 self.__increase_last_index ()
127 self.__data[-1] = ([0, type])
129 self.__data[-1][0] = 'A'
131 self.__data[-1][0] = 1
132 return self.format ()
135 def percentage_color (percent):
138 c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
139 for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
141 c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
142 for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
144 c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
145 for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
def update_word_count (text, filename, word_count):
    """Rewrite the word count that precedes `filename` in `text`.

    Every line of `text` that starts with a run of digits, optional
    spaces and then `filename` gets its leading number replaced by
    `word_count`, left-justified in a 6-character column.

    text       -- contents of the word-count list (a string)
    filename   -- file name to look for, taken literally
    word_count -- new count (anything accepted by str ())

    Return the updated text; `text` is returned unchanged if no line
    matches.
    """
    # The file name is data, not a pattern: escape it so that '.', '+'
    # etc. only match themselves.
    pattern = re.compile (r'^(\d+) *' + re.escape (filename), re.M)
    replacement = str (word_count).ljust (6) + filename
    # A function replacement keeps backslashes in the file name from being
    # interpreted as group references by re.sub.
    return pattern.sub (lambda match: replacement, text)
# Legacy matcher, kept so the module-level name stays available.  Its
# repeated group only retains the LAST continuation line of a msgid, so it
# is no longer used for counting.
po_msgid_re = re.compile (r'^msgid "(.*?)"(?:\n"(.*?)")*', re.M)

# A complete msgid entry: the keyword followed by one or more quoted
# strings (continuation strings normally sit on their own lines).
_po_msgid_block_re = re.compile (
    r'^msgid[ \t]+((?:"(?:[^"\\\n]|\\.)*"[ \t]*\n?)+)', re.M)

# One quoted string inside such an entry; backslash escapes are kept as-is.
_po_quoted_re = re.compile (r'"((?:[^"\\\n]|\\.)*)"')

def po_word_count (po_content):
    """Count the words found in the msgid strings of a PO file.

    po_content -- full text of the PO file (a string)

    All quoted pieces of one msgid are concatenated without a separator
    before counting.  Different msgids are separated by a space.
    `msgid_plural` and `msgstr` entries are not counted.

    Return the number of whitespace-separated words (0 when there is no
    msgid text at all).
    """
    msgids = [''.join (_po_quoted_re.findall (block))
              for block in _po_msgid_block_re.findall (po_content)]
    # str.split () without arguments drops empty leading/trailing pieces, so
    # empty msgids (such as the PO header) do not count as a word.
    return len (' '.join (msgids).split ())
# Any SGML/HTML tag; re.S lets a tag span several lines.
sgml_tag_re = re.compile (r'<.*?>', re.S)

def sgml_word_count (sgml_doc):
    """Count the words of an SGML/HTML document, ignoring its tags.

    sgml_doc -- document text (a string)

    Tags are deleted (not replaced by a space), then the remaining text is
    split on whitespace.  Return the number of words; an empty or
    whitespace-only document yields 0.
    """
    text = sgml_tag_re.sub ('', sgml_doc)
    # split () without arguments ignores leading/trailing whitespace, so a
    # final newline does not add a phantom word.
    return len (text.split ())
def tely_word_count (tely_doc):
    """Compute per-node word counts of a Texinfo document.

    tely_doc -- document text (a string)

    Texinfo comments and @lilypond blocks are removed first, then the text
    is cut at every @node line.  Return a list of integers, one per piece
    (the text before the first @node is the first piece).  Each integer is
    the number of pieces obtained by splitting on whitespace runs, so it is
    always at least 1.
    """
    stripped = lilypond_re.sub ('', comments_re.sub ('', tely_doc))
    counts = []
    for node_text in node_re.split (stripped):
        counts.append (len (space_re.split (node_text)))
    return counts
class HTMLMarkup (object):
    """Build small HTML fragments as plain strings.

    Every method returns a string.  `attributes` is always a sequence of
    (name, value) pairs rendered as name="value"; values are inserted
    verbatim, without any escaping.
    """

    def entity (self, name, string='', attributes=()):
        """Return `string` wrapped in a <name ...>...</name> element."""
        attr_list = ''.join ([' %s="%s"' % pair for pair in attributes])
        return '<%s%s>%s</%s>' % (name, attr_list, string, name)

    def paragraph (self, string=''):
        """Return a <p> element containing `string`."""
        return self.entity ('p', string)

    def table (self, string):
        """Return a centered, bordered <table> around `string`."""
        return self.entity ('table', string,
                            [('align', 'center'), ('border', '2')])

    def row (self, string, attributes=()):
        """Return a <tr> element."""
        return self.entity ('tr', string, attributes)

    # HTML has no separate header-row element; provided so both markup
    # classes offer headrow ().
    headrow = row

    def headcell (self, string, attributes=()):
        """Return a <th> element."""
        return self.entity ('th', string, attributes)

    def cell (self, string, attributes=()):
        """Return a <td> element."""
        return self.entity ('td', string, attributes)

    def newline (self, attributes=()):
        """Return a lone <br> tag (no closing tag)."""
        # entity () always appends '</br>' (5 characters); cut it off.
        return self.entity ('br', '', attributes)[:-5]

    def span (self, string, attributes=()):
        """Return a <span> element."""
        return self.entity ('span', string, attributes)

    def small (self, string, attributes=()):
        """Return a <small> element."""
        return self.entity ('small', string, attributes)
202 class TexiMarkup (HTMLMarkup):
203 def entity (self, name, string='', attributes=[]):
209 def paragraph (self, string=''):
211 %(string)s''' % locals ()
212 def table (self, string):
213 return self.entity ('multitable', string)
214 def headrow (self, string, attributes=[]):
216 @headitem ''' + string
217 def row (self, string, attributes=[]):
220 def cell (self, string, attributes=[]):
227 def html (self, string):
228 return self.entity ('ifhtml', self.entity ('html', string))
229 def span (self, string, attributes=[]):
230 return self.html (HTMLMarkup.span (self, string, attributes))
231 def small (self, string, attributes=[]):
232 return self.html (HTMLMarkup.small (self, string, attributes))
234 class TelyDocument (object):
235 def __init__ (self, filename):
236 self.filename = filename
237 self.contents = 'GIT committish: 0'
238 if os.path.exists (filename):
239 self.contents = open (filename).read ()
240 ## record title and sectioning level of first Texinfo section
241 self.sectioning = 'unnumbered'
242 self.title = 'Untitled'
243 m = title_re.search (self.contents)
245 self.sectioning = m.group (1)
246 self.title = m.group (2)
248 if not hasattr (self, 'language'):
250 m = language_re.search (self.contents)
252 self.language = m.group (1)
254 dir = os.path.dirname (filename).split ('/')[0]
259 included_files = [dir + t
260 for t in include_re.findall (self.contents)]
261 self.included_files = [p for p in included_files if os.path.exists (p)]
263 def get_level (self):
264 return texi_level [self.sectioning]
266 def print_title (self, section_number):
267 if not hasattr (self, 'level'):
268 self.level = self.get_level ()
269 return section_number.increase (self.level) + self.title
272 class TranslatedTelyDocument (TelyDocument):
273 def __init__ (self, filename, masterdocument, parent_translation=None):
274 TelyDocument.__init__ (self, filename)
275 self.masterdocument = masterdocument
276 if not hasattr (self, 'language'):
278 if not self.language and parent_translation:
279 self.language = parent_translation.__dict__.get ('language', '')
280 if self.language == 'en':
281 print filename + ': language en specified: set @documentlanguage', self.filename[:2]
283 if not self.language and filename[2] == '/':
284 print filename + ': no language specified: add @documentlanguage', self.filename[:2]
285 self.language = filename[:2]
287 self.translation = translation[self.language]
289 self.translation = lambda x: x
290 self.title = self.translation (self.title)
292 ## record authoring information
293 self.translators = ['']
294 if parent_translation:
295 self.translators = parent_translation.__dict__.get ('translators', [''])
296 m = translators_re.findall (self.contents)
298 self.translators = [n.strip () for n in
299 reduce (operator.add, [n.split (',') for n in m])]
300 if self.language != self.filename[:2]:
301 print 'Barf:', self.filename
303 if (not isinstance (self, UntranslatedTelyDocument)
304 and (not self.translators or not self.translators[0])
305 and not 'macros.itexi' in self.filename):
306 error (self.filename + ''': error: no translator name found
307 please specify one ore more lines in the master file
308 @c Translator: FirstName LastName[, FirstName LastName]..''')
310 m = checkers_re.findall (self.contents)
312 self.checkers = [n.strip () for n in
313 reduce (operator.add, [n.split (',') for n in m])]
314 if not self.checkers and isinstance (parent_translation, TranslatedTelyDocument):
315 self.checkers = parent_translation.checkers
317 ## check whether translation is pre- or post-GDP
318 m = status_re.search (self.contents)
320 self.post_gdp = bool (post_gdp_re.search (m.group (1)))
322 self.post_gdp = False
324 ## record which parts (nodes) of the file are actually translated
325 self.partially_translated = not skeleton_str in self.contents
326 nodes = node_re.split (self.contents)
327 self.translated_nodes = [not untranslated_node_str in n for n in nodes]
329 ## calculate translation percentage
330 master_total_word_count = sum (masterdocument.word_count)
331 translation_word_count = \
332 sum ([masterdocument.word_count[k] * self.translated_nodes[k]
333 for k in range (min (len (masterdocument.word_count),
334 len (self.translated_nodes)))])
335 self.translation_percentage = \
336 100 * translation_word_count / master_total_word_count
338 ## calculate how much the file is outdated
339 (diff_string, git_error) = \
340 buildlib.check_translated_doc (masterdocument.filename, self.filename, self.contents)
342 sys.stderr.write ('warning: %s: %s' % (self.filename, git_error))
343 self.uptodate_percentage = None
345 diff = diff_string.splitlines ()
346 insertions = sum ([len (l) - 1 for l in diff
347 if l.startswith ('+')
348 and not l.startswith ('+++')])
349 deletions = sum ([len (l) - 1 for l in diff
350 if l.startswith ('-')
351 and not l.startswith ('---')])
352 outdateness_percentage = 50.0 * (deletions + insertions) / \
353 (masterdocument.size + 0.5 * (deletions - insertions))
354 self.uptodate_percentage = 100 - int (outdateness_percentage)
355 if self.uptodate_percentage > 100:
357 progress ("%s: strange uptodateness percentage %d %%, \
358 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
359 self.uptodate_percentage = alternative
360 elif self.uptodate_percentage < 1:
362 progress ("%s: strange uptodateness percentage %d %%, \
363 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
364 self.uptodate_percentage = alternative
366 def get_level (self):
367 return texi_level ['top']
369 def completeness (self, formats=['long'], translated=False):
371 translation = self.translation
373 translation = lambda x: x
375 if isinstance (formats, str):
377 p = self.translation_percentage
379 status = 'not translated'
381 status = 'fully translated'
383 status = 'partially translated'
384 return dict ([(f, translation (format_table[status][f]) % locals())
387 def uptodateness (self, formats=['long'], translated=False):
389 translation = self.translation
391 translation = lambda x: x
393 if isinstance (formats, str):
395 p = self.uptodate_percentage
399 status = 'up to date'
404 if f == 'color' and p != None:
405 l['color'] = percentage_color (p)
407 l[f] = translation (format_table[status][f]) % locals ()
410 def gdp_status (self):
412 return self.translation (format_table['post-GDP'])
414 return self.translation (format_table['pre-GDP'])
416 def short_texi_status (self, markup):
418 if self.partially_translated:
419 s += markup.newline ().join (self.translators + [''])
421 s += markup.small (markup.newline ().join (self.checkers + ['']))
422 c = self.completeness (['color', 'long'])
423 s += markup.span ('%(long)s' % c, [('style', 'background-color: #%(color)s' % c)])
424 s += markup.newline ()
425 if self.partially_translated:
426 u = self.uptodateness (['vague', 'color'])
427 s += markup.span ('%(vague)s' % u, [('style', 'background-color: #%(color)s' % u)])
428 return markup.cell (s, [('title', filename)])
430 def text_status (self):
431 s = self.completeness ('abbr')['abbr'] + ' '
432 if self.partially_translated:
433 s += self.uptodateness ('abbr')['abbr'] + ' '
436 def texi_status (self, markup, numbering=SectionNumber ()):
437 return (markup.table (
439 (markup.headcell (self.print_title (numbering))
440 + ''.join ([markup.headcell (self.translation (h))
441 for h in detailed_status_heads])),
442 [('align', 'center')])
444 (markup.cell (((self.translation (section_titles_string)
446 + '%d' % sum (self.masterdocument.word_count))) % self.__dict__,
447 [('title',filename)])
448 + self.texi_body (markup, numbering)),
450 + self.texi_translations (markup, numbering))
451 ) + markup.paragraph ()
453 def texi_body (self, markup, numbering):
454 return (self.texi_translators (markup)
455 + self.texi_completeness (markup)
456 + self.texi_uptodateness (markup)
457 + self.texi_gdp (markup))
459 def texi_translators (self, markup):
460 if self.partially_translated:
461 return (' <td>' + '<br>\n '.join (self.translators) + '</td>\n'
462 + ' <td>' + '<br>\n '.join (self.checkers) + '</td>\n')
463 return ' <td></td>\n' * 2
465 def texi_completeness (self, markup):
466 c = self.completeness (['color', 'short'], translated=True)
467 return ' <td><span style="background-color: #%(color)s">\
468 %(short)s</span></td>\n' % {'color': c['color'],
471 def texi_uptodateness (self, markup):
472 if self.partially_translated:
473 u = self.uptodateness (['short', 'color'], translated=True)
474 return ' <td><span style="background-color: #%(color)s">\
475 %(short)s</span></td>\n' % {'color': u['color'],
477 return ' <td></td>\n'
479 def texi_gdp (self, markup):
480 return ' <td>' + self.gdp_status () + '</td>\n </tr>\n'
482 def texi_translations (self, markup, numbering):
483 return ''.join ([i.translations[self.language].texi_status (markup, numbering)
484 for i in self.masterdocument.includes
485 if self.language in i.translations])
487 class IncludedTranslatedTelyDocument (TranslatedTelyDocument):
488 get_level = TelyDocument.get_level
489 def texi_status (self, markup, numbering=SectionNumber ()):
490 if self.title != 'Untitled':
491 return ((' <tr align="left">\n <td title="%%(filename)s">%s<br>(%d)</td>\n'
492 % (self.print_title (numbering),
493 sum (self.masterdocument.word_count))) % self.__dict__
494 + self.texi_body (markup, numbering)
496 + self.texi_translations (markup, numbering))
499 class UntranslatedTelyDocument (TranslatedTelyDocument):
500 def __init__ (self, filename, masterdocument, parent_translation=None):
501 if filename[2] == '/':
502 self.language = filename[:2]
503 TranslatedTelyDocument.__init__ (self, filename, masterdocument, parent_translation)
505 class IncludedUntranslatedTelyDocument (UntranslatedTelyDocument, IncludedTranslatedTelyDocument):
506 get_level = TelyDocument.get_level
508 class MasterTelyDocument (TelyDocument):
511 parent_translations=dict ([(lang, None)
512 for lang in langdefs.LANGDICT])):
513 TelyDocument.__init__ (self, filename)
514 self.size = len (self.contents)
515 self.word_count = tely_word_count (self.contents)
516 self.translations = {}
518 if not self.language or self.language == 'en':
519 languages = [x for x in parent_translations.keys () if x != 'en']
520 self.translations = dict ([x for x in
521 [(lang, self.translated_factory (os.path.join (lang, self.filename),
522 parent_translations.get (lang)))
523 for lang in languages]
525 if self.translations:
526 self.includes = [IncludedMasterTelyDocument (f, self.translations)
527 for f in self.included_files]
529 def get_level (self):
530 return texi_level ['top']
532 def translated_factory (self, filename, parent):
533 if os.path.exists (filename):
534 return TranslatedTelyDocument (filename, self, parent)
537 def update_word_counts (self, s):
538 s = update_word_count (s, self.filename, sum (self.word_count))
539 for i in self.includes:
540 s = i.update_word_counts (s)
543 def texi_status (self, markup, numbering=SectionNumber ()):
546 <th>%s</th>''' % self.print_title (numbering)
547 s += ''.join ([' <th>%s</th>\n' % l for l in sorted (self.translations.keys ())])
549 s += (' <tr align="left">\n <td title="%%(filename)s">Section titles<br>(%d)</td>\n'
550 % sum (self.word_count)) % self.__dict__
551 s += self.texi_body (markup, numbering)
553 s += self.texi_includes (markup, numbering)
554 return markup.table (s) + markup.paragraph ()
556 def texi_includes (self, markup, numbering):
557 return ''.join ([i.texi_status (markup, numbering) for i in self.includes])
559 def texi_body (self, markup, numbering):
560 return (''.join ([self.translations[k].short_texi_status (markup)
561 for k in sorted (self.translations.keys ())])
564 def text_status (self, markup, numbering=SectionNumber (), colspec=[48,12]):
565 s = (self.print_title (numbering) + ' ').ljust (colspec[0])
566 s += ''.join (['%s'.ljust (colspec[1]) % l
567 for l in sorted (self.translations.keys ())])
569 s += ('Section titles (%d)' % \
570 sum (self.word_count)).ljust (colspec[0])
571 s += self.text_body (markup, numbering, colspec)
575 def text_body (self, markup, numbering, colspec):
576 return (''.join ([self.translations[k].text_status ().ljust(colspec[1])
577 for k in sorted (self.translations.keys ())])
579 + ''.join ([i.text_status (markup, numbering) for i in self.includes]))
581 class IncludedMasterTelyDocument (MasterTelyDocument):
582 get_level = TelyDocument.get_level
584 def translated_factory (self, filename, parent):
585 if os.path.exists (filename):
586 return IncludedTranslatedTelyDocument (filename, self, parent)
587 return IncludedUntranslatedTelyDocument (filename, self, parent)
589 def texi_status (self, markup, numbering=SectionNumber ()):
590 if self.title != 'Untitled':
591 return ((' <tr align="left">\n <td title=%%(filename)s>%s<br>(%d)</td>\n'
592 % (self.print_title (numbering), sum (self.word_count))) % self.__dict__
593 + self.texi_body (markup, numbering)
595 + self.texi_includes (markup, numbering))
598 def text_status (self, markup, numbering=SectionNumber (), colspec=[48,12]):
599 if self.title != 'Untitled':
601 % (self.print_title (numbering), sum (self.word_count)))
602 + self.text_body (markup, numbering, colspec)
# One category of the word-count list: a '-<digits>-' header line, the entry
# lines, and a closing '<number> total' marker.
update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')

# The count at the start of an entry line (digits followed by a space).
counts_re = re.compile (r'(?m)^(\d+) ')

def update_category_word_counts_sub (m):
    """Rebuild one category with a recomputed total.

    m -- match object produced by update_category_word_counts_re

    The header and the entry lines are reproduced unchanged; the old total
    is replaced by the sum of the entry counts, left-justified in a
    6-character column and followed by 'total'.
    """
    category, entries = m.group (1), m.group (2)
    total = 0
    for count in counts_re.findall (entries):
        total += int (count)
    return '-%s-%s%stotal' % (category, entries, str (total).ljust (6))
618 # main () starts here-abouts
620 progress ("Reading documents...")
623 buildlib.read_pipe ("git ls-files | grep -E '[^/]*/?[^/]*[.](tely|texi)$'")[0].splitlines ()
625 master_docs = [MasterTelyDocument (os.path.normpath (filename))
626 for filename in master_files]
627 master_docs = [doc for doc in master_docs if doc.translations]
629 enabled_languages = [l for l in langdefs.LANGDICT
630 if langdefs.LANGDICT[l].enabled
633 progress ("Generating status pages...")
635 date_time = buildlib.read_pipe ('LANG= date -u')[0]
637 markup = HTMLMarkup ()
638 #markup = TexiMarkup ()
639 main_status_body = last_updated_string % date_time
640 main_status_body += '\n'.join ([doc.texi_status (markup) for doc in master_docs])
642 texi_header = '''@c -*- coding: utf-8; mode: texinfo; -*-
643 @c This file was generated by translation-status.py -- DO NOT EDIT!
645 Translation of GIT committish: 0
649 Translation status currently only available in HTML.
660 main_status_page = texi_header % locals () + main_status_body + texi_footer
662 open ('translations.itexi', 'w').write (main_status_page)
664 for l in enabled_languages:
665 date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
666 updated = translation[l] (last_updated_string) % date_time
667 texi_status = '\n'.join ([doc.translations[l].texi_status (markup)
668 for doc in master_docs
669 if l in doc.translations])
670 lang_status_page = texi_header + updated + texi_status + texi_footer
671 open (os.path.join (l, 'translations.itexi'), 'w').write (lang_status_page)
673 main_status_txt = '''Documentation translations status
676 FT = fully translated
680 main_status_txt += '\n'.join ([doc.text_status (markup) for doc in master_docs])
682 status_txt_file = 'out/translations-status.txt'
683 progress ("Writing %s..." % status_txt_file)
684 open (status_txt_file, 'w').write (main_status_txt)
686 translation_instructions_file = 'contributor/doc-translation-list.itexi'
687 progress ("Updating %s..." % translation_instructions_file)
688 translation_instructions = open (translation_instructions_file).read ()
690 for doc in master_docs:
691 translation_instructions = doc.update_word_counts (translation_instructions)
693 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
694 translation_instructions):
695 word_count = sgml_word_count (open (html_file).read ())
696 translation_instructions = update_word_count (translation_instructions,
700 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
701 translation_instructions):
702 word_count = po_word_count (open (po_file).read ())
703 translation_instructions = update_word_count (translation_instructions,
707 translation_instructions = \
708 update_category_word_counts_re.sub (update_category_word_counts_sub,
709 translation_instructions)
711 open (translation_instructions_file, 'w').write (translation_instructions)