4 USAGE: cd Documentation && translations-status.py
8 <LANG>/translations.itexi
9 out/translations-status.txt
11 Update word counts in:
12 contributor/doc-translation-list.itexi
25 sys.stderr.write (str + '\n')
def error (str, update_status=1):
    """
    Report an error on stderr and raise the script's exit status.

    str is the message text; it is written prefixed with the script name.
    update_status is the exit status this error calls for; the module-level
    exit_code only ever grows (it becomes the maximum of its current value
    and update_status).
    """
    # exit_code is rebound below, so it must be declared global; without
    # the declaration the max () call raises UnboundLocalError.
    # NOTE(review): exit_code is expected to be initialised at module level
    # before the first call -- confirm.
    global exit_code
    sys.stderr.write ('translations-status.py: %s\n' % str)
    exit_code = max (exit_code, update_status)
34 progress ("translations-status.py")
38 # load gettext messages catalogs
39 translation = langdefs.translation
42 language_re = re.compile (r'^@documentlanguage (.+)', re.M)
43 comments_re = re.compile (r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.M)
44 space_re = re.compile (r'\s+', re.M)
45 lilypond_re = re.compile (r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.M)
46 node_re = re.compile ('^@node .*?$', re.M)
47 title_re = re.compile ('^@(settitle|chapter|top|(?:sub){0,2}section|'
48 '(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$', re.M)
49 include_re = re.compile ('^@include (.*?)$', re.M)
51 # allow multiple lines
52 translators_re = re.compile (r'^@c\s+[tT]ranslators?\s*:\s*(.*?)$', re.M | re.I)
53 checkers_re = re.compile (r'^@c\s+Translation\s*checkers?\s*:\s*(.*?)$',
55 status_re = re.compile (r'^@c\s+Translation\s*status\s*:\s*(.*?)$', re.M | re.I)
56 post_gdp_re = re.compile ('post.GDP', re.I)
57 untranslated_node_str = '@untranslated'
58 skeleton_str = '-- SKELETON FILE --'
60 section_titles_string = _doc ('Section titles')
61 last_updated_string = _doc (' <p><i>Last updated %s</i></p>\n')
62 detailed_status_heads = [_doc ('Translators'), _doc ('Translation checkers'),
63 _doc ('Translated'), _doc ('Up to date'),
66 'not translated': {'color':'d0f0f8', 'short':_doc ('no'), 'abbr':'NT',
67 'long':_doc ('not translated')},
68 'partially translated': {'color':'dfef77',
69 'short':_doc ('partially (%(p)d %%)'),
71 'long':_doc ('partially translated (%(p)d %%)')},
72 'fully translated': {'color':'1fff1f', 'short':_doc ('yes'), 'abbr':'FT',
73 'long': _doc ('translated')},
74 'up to date': {'short':_doc ('yes'), 'long':_doc ('up to date'),
75 'abbr':'100%%', 'vague':_doc ('up to date')},
76 'outdated': {'short':_doc ('partially'), 'abbr':'%(p)d%%',
77 'vague':_doc ('partially up to date')},
78 'N/A': {'short':_doc ('N/A'), 'abbr':'N/A', 'color':'d587ff', 'vague':''},
79 'pre-GDP':_doc ('pre-GDP'),
80 'post-GDP':_doc ('post-GDP')
84 # (Unnumbered/Numbered/Lettered, level)
86 'unnumbered': ('u', 1),
87 'unnumberedsec': ('u', 2),
88 'unnumberedsubsec': ('u', 3),
91 'subsection': ('n', 3),
93 'appendixsec': ('l', 2),
96 appendix_number_trans = string.maketrans ('@ABCDEFGHIJKLMNOPQRSTUVWXY',
97 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
99 class SectionNumber (object):
101 self.__data = [[0,'u']]
103 def __increase_last_index (self):
104 type = self.__data[-1][1]
106 self.__data[-1][0] = \
107 self.__data[-1][0].translate (appendix_number_trans)
109 self.__data[-1][0] += 1
112 if self.__data[-1][1] == 'u':
114 return '.'.join ([str (i[0]) for i in self.__data if i[1] != 'u']) + ' '
116 def increase (self, (type, level)):
118 self.__data = [[0,'u']]
119 while level + 1 < len (self.__data):
121 if level + 1 > len (self.__data):
122 self.__data.append ([0, type])
124 self.__data[-1][0] = '@'
125 if type == self.__data[-1][1]:
126 self.__increase_last_index ()
128 self.__data[-1] = ([0, type])
130 self.__data[-1][0] = 'A'
132 self.__data[-1][0] = 1
133 return self.format ()
136 def percentage_color (percent):
139 c = [hex (int (3 * p * b + (1 - 3 * p) * a))[2:]
140 for (a, b) in [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]]
142 c = [hex (int ((3 * p - 1) * b + (2 - 3 * p) * a))[2:]
143 for (a, b) in [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]]
145 c = [hex (int ((3 * p - 2) * b + 3 * (1 - p) * a))[2:]
146 for (a, b) in [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]]
def update_word_count (text, filename, word_count):
    """
    Update the word count written in front of filename in text.

    Every line of text that starts with a number, optional spaces and
    then filename gets that prefix replaced by word_count, left-justified
    in a 6-character column, followed by filename.

    Return the updated text; lines that do not match are left alone.
    """
    replacement = str (word_count).ljust (6) + filename
    # filename is literal text, not a pattern: escape it so that '.' only
    # matches a dot and characters such as '+' cannot break the regexp.
    # The replacement is supplied through a callable so that backslashes
    # in filename are not interpreted as group references.
    return re.sub (r'(?m)^(\d+) *' + re.escape (filename),
                   lambda m: replacement,
                   text)
# One msgid entry: the quoted string on the msgid line plus any quoted
# continuation lines that directly follow it.  The whole run is captured
# as a single group because a repeated group only keeps its last match.
po_msgid_re = re.compile (r'^msgid (".*"(?:\n".*")*)', re.M)
# One quoted line inside a captured msgid entry.
po_quoted_line_re = re.compile (r'^"(.*)"$', re.M)

def po_word_count (po_content):
    """
    Count the words of all msgid strings in the contents of a PO file.

    Take string po_content as an argument.
    Return an integer.

    Continuation lines of multi-line msgids are included; msgstr
    strings are not counted.  Empty input yields 0.
    """
    total = 0
    for entry in po_msgid_re.findall (po_content):
        # Continuation lines are concatenated without a separator.
        msgid = ''.join (po_quoted_line_re.findall (entry))
        # split () without arguments ignores leading/trailing whitespace,
        # so an empty msgid contributes no words.
        total += len (msgid.split ())
    return total
# Any SGML/HTML tag; re.S lets a tag span several lines.
sgml_tag_re = re.compile (r'<.*?>', re.S)

def sgml_word_count (sgml_doc):
    """
    Count the words of an SGML/HTML document, ignoring its tags.

    Take string sgml_doc as an argument.
    Return an integer.
    """
    # Replace tags with a space, not with nothing, so that words separated
    # only by a tag ('one<br>two') are still counted separately.
    text = sgml_tag_re.sub (' ', sgml_doc)
    # split () without arguments ignores leading/trailing whitespace, so a
    # document without text counts 0 words.
    return len (text.split ())
def tely_word_count (tely_doc):
    """
    Calculate word count of a Texinfo document node by node.

    Take string tely_doc as an argument.
    Return a list of integers: the first element counts the text before
    the first @node line, each following element counts one node.

    Texinfo comments and @lilypond blocks are not included in word counts.
    """
    tely_doc = comments_re.sub ('', tely_doc)
    tely_doc = lilypond_re.sub ('', tely_doc)
    nodes = node_re.split (tely_doc)
    # space_re.split () also returns the empty strings at the edges of a
    # chunk, so every count is at least 1.  This convention is kept on
    # purpose: the translation percentage is computed by dividing by the
    # sum of these counts, which therefore never is zero.
    return [len (space_re.split (n)) for n in nodes]
182 class TelyDocument (object):
183 def __init__ (self, filename):
184 self.filename = filename
185 self.contents = 'GIT committish: 0'
186 if os.path.exists (filename):
187 self.contents = open (filename).read ()
188 self.top = os.path.splitext (filename)[1] in ['.tely', '.texi']
189 ## record title and sectioning level of first Texinfo section
190 self.sectioning = 'unnumbered'
191 self.title = 'Untitled'
192 m = title_re.search (self.contents)
194 self.sectioning = m.group (1)
195 self.title = m.group (2)
196 # This is all quite ugly and hairy. The original code worked
197 # with @top node detection and each manual had its own @top
198 # node. Not any more. Declaring .tely / .texi files to be
199 # @top sort of works...
201 self.sectioning = 'top'
202 self.level = texi_level [self.sectioning]
204 if not hasattr (self, 'language'):
206 m = language_re.search (self.contents)
208 self.language = m.group (1)
210 dir = os.path.dirname (filename).split ('/')[0]
215 included_files = [dir + t
216 for t in include_re.findall (self.contents)]
217 self.included_files = [p for p in included_files if os.path.exists (p)]
219 def print_title (self, section_number):
220 return section_number.increase (self.level) + self.title
223 class TranslatedTelyDocument (TelyDocument):
224 def __init__ (self, filename, masterdocument, parent_translation=None):
225 TelyDocument.__init__ (self, filename)
226 self.masterdocument = masterdocument
227 if not hasattr (self, 'language'):
229 if not self.language and parent_translation:
230 self.language = parent_translation.__dict__.get ('language', '')
231 if self.language == 'en':
232 print filename + ': language en specified: set @documentlanguage', self.filename[:2]
234 if not self.language and filename[2] == '/':
235 print filename + ': no language specified: add @documentlanguage', self.filename[:2]
236 self.language = filename[:2]
238 self.translation = translation[self.language]
240 self.translation = lambda x: x
241 self.title = self.translation (self.title)
243 ## record authoring information
244 self.translators = ['']
245 if parent_translation:
246 self.translators = parent_translation.__dict__.get ('translators', [''])
247 m = translators_re.findall (self.contents)
249 self.translators = [n.strip () for n in
250 reduce (operator.add, [n.split (',') for n in m])]
251 if (not isinstance (self, UntranslatedTelyDocument)
252 and (not self.translators or not self.translators[0])
253 and not 'macros.itexi' in self.filename):
254 error ('%s: no translator name found, \nplease \
255 specify at least one in the master file as a line containing\n\
256 @c Translators: FirstName1 LastName1, FirstName2 LastName2' % self.filename)
258 m = checkers_re.findall (self.contents)
260 self.checkers = [n.strip () for n in
261 reduce (operator.add, [n.split (',') for n in m])]
262 if not self.checkers and isinstance (parent_translation, TranslatedTelyDocument):
263 self.checkers = parent_translation.checkers
265 ## check whether translation is pre- or post-GDP
266 m = status_re.search (self.contents)
268 self.post_gdp = bool (post_gdp_re.search (m.group (1)))
270 self.post_gdp = False
272 ## record which parts (nodes) of the file are actually translated
273 self.partially_translated = not skeleton_str in self.contents
274 nodes = node_re.split (self.contents)
275 self.translated_nodes = [not untranslated_node_str in n for n in nodes]
277 ## calculate translation percentage
278 master_total_word_count = sum (masterdocument.word_count)
279 translation_word_count = \
280 sum ([masterdocument.word_count[k] * self.translated_nodes[k]
281 for k in range (min (len (masterdocument.word_count),
282 len (self.translated_nodes)))])
283 self.translation_percentage = \
284 100 * translation_word_count / master_total_word_count
286 ## calculate how much the file is outdated
287 (diff_string, git_error) = \
288 buildlib.check_translated_doc (masterdocument.filename, self.filename, self.contents)
290 sys.stderr.write ('warning: %s: %s' % (self.filename, git_error))
291 self.uptodate_percentage = None
293 diff = diff_string.splitlines ()
294 insertions = sum ([len (l) - 1 for l in diff
295 if l.startswith ('+')
296 and not l.startswith ('+++')])
297 deletions = sum ([len (l) - 1 for l in diff
298 if l.startswith ('-')
299 and not l.startswith ('---')])
300 outdateness_percentage = 50.0 * (deletions + insertions) / \
301 (masterdocument.size + 0.5 * (deletions - insertions))
302 self.uptodate_percentage = 100 - int (outdateness_percentage)
303 if self.uptodate_percentage > 100:
305 progress ("%s: strange uptodateness percentage %d %%, \
306 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
307 self.uptodate_percentage = alternative
308 elif self.uptodate_percentage < 1:
310 progress ("%s: strange uptodateness percentage %d %%, \
311 setting to %d %%" % (self.filename, self.uptodate_percentage, alternative))
312 self.uptodate_percentage = alternative
314 def completeness (self, formats=['long'], translated=False):
316 translation = self.translation
318 translation = lambda x: x
320 if isinstance (formats, str):
322 p = self.translation_percentage
324 status = 'not translated'
326 status = 'fully translated'
328 status = 'partially translated'
329 return dict ([(f, translation (format_table[status][f]) % locals())
332 def uptodateness (self, formats=['long'], translated=False):
334 translation = self.translation
336 translation = lambda x: x
338 if isinstance (formats, str):
340 p = self.uptodate_percentage
344 status = 'up to date'
349 if f == 'color' and p != None:
350 l['color'] = percentage_color (p)
352 l[f] = translation (format_table[status][f]) % locals ()
355 def gdp_status (self):
357 return self.translation (format_table['post-GDP'])
359 return self.translation (format_table['pre-GDP'])
361 def short_texi_status (self):
363 if self.partially_translated:
364 s += '<br>\n '.join (self.translators) + '<br>\n'
367 '<br>\n '.join (self.checkers) + '</small><br>\n'
369 c = self.completeness (['color', 'long'])
370 s += ' <span style="background-color: #%(color)s">\
371 %(long)s</span><br>\n' % c
373 if self.partially_translated:
374 u = self.uptodateness (['vague', 'color'])
375 s += ' <span style="background-color: #%(color)s">\
376 %(vague)s</span><br>\n' % u
381 def text_status (self):
382 s = self.completeness ('abbr')['abbr'] + ' '
384 if self.partially_translated:
385 s += self.uptodateness ('abbr')['abbr'] + ' '
388 def texi_status (self, numbering=SectionNumber ()):
389 if self.title == 'Untitled':
393 s = '''<table align="center" border="2">
395 <th>%s</th>''' % self.print_title (numbering)
396 s += ''.join ([' <th>%s</th>\n' % self.translation (h)
397 for h in detailed_status_heads])
399 s += ' <tr align="left">\n <td>%s<br>(%d)</td>\n' \
400 % (self.translation (section_titles_string),
401 sum (self.masterdocument.word_count))
404 s = ' <tr align="left">\n <td>%s<br>(%d)</td>\n' \
405 % (self.print_title (numbering),
406 sum (self.masterdocument.word_count))
408 if self.partially_translated:
409 s += ' <td>' + '<br>\n '.join (self.translators) + '</td>\n'
410 s += ' <td>' + '<br>\n '.join (self.checkers) + '</td>\n'
412 s += ' <td></td>\n' * 2
414 c = self.completeness (['color', 'short'], translated=True)
415 s += ' <td><span style="background-color: #%(color)s">\
416 %(short)s</span></td>\n' % {'color': c['color'],
419 if self.partially_translated:
420 u = self.uptodateness (['short', 'color'], translated=True)
421 s += ' <td><span style="background-color: #%(color)s">\
422 %(short)s</span></td>\n' % {'color': u['color'],
427 s += ' <td>' + self.gdp_status () + '</td>\n </tr>\n'
428 s += ''.join ([i.translations[self.language].texi_status (numbering)
429 for i in self.masterdocument.includes
430 if self.language in i.translations])
433 s += '</table>\n<p></p>\n'
class UntranslatedTelyDocument (TranslatedTelyDocument):
    """
    Translation status record whose language is taken from the file path.

    Instances are TranslatedTelyDocument objects in every other respect;
    TranslatedTelyDocument.__init__ checks for this class to skip its
    "no translator name found" error.
    NOTE(review): apparently created for languages whose translated file
    does not exist -- confirm against MasterTelyDocument.
    """
    def __init__ (self, filename, masterdocument, parent_translation=None):
        # Set the language from a '<ll>/...' path prefix before delegating:
        # the base classes only assign self.language when it is not set yet.
        # Slicing (instead of indexing) keeps paths shorter than three
        # characters from raising IndexError.
        if filename[2:3] == '/':
            self.language = filename[:2]
        TranslatedTelyDocument.__init__ (self, filename, masterdocument, parent_translation)
442 class MasterTelyDocument (TelyDocument):
445 parent_translations=dict ([(lang, None)
446 for lang in langdefs.LANGDICT])):
447 TelyDocument.__init__ (self, filename)
448 self.size = len (self.contents)
449 self.word_count = tely_word_count (self.contents)
450 self.translations = {}
452 translations = dict ([(lang, os.path.join (lang, filename))
453 for lang in langdefs.LANGDICT])
454 if not self.language or self.language == 'en':
455 languages = [x for x in parent_translations.keys () if x != 'en']
456 for lang in languages:
457 if os.path.exists (translations[lang]):
459 self.translations[lang] = TranslatedTelyDocument (translations[lang],
461 parent_translations.get (lang))
463 self.translations[lang] = UntranslatedTelyDocument (translations[lang],
465 parent_translations.get (lang))
467 for lang in [x for x in langdefs.LANGDICT if x and x != 'en']:
468 if not found.has_key (lang):
469 del self.translations[lang]
471 if self.translations:
472 self.includes = [MasterTelyDocument (f, self.translations)
473 for f in self.included_files]
477 def update_word_counts (self, s):
478 s = update_word_count (s, self.filename, sum (self.word_count))
479 for i in self.includes:
480 s = i.update_word_counts (s)
483 def texi_status (self, numbering=SectionNumber ()):
484 if self.title == 'Untitled' or not self.translations:
487 s = '''<table align="center" border="2">
489 <th>%s</th>''' % self.print_title (numbering)
490 s += ''.join ([' <th>%s</th>\n' % l for l in self.translations])
492 s += ' <tr align="left">\n <td>Section titles<br>(%d)</td>\n' \
493 % sum (self.word_count)
495 else: # if self is an included file
496 s = ' <tr align="left">\n <td>%s<br>(%d)</td>\n' \
497 % (self.print_title (numbering), sum (self.word_count))
499 s += ''.join ([t.short_texi_status ()
500 for t in self.translations.values ()])
502 s += ''.join ([i.texi_status (numbering) for i in self.includes])
505 s += '</table>\n<p></p>\n'
508 def text_status (self, numbering=SectionNumber (), colspec=[48,12]):
509 if self.title == 'Untitled' or not self.translations:
514 s += (self.print_title (numbering) + ' ').ljust (colspec[0])
515 s += ''.join (['%s'.ljust (colspec[1]) % l
516 for l in self.translations])
518 s += ('Section titles (%d)' % \
519 sum (self.word_count)).ljust (colspec[0])
523 % (self.print_title (numbering), sum (self.word_count))
524 s = s.ljust (colspec[0])
526 s += ''.join ([t.text_status ().ljust(colspec[1])
527 for t in self.translations.values ()])
529 s += ''.join ([i.text_status (numbering) for i in self.includes])
536 update_category_word_counts_re = re.compile (r'(?ms)^-(\d+)-(.*?\n)\d+ *total')
538 counts_re = re.compile (r'(?m)^(\d+) ')
540 def update_category_word_counts_sub (m):
541 return ('-' + m.group (1) + '-' + m.group (2)
543 for c in counts_re.findall (m.group (2))])).ljust (6)
547 # main () starts here-abouts
549 progress ("Reading documents...")
552 buildlib.read_pipe ("git ls-files | grep -E '[^/]*/?[^/]*[.](tely|texi)$'")[0].splitlines ()
554 master_docs = [MasterTelyDocument (os.path.normpath (filename))
555 for filename in master_files]
556 master_docs = [doc for doc in master_docs if doc.translations]
558 enabled_languages = [l for l in langdefs.LANGDICT
559 if langdefs.LANGDICT[l].enabled
562 progress ("Generating status pages...")
564 date_time = buildlib.read_pipe ('LANG= date -u')[0]
566 main_status_body = last_updated_string % date_time
567 main_status_body += '\n'.join ([doc.texi_status () for doc in master_docs])
569 texi_header = '''@c -*- coding: utf-8; mode: texinfo; -*-
570 @c This file was generated by translation-status.py -- DO NOT EDIT!
572 Translation of GIT committish: 0
576 Translation status currently only available in HTML.
587 main_status_page = texi_header % locals () + main_status_body + texi_footer
589 open ('translations.itexi', 'w').write (main_status_page)
591 for l in enabled_languages:
592 date_time = buildlib.read_pipe ('LANG=%s date -u' % l)[0]
593 updated = translation[l] (last_updated_string) % date_time
594 texi_status = '\n'.join ([doc.translations[l].texi_status ()
595 for doc in master_docs
596 if l in doc.translations])
597 lang_status_page = texi_header + updated + texi_status + texi_footer
598 open (os.path.join (l, 'translations.itexi'), 'w').write (lang_status_page)
600 main_status_txt = '''Documentation translations status
603 FT = fully translated
607 main_status_txt += '\n'.join ([doc.text_status () for doc in master_docs])
609 status_txt_file = 'out/translations-status.txt'
610 progress ("Writing %s..." % status_txt_file)
611 open (status_txt_file, 'w').write (main_status_txt)
613 translation_instructions_file = 'contributor/doc-translation-list.itexi'
614 progress ("Updating %s..." % translation_instructions_file)
615 translation_instructions = open (translation_instructions_file).read ()
617 for doc in master_docs:
618 translation_instructions = doc.update_word_counts (translation_instructions)
620 for html_file in re.findall (r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
621 translation_instructions):
622 word_count = sgml_word_count (open (html_file).read ())
623 translation_instructions = update_word_count (translation_instructions,
627 for po_file in re.findall (r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
628 translation_instructions):
629 word_count = po_word_count (open (po_file).read ())
630 translation_instructions = update_word_count (translation_instructions,
634 translation_instructions = \
635 update_category_word_counts_re.sub (update_category_word_counts_sub,
636 translation_instructions)
638 open (translation_instructions_file, 'w').write (translation_instructions)