5 Interactive Texinfo cross-references checking and fixing tool
21 file_not_found = 'file not found in include path'
23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
26 description='''Check and fix \
27 cross-references in a collection of Texinfo
28 documents heavily cross-referenced each other.
31 opt_parser.add_option ('-a', '--auto-fix',
32 help="Automatically fix cross-references whenever \
38 opt_parser.add_option ('-b', '--batch',
39 help="Do not run interactively",
44 opt_parser.add_option ('-c', '--check-comments',
45 help="Also check commented out x-refs",
47 dest='check_comments',
50 opt_parser.add_option ('-p', '--check-punctuation',
51 help="Check punctuation after x-refs",
53 dest='check_punctuation',
56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
58 action='append', dest='include_path',
59 default=[os.path.abspath (os.getcwd ())])
61 (options, files) = opt_parser.parse_args ()
63 class InteractionError (Exception):
67 manuals_defs = imp.load_source ('manuals_defs', files[0])
70 def find_file (name, prior_directory='.'):
71 p = os.path.join (prior_directory, name)
72 out_p = os.path.join (prior_directory, outdir, name)
73 if os.path.isfile (p):
75 elif os.path.isfile (out_p):
78 # looking for file in include_path
79 for d in options.include_path:
80 p = os.path.join (d, name)
81 if os.path.isfile (p):
84 # file not found in include_path: looking in `outdir' subdirs
85 for d in options.include_path:
86 p = os.path.join (d, outdir, name)
87 if os.path.isfile (p):
90 raise EnvironmentError (1, file_not_found, name)
95 def set_exit_code (n):
97 exit_code = max (exit_code, n)
100 if options.interactive:
106 def yes_prompt (question, default=False, retries=3):
107 d = {True: 'y', False: 'n'}.get (default, False)
109 a = raw_input ('%s [default: %s]' % (question, d) + '\n')
110 if a.lower ().startswith ('y'):
112 if a.lower ().startswith ('n'):
114 if a == '' or retries < 0:
116 stdout.write ("Please answer yes or no.\n")
119 def search_prompt ():
120 """Prompt user for a substring to look for in node names.
122 If user input is empty or matches no node name, return None,
123 otherwise return a list of (manual, node name, file) tuples.
126 substring = raw_input ("Enter a substring to search in node names \
127 (press Enter to skip this x-ref):\n")
130 substring = substring.lower ()
133 matches += [(k, node, manuals[k]['nodes'][node][0])
134 for node in manuals[k]['nodes']
135 if substring in node.lower ()]
139 def yes_prompt (question, default=False, retries=3):
142 def search_prompt ():
146 ref_re = re.compile \
147 ('@(ref|ruser|rlearning|rprogram|rglos)(?:\\{(?P<ref>[^,\\\\\\}]+?)|\
148 named\\{(?P<refname>[^,\\\\]+?),(?P<display>[^,\\\\\\}]+?))\\}(?P<last>.)',
150 node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')
152 whitespace_re = re.compile (r'\s+')
153 line_start_re = re.compile ('(?m)^')
155 def which_line (index, newline_indices):
156 """Calculate line number of a given string index
158 Return line number of string index index, where
159 newline_indices is an ordered iterable of all newline indices.
162 sup = len (newline_indices) - 1
163 n = len (newline_indices)
164 while inf + 1 != sup:
166 if index >= newline_indices [m]:
173 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
174 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
176 def calc_comments_boundaries (texinfo_doc):
177 return [(m.start (), m.end ()) for m in comments_re.finditer (texinfo_doc)]
180 def is_commented_out (start, end, comments_boundaries):
181 for k in range (len (comments_boundaries)):
182 if (start > comments_boundaries[k][0]
183 and end <= comments_boundaries[k][1]):
185 elif end <= comments_boundaries[k][0]:
190 def read_file (f, d):
192 base = os.path.basename (f)
193 dir = os.path.dirname (f)
197 d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
198 if options.check_comments:
199 d['comments_boundaries'][f] = []
201 d['comments_boundaries'][f] = calc_comments_boundaries (s)
203 for m in node_include_re.finditer (s):
204 if m.group (1) == 'node':
205 line = which_line (m.start (), d['newline_indices'][f])
206 d['nodes'][m.group (2)] = (f, line)
208 elif m.group (1) == 'include':
210 p = find_file (m.group (2), dir)
211 except EnvironmentError, (errno, strerror):
212 if strerror == file_not_found:
219 def read_manual (name):
220 """Look for all node names and cross-references in a Texinfo document
222 Return a (manual, dictionary) tuple where manual is the cross-reference
223 macro name defined by references_dict[name], and dictionary
224 has the following keys:
226 'nodes' is a dictionary of `node name':(file name, line number),
228 'contents' is a dictionary of file:`full file contents',
230 'newline_indices' is a dictionary of
231 file:[list of beginning-of-line string indices],
233 'comments_boundaries' is a dictionary of file:[list of (start, end) tuples],
234 which contain string indices of start and end of each comment.
236 Included files that can be found in the include path are processed too.
242 d['newline_indices'] = {}
243 d['comments_boundaries'] = {}
244 manual = manuals_defs.references_dict.get (name, '')
246 f = find_file (name + '.tely')
247 except EnvironmentError, (errno, strerror):
248 if not strerror == file_not_found:
252 f = find_file (name + '.texi')
253 except EnvironmentError, (errno, strerror):
254 if strerror == file_not_found:
255 sys.stderr.write (name + '.{texi,tely}: ' +
256 file_not_found + '\n')
261 log.write ("Processing manual %s (%s)\n" % (f, manual))
266 log.write ("Reading files...\n")
268 manuals = dict ([read_manual (name)
269 for name in manuals_defs.references_dict.keys ()])
275 def add_fix (old_type, old_ref, new_type, new_ref):
276 ref_fixes.add ((old_type, old_ref, new_type, new_ref))
281 for (old_type, old_ref, new_type, new_ref) in ref_fixes:
283 found.append ((new_type, new_ref))
287 def preserve_linebreak (text, linebroken):
290 text = text.replace (' ', '\n', 1)
299 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
300 S = set (string_list)
302 string_list = list (S)
303 numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
304 for j in range (len (string_list))]) + '\n'
308 stdout.write (message +
309 "(press Enter to discard and start a new search)\n")
310 input = raw_input (numbered_list)
314 value = string_list[int (input) - 1]
316 stdout.write ("Error: index number out of range\n")
318 matches = [input in v for v in string_list]
319 n = matches.count (True)
321 stdout.write ("Error: input matches no item in the list\n")
323 stdout.write ("Error: ambiguous input (matches several items \
326 value = string_list[matches.index (True)]
330 raise InteractionError ("%d retries limit exceeded" % retries)
334 def check_ref (manual, file, m):
335 global fixes_count, bad_refs_count, refs_count
340 original_name = m.group ('ref') or m.group ('refname')
341 name = whitespace_re.sub (' ', original_name). strip ()
342 newline_indices = manuals[manual]['newline_indices'][file]
343 line = which_line (m.start (), newline_indices)
344 linebroken = '\n' in original_name
345 original_display_name = m.group ('display')
346 next_char = m.group ('last')
347 if original_display_name: # the xref has an explicit display name
348 display_linebroken = '\n' in original_display_name
349 display_name = whitespace_re.sub (' ', original_display_name). strip ()
350 commented_out = is_commented_out \
351 (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
352 useful_fix = not outdir in file
354 # check punctuation after x-ref
355 if options.check_punctuation and not next_char in '.,;:!?':
356 stdout.write ("Warning: %s: %d: `%s': x-ref \
357 not followed by punctuation\n" % (file, line, name))
363 if type != 'ref' and type == manual and not commented_out and useful_fix:
365 stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
366 % (file, line, name, type))
367 if options.auto_fix or yes_prompt ("Fix this?"):
371 explicit_type = manual
373 if not name in manuals[explicit_type]['nodes'] and not commented_out:
378 stdout.write ("%s: %d: `%s': wrong internal x-ref\n"
379 % (file, line, name))
381 stdout.write ("%s: %d: `%s': wrong external `%s' x-ref\n"
382 % (file, line, name, type))
384 stdout.write ('--\n' + manuals[manual]['contents'][file]
385 [newline_indices[max (0, line - 2)]:
386 newline_indices[min (line + 3,
387 len (newline_indices) - 1)]] +
390 # try to find the reference in other manuals
392 for k in [k for k in manuals if k != explicit_type]:
393 if name in manuals[k]['nodes']:
396 stdout.write (" found as internal x-ref\n")
400 stdout.write (" found as `%s' x-ref\n" % k)
403 and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
404 add_fix (type, name, found[0], name)
408 elif len (found) > 1 and useful_fix:
409 if options.interactive or options.auto_fix:
410 stdout.write ("* Several manuals contain this node name, \
411 cannot determine manual automatically.\n")
412 if options.interactive:
413 t = choose_in_numbered_list ("Choose manual for this x-ref by \
414 index number or beginning of name:\n", found)
416 add_fix (type, name, t, name)
421 # try to find a fix already made
422 found = lookup_fix (name)
425 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
426 if options.auto_fix or yes_prompt ("Apply this fix?"):
427 type, new_name = found[0]
430 elif len (found) > 1:
431 if options.interactive or options.auto_fix:
432 stdout.write ("* Several previous fixes match \
433 this node name, cannot fix automatically.\n")
434 if options.interactive:
435 concatened = choose_in_numbered_list ("Choose new manual \
436 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
440 type, new_name = concatenated.split (' ', 1)
444 # all previous automatic fixing attempts failed,
445 # ask user for substring to look in node names
447 node_list = search_prompt ()
448 if node_list == None:
449 if options.interactive:
450 stdout.write (warn_not_fixed)
453 stdout.write ("No matched node names.\n")
455 concatenated = choose_in_numbered_list ("Choose \
456 node name and manual for this x-ref by index number or beginning of name:\n", \
457 [' '.join ([i[0], i[1], '(in %s)' % i[2]])
461 t, z = concatenated.split (' ', 1)
462 new_name = z.split (' (in ', 1)[0]
463 add_fix (type, name, t, new_name)
468 if fixed and type == manual:
470 bad_refs_count += int (bad_ref)
471 if bad_ref and not useful_fix:
472 stdout.write ("*** Warning: this file is automatically generated, \
473 please fix the code source instead of generated documentation.\n")
475 # compute returned string
477 if bad_ref and (options.interactive or options.auto_fix):
478 # only the type of the ref was fixed
480 if original_display_name:
481 return ('@%snamed{%s,%s}' % (type, original_name, original_display_name)) + next_char
483 return ('@%s{%s}' % (type, original_name)) + next_char
486 (ref, n) = preserve_linebreak (new_name, linebroken)
487 if original_display_name:
489 stdout.write ("Current display name is `%s'\n")
490 display_name = raw_input \
491 ("Enter a new display name or press enter to keep the existing name:\n") \
493 (display_name, n) = preserve_linebreak (display_name, display_linebroken)
495 display_name = original_display_name
496 return ('@%snamed{%s,%s}' % (type, ref, display_name)) + \
499 return ('@%s{%s}' % (type, ref)) + next_char + n
502 log.write ("Checking cross-references...\n")
506 for file in manuals[key]['contents']:
507 s = ref_re.sub (lambda m: check_ref (key, file, m),
508 manuals[key]['contents'][file])
509 if s != manuals[key]['contents'][file]:
510 open (file, 'w').write (s)
511 except KeyboardInterrupt:
512 log.write ("Operation interrupted, exiting.\n")
514 except InteractionError, instance:
515 log.write ("Operation refused by user: %s\nExiting.\n" % instance)
518 log.write ("Done: %d x-refs found, %d bad x-refs found, fixed %d.\n" %
519 (refs_count, bad_refs_count, fixes_count))