5 Interactive Texinfo cross-references checking and fixing tool
21 file_not_found = 'file not found in include path'
23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
26 description='''Check and fix \
27 cross-references in a collection of Texinfo
28 documents heavily cross-referenced each other.
31 opt_parser.add_option ('-a', '--auto-fix',
32 help="Automatically fix cross-references whenever \
38 opt_parser.add_option ('-b', '--batch',
39 help="Do not run interactively",
44 opt_parser.add_option ('-c', '--check-comments',
45 help="Also check commented out x-refs",
47 dest='check_comments',
50 opt_parser.add_option ('-p', '--check-punctuation',
51 help="Check punctuation after x-refs",
53 dest='check_punctuation',
56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
58 action='append', dest='include_path',
# Parse the command line; `files' receives the positional arguments.
61 (options, files) = opt_parser.parse_args ()
# The current working directory is appended last, so find_file tries it
# after every directory given with -I.
62 options.include_path.append (os.path.abspath (os.getcwd ()))
64 class InteractionError (Exception):
# The first positional argument is loaded (and therefore executed) as a
# Python module; the code below reads its `references_dict' attribute.
68 manuals_defs = imp.load_source ('manuals_defs', files[0])
# Locate `name' on disk; raise EnvironmentError when it cannot be found.
# Candidates are tested with os.path.isfile in this order:
#   1. prior_directory/name
#   2. prior_directory/outdir/name
#   3. d/name for each d in options.include_path
#   4. d/outdir/name for each d in options.include_path
# `outdir' is a module-level name defined outside this excerpt.
# NOTE(review): the statements executed on a successful match are not
# visible here; presumably the matching path is returned -- confirm.
71 def find_file (name, prior_directory='.'):
72 p = os.path.join (prior_directory, name)
73 out_p = os.path.join (prior_directory, outdir, name)
74 if os.path.isfile (p):
76 elif os.path.isfile (out_p):
79 # looking for file in include_path
80 for d in options.include_path:
81 p = os.path.join (d, name)
82 if os.path.isfile (p):
85 # file not found in include_path: looking in `outdir' subdirs
86 for d in options.include_path:
87 p = os.path.join (d, outdir, name)
88 if os.path.isfile (p):
# Callers compare the strerror field against file_not_found to recognize
# this particular failure.
91 raise EnvironmentError (1, file_not_found, name)
# Record n as the exit code unless a larger value is already stored, so the
# stored code never decreases.
96 def set_exit_code (n):
98 exit_code = max (exit_code, n)
# Interactive mode: the prompt helpers defined here read answers with
# raw_input.
101 if options.interactive:
# Ask a yes/no question.  `default' selects the letter shown in the prompt.
# NOTE(review): the return statements and the handling of `retries' between
# attempts are outside this excerpt -- confirm before relying on them.
107 def yes_prompt (question, default=False, retries=3):
# Letter displayed as the default choice: 'y' for True, 'n' for False.
108 d = {True: 'y', False: 'n'}.get (default, False)
110 a = raw_input ('%s [default: %s]' % (question, d) + '\n')
# Only the beginning of the answer matters, case-insensitively.
111 if a.lower ().startswith ('y'):
113 if a.lower ().startswith ('n'):
# An empty answer or a negative retry budget ends the dialogue.
115 if a == '' or retries < 0:
117 stdout.write ("Please answer yes or no.\n")
# Matching is a case-insensitive substring test against every node name of
# every manual in `manuals'.  The third tuple element is element [0] of the
# node entry, which read_file stores as the file defining the node.
120 def search_prompt ():
121 """Prompt user for a substring to look for in node names.
123 If user input is empty or matches no node name, return None,
124 otherwise return a list of (manual, node name, file) tuples.
127 substring = raw_input ("Enter a substring to search in node names \
128 (press Enter to skip this x-ref):\n")
131 substring = substring.lower ()
134 matches += [(k, node, manuals[k]['nodes'][node][0])
135 for node in manuals[k]['nodes']
136 if substring in node.lower ()]
# Second definitions of the two prompt helpers, with the same signatures as
# the interactive ones.
# NOTE(review): their bodies and the enclosing branch are outside this
# excerpt; presumably these are the non-interactive fallbacks -- confirm.
140 def yes_prompt (question, default=False, retries=3):
143 def search_prompt ():
# ref_re matches one cross-reference macro call.  Group 1 is the macro name;
# the plain form `{node}' fills group `ref', the `named{node,display}' form
# fills groups `refname' and `display'; group `last' is the single character
# that follows the closing brace.
# NOTE(review): the flags argument of this call is outside this excerpt.
147 ref_re = re.compile \
148 ('@((?:ressay|rgloss|rinternals|rlearning|rslr|rprogram|ruser|ref)|named)(?:\\{(?P<ref>[^,\\\\\\}]+?)|\
149 named\\{(?P<refname>[^,\\\\]+?),(?P<display>[^,\\\\\\}]+?))\\}(?P<last>.)',
# Matches whole `@node ...' and `@include ...' lines: group 1 is the keyword,
# group 2 its argument.
151 node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')
# Used to collapse any run of whitespace (including newlines) to one space.
153 whitespace_re = re.compile (r'\s+')
# Matches the empty string at the start of every line.
154 line_start_re = re.compile ('(?m)^')
# The lists passed in by read_file hold the string index at which each line
# starts (built from line_start_re match ends).
# NOTE(review): only part of the loop is visible; inf/sup/m look like the
# bounds and midpoint of a bisection over newline_indices -- confirm.  The
# local `n' is not used in any visible line.
156 def which_line (index, newline_indices):
157 """Calculate line number of a given string index
159 Return line number of string index index, where
160 newline_indices is an ordered iterable of all newline indices.
163 sup = len (newline_indices) - 1
164 n = len (newline_indices)
165 while inf + 1 != sup:
167 if index >= newline_indices [m]:
# comments_re matches either a `@c ' / `@comment ' command (not preceded by
# another `@') up to the end of its line, or a whole block from a line
# `@ignore' through the line `@end ignore'.
174 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
175 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
# Return the (start, end) string indices of every comments_re match in
# texinfo_doc, in document order.
177 def calc_comments_boundaries (texinfo_doc):
178 return [(m.start (), m.end ()) for m in comments_re.finditer (texinfo_doc)]
# Tell whether the span [start, end) lies inside one of the comment spans in
# comments_boundaries, a list of (start, end) index pairs.
# NOTE(review): the statements under each branch are outside this excerpt;
# presumably the first returns True and the second stops the scan -- confirm.
181 def is_commented_out (start, end, comments_boundaries):
182 for k in range (len (comments_boundaries)):
# Inside comment k: begins strictly after its start and ends no later than
# its end.
183 if (start > comments_boundaries[k][0]
184 and end <= comments_boundaries[k][1]):
# The span ends before comment k even begins.
186 elif end <= comments_boundaries[k][0]:
# Index one Texinfo file `f' into the manual dictionary `d': record its line
# starts, its comment spans and its @node definitions, and resolve the files
# it @includes.
# NOTE(review): `s' is assigned outside this excerpt; presumably it is the
# full text of `f' -- confirm.  What is done with an included file once
# found is not visible here either.
191 def read_file (f, d):
193 base = os.path.basename (f)
194 dir = os.path.dirname (f)
# Index of the beginning of every line, later used by which_line.
198 d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
# With --check-comments no span is registered as a comment, so x-refs inside
# comments are checked like any other.
199 if options.check_comments:
200 d['comments_boundaries'][f] = []
202 d['comments_boundaries'][f] = calc_comments_boundaries (s)
204 for m in node_include_re.finditer (s):
205 if m.group (1) == 'node':
206 line = which_line (m.start (), d['newline_indices'][f])
# Node name -> (defining file, line number).
207 d['nodes'][m.group (2)] = (f, line)
209 elif m.group (1) == 'include':
# Included files are looked up relative to the including file's directory
# first.
211 p = find_file (m.group (2), dir)
212 except EnvironmentError, (errno, strerror):
213 if strerror == file_not_found:
220 def read_manual (name):
221 """Look for all node names and cross-references in a Texinfo document
223 Return a (manual, dictionary) tuple where manual is the cross-reference
224 macro name defined by references_dict[name], and dictionary
225 has the following keys:
227 'nodes' is a dictionary of `node name':(file name, line number),
229 'contents' is a dictionary of file:`full file contents',
231 'newline_indices' is a dictionary of
232 file:[list of beginning-of-line string indices],
234 'comments_boundaries' is a dictionary of file:[list of (start, end) tuples],
235 which contain string indices of start and end of each comment.
237 Included files that can be found in the include path are processed too.
243 d['newline_indices'] = {}
244 d['comments_boundaries'] = {}
# An unknown manual name maps to the empty string.
245 manual = manuals_defs.references_dict.get (name, '')
# The `.tely' master file is tried first ...
247 f = find_file (name + '.tely')
248 except EnvironmentError, (errno, strerror):
249 if not strerror == file_not_found:
# ... then the `.texi' one.
253 f = find_file (name + '.texi')
254 except EnvironmentError, (errno, strerror):
255 if strerror == file_not_found:
256 sys.stderr.write (name + '.{texi,tely}: ' +
257 file_not_found + '\n')
262 log.write ("Processing manual %s (%s)\n" % (f, manual))
267 log.write ("Reading files...\n")
# One entry per key of references_dict.  read_manual's docstring describes
# each item as a (macro name, data dictionary) pair, so `manuals' is keyed
# by macro name.
269 manuals = dict ([read_manual (name)
270 for name in manuals_defs.references_dict.keys ()])
# Remember a fix as an (old_type, old_ref, new_type, new_ref) tuple in the
# module-level collection ref_fixes.
276 def add_fix (old_type, old_ref, new_type, new_ref):
277 ref_fixes.add ((old_type, old_ref, new_type, new_ref))
# NOTE(review): the two lines below belong to a definition whose header is
# outside this excerpt (presumably lookup_fix, which check_ref calls); they
# collect (new_type, new_ref) pairs from the recorded fixes.
282 for (old_type, old_ref, new_type, new_ref) in ref_fixes:
284 found.append ((new_type, new_ref))
# Callers unpack the result as a pair: (text, trailing string).
# NOTE(review): the condition guarding the replacement and the return
# statement are outside this excerpt; presumably the first space becomes a
# newline only when `linebroken' is true -- confirm.
288 def preserve_linebreak (text, linebroken):
# Only the first space is turned into a line break.
291 text = text.replace (' ', '\n', 1)
# Show string_list as a numbered menu and let the user pick one item, either
# by its 1-based number or by typing text.  Raises InteractionError once the
# retry limit is exceeded.
# NOTE(review): the loop structure, the empty-input handling and the return
# statement are outside this excerpt.
300 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
# Duplicates are dropped by going through a set.
301 S = set (string_list)
303 string_list = list (S)
304 numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
305 for j in range (len (string_list))]) + '\n'
309 stdout.write (message +
310 "(press Enter to discard and start a new search)\n")
# NOTE: `input' shadows the builtin of the same name inside this function.
311 input = raw_input (numbered_list)
# Numeric answers are 1-based indices into the menu.
315 value = string_list[int (input) - 1]
317 stdout.write ("Error: index number out of range\n")
# Textual answers match any item that CONTAINS the typed text, not only
# items that begin with it.
319 matches = [input in v for v in string_list]
320 n = matches.count (True)
322 stdout.write ("Error: input matches no item in the list\n")
324 stdout.write ("Error: ambiguous input (matches several items \
327 value = string_list[matches.index (True)]
331 raise InteractionError ("%d retries limit exceeded" % retries)
335 def check_ref (manual, file, m):
336 global fixes_count, bad_refs_count, refs_count
341 original_name = m.group ('ref') or m.group ('refname')
342 name = whitespace_re.sub (' ', original_name). strip ()
343 newline_indices = manuals[manual]['newline_indices'][file]
344 line = which_line (m.start (), newline_indices)
345 linebroken = '\n' in original_name
346 original_display_name = m.group ('display')
347 next_char = m.group ('last')
348 if original_display_name: # the xref has an explicit display name
349 display_linebroken = '\n' in original_display_name
350 display_name = whitespace_re.sub (' ', original_display_name). strip ()
351 commented_out = is_commented_out \
352 (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
353 useful_fix = not outdir in file
355 # check puncuation after x-ref
356 if options.check_punctuation and not next_char in '.,;:!?':
357 stdout.write ("Warning: %s: %d: `%s': x-ref \
358 not followed by punctuation\n" % (file, line, name))
364 if type != 'ref' and type == manual and not commented_out:
368 stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
369 % (file, line, name, type))
370 if options.auto_fix or yes_prompt ("Fix this?"):
374 explicit_type = manual
376 if not name in manuals[explicit_type]['nodes'] and not commented_out:
381 stdout.write ("
\e[1;31m%s: %d: `%s': wrong internal x-ref
\e[0m\n"
382 % (file, line, name))
384 stdout.write ("
\e[1;31m%s: %d: `%s': wrong external `%s' x-ref
\e[0m\n"
385 % (file, line, name, type))
387 stdout.write ('--\n' + manuals[manual]['contents'][file]
388 [newline_indices[max (0, line - 2)]:
389 newline_indices[min (line + 3,
390 len (newline_indices) - 1)]] +
393 # try to find the reference in other manuals
395 for k in [k for k in manuals if k != explicit_type]:
396 if name in manuals[k]['nodes']:
399 stdout.write ("
\e[1;32m found as internal x-ref
\e[0m\n")
403 stdout.write ("
\e[1;32m found as `%s' x-ref
\e[0m\n" % k)
406 and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
407 add_fix (type, name, found[0], name)
411 elif len (found) > 1 and useful_fix:
412 if options.interactive or options.auto_fix:
413 stdout.write ("* Several manuals contain this node name, \
414 cannot determine manual automatically.\n")
415 if options.interactive:
416 t = choose_in_numbered_list ("Choose manual for this x-ref by \
417 index number or beginning of name:\n", found)
419 add_fix (type, name, t, name)
424 # try to find a fix already made
425 found = lookup_fix (name)
428 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
429 if options.auto_fix or yes_prompt ("Apply this fix?"):
430 type, new_name = found[0]
433 elif len (found) > 1:
434 if options.interactive or options.auto_fix:
435 stdout.write ("* Several previous fixes match \
436 this node name, cannot fix automatically.\n")
437 if options.interactive:
438 concatened = choose_in_numbered_list ("Choose new manual \
439 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
443 type, new_name = concatenated.split (' ', 1)
447 # all previous automatic fixing attempts failed,
448 # ask user for substring to look in node names
450 node_list = search_prompt ()
451 if node_list == None:
452 if options.interactive:
453 stdout.write (warn_not_fixed)
456 stdout.write ("No matched node names.\n")
458 concatenated = choose_in_numbered_list ("Choose \
459 node name and manual for this x-ref by index number or beginning of name:\n", \
460 [' '.join ([i[0], i[1], '(in %s)' % i[2]])
464 t, z = concatenated.split (' ', 1)
465 new_name = z.split (' (in ', 1)[0]
466 add_fix (type, name, t, new_name)
471 if fixed and type == manual:
473 bad_refs_count += int (bad_ref)
474 if bad_ref and not useful_fix:
475 stdout.write ("*** Warning: this file is automatically generated, \
476 please fix the code source instead of generated documentation.\n")
478 # compute returned string
480 if bad_ref and (options.interactive or options.auto_fix):
481 # only the type of the ref was fixed
482 fixes_count += int (fixed)
483 if original_display_name:
484 return ('@%snamed{%s,%s}' % (type, original_name, original_display_name)) + next_char
486 return ('@%s{%s}' % (type, original_name)) + next_char
488 fixes_count += int (fixed)
489 (ref, n) = preserve_linebreak (new_name, linebroken)
490 if original_display_name:
492 stdout.write ("Current display name is `%s'\n")
493 display_name = raw_input \
494 ("Enter a new display name or press enter to keep the existing name:\n") \
496 (display_name, n) = preserve_linebreak (display_name, display_linebroken)
498 display_name = original_display_name
499 return ('@%snamed{%s,%s}' % (type, ref, display_name)) + \
502 return ('@%s{%s}' % (type, ref)) + next_char + n
# Main pass: run check_ref over every x-ref of every file of every manual.
# NOTE(review): the enclosing try statement and the loop binding `key' are
# outside this excerpt.
505 log.write ("Checking cross-references...\n")
509 for file in manuals[key]['contents']:
510 s = ref_re.sub (lambda m: check_ref (key, file, m),
511 manuals[key]['contents'][file])
# A file is rewritten only when at least one x-ref was changed.
512 if s != manuals[key]['contents'][file]:
# NOTE(review): the file object is never closed explicitly here.
513 open (file, 'w').write (s)
514 except KeyboardInterrupt:
515 log.write ("Operation interrupted, exiting.\n")
# Raised by choose_in_numbered_list when its retry limit is exceeded.
517 except InteractionError, instance:
518 log.write ("Operation refused by user: %s\nExiting.\n" % instance)
521 log.write ("
\e[1;36mDone: %d x-refs found, %d bad x-refs found, fixed %d.
\e[0m\n" %
522 (refs_count, bad_refs_count, fixes_count))