5 Interactive Texinfo cross-references checking and fixing tool
21 file_not_found = 'file not found in include path'
23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
26 description='''Check and fix \
27 cross-references in a collection of Texinfo
28 documents heavily cross-referenced each other.
31 opt_parser.add_option ('-a', '--auto-fix',
32 help="Automatically fix cross-references whenever \
38 opt_parser.add_option ('-b', '--batch',
39 help="Do not run interactively",
44 opt_parser.add_option ('-c', '--check-comments',
45 help="Also check commented out x-refs",
47 dest='check_comments',
50 opt_parser.add_option ('-p', '--check-punctuation',
51 help="Check punctuation after x-refs",
53 dest='check_punctuation',
56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
58 action='append', dest='include_path',
59 default=[os.path.abspath (os.getcwd ())])
61 (options, files) = opt_parser.parse_args ()
63 class InteractionError (Exception):
67 manuals_defs = imp.load_source ('manuals_defs', files[0])
70 def find_file (name, prior_directory='.'):
71 p = os.path.join (prior_directory, name)
72 out_p = os.path.join (prior_directory, outdir, name)
73 if os.path.isfile (p):
75 elif os.path.isfile (out_p):
78 # looking for file in include_path
79 for d in options.include_path:
80 p = os.path.join (d, name)
81 if os.path.isfile (p):
84 # file not found in include_path: looking in `outdir' subdirs
85 for d in options.include_path:
86 p = os.path.join (d, outdir, name)
87 if os.path.isfile (p):
90 raise EnvironmentError (1, file_not_found, name)
95 def set_exit_code (n):
97 exit_code = max (exit_code, n)
100 if options.interactive:
106 def yes_prompt (question, default=False, retries=3):
107 d = {True: 'y', False: 'n'}.get (default, False)
109 a = raw_input ('%s [default: %s]' % (question, d) + '\n')
110 if a.lower ().startswith ('y'):
112 if a.lower ().startswith ('n'):
114 if a == '' or retries < 0:
116 stdout.write ("Please answer yes or no.\n")
119 def search_prompt ():
120 """Prompt user for a substring to look for in node names.
122 If user input is empty or matches no node name, return None,
123 otherwise return a list of (manual, node name, file) tuples.
126 substring = raw_input ("Enter a substring to search in node names \
127 (press Enter to skip this x-ref):\n")
130 substring = substring.lower ()
133 matches += [(k, node, manuals[k]['nodes'][node][0])
134 for node in manuals[k]['nodes']
135 if substring in node.lower ()]
139 def yes_prompt (question, default=False, retries=3):
142 def search_prompt ():
146 ref_re = re.compile (r'@(ref|ruser|rlearning|rprogram|rglos)\{([^,\\]*?)\}(.)',
148 node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')
150 whitespace_re = re.compile (r'\s+')
151 line_start_re = re.compile ('(?m)^')
153 def which_line (index, newline_indices):
154 """Calculate line number of a given string index
156 Return line number of string index index, where
157 newline_indices is an ordered iterable of all newline indices.
160 sup = len (newline_indices) - 1
161 n = len (newline_indices)
162 while inf + 1 != sup:
164 if index >= newline_indices [m]:
# Matches one commented-out region of a Texinfo document: either a
# `@c ...' / `@comment ...' line (whose `@' is not itself preceded by
# another `@'), up to and including its newline, or a whole block from
# an `@ignore' line to the matching `@end ignore' line.
# The pattern is written as adjacent raw-string literals instead of one
# literal broken with a backslash-newline, so that re-indenting this
# statement can never leak whitespace into the regular expression.
comments_re = re.compile (
    r'(?<!@)(@c(?:omment)? .*?\n'
    r'|^@ignore\n.*?\n@end ignore\n)',
    re.M | re.S)

def calc_comments_boundaries (texinfo_doc):
    """Return the string indices delimiting every comment in texinfo_doc.

    texinfo_doc is a string searched with comments_re.  The result is a
    list of (start, end) tuples, one per match and in order of
    appearance; it is empty when nothing matches.
    """
    return [match.span () for match in comments_re.finditer (texinfo_doc)]
178 def is_commented_out (start, end, comments_boundaries):
179 for k in range (len (comments_boundaries)):
180 if (start > comments_boundaries[k][0]
181 and end <= comments_boundaries[k][1]):
183 elif end <= comments_boundaries[k][0]:
188 def read_file (f, d):
190 base = os.path.basename (f)
191 dir = os.path.dirname (f)
195 d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
196 if options.check_comments:
197 d['comments_boundaries'][f] = []
199 d['comments_boundaries'][f] = calc_comments_boundaries (s)
201 for m in node_include_re.finditer (s):
202 if m.group (1) == 'node':
203 line = which_line (m.start (), d['newline_indices'][f])
204 d['nodes'][m.group (2)] = (f, line)
206 elif m.group (1) == 'include':
208 p = find_file (m.group (2), dir)
209 except EnvironmentError, (errno, strerror):
210 if strerror == file_not_found:
217 def read_manual (name):
218 """Look for all node names and cross-references in a Texinfo document
220 Return a (manual, dictionary) tuple where manual is the cross-reference
221 macro name defined by references_dict[name], and dictionary
222 has the following keys:
224 'nodes' is a dictionary of `node name':(file name, line number),
226 'contents' is a dictionary of file:`full file contents',
228 'newline_indices' is a dictionary of
229 file:[list of beginning-of-line string indices],
231 'comments_boundaries' is a dictionary of file:[list of (start, end) tuples],
232 which contain string indices of start and end of each comment.
234 Included files that can be found in the include path are processed too.
240 d['newline_indices'] = {}
241 d['comments_boundaries'] = {}
242 manual = manuals_defs.references_dict.get (name, '')
244 f = find_file (name + '.tely')
245 except EnvironmentError, (errno, strerror):
246 if not strerror == file_not_found:
250 f = find_file (name + '.texi')
251 except EnvironmentError, (errno, strerror):
252 if strerror == file_not_found:
253 sys.stderr.write (name + '.{texi,tely}: ' +
254 file_not_found + '\n')
259 log.write ("Processing manual %s (%s)\n" % (f, manual))
264 log.write ("Reading files...\n")
266 manuals = dict ([read_manual (name)
267 for name in manuals_defs.references_dict.keys ()])
def add_fix (old_type, old_ref, new_type, new_ref):
    """Record a cross-reference fix in the module-level ref_fixes collection.

    The fix is stored as the tuple (old_type, old_ref, new_type, new_ref).
    """
    fix = (old_type, old_ref, new_type, new_ref)
    ref_fixes.add (fix)
279 for (old_type, old_ref, new_type, new_ref) in ref_fixes:
281 found.append ((new_type, new_ref))
285 def preserve_linebreak (text, linebroken):
288 text = text.replace (' ', '\n', 1)
297 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
298 S = set (string_list)
300 string_list = list (S)
301 numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
302 for j in range (len (string_list))]) + '\n'
306 stdout.write (message +
307 "(press Enter to discard and start a new search)\n")
308 input = raw_input (numbered_list)
312 value = string_list[int (input) - 1]
314 stdout.write ("Error: index number out of range\n")
316 matches = [input in v for v in string_list]
317 n = matches.count (True)
319 stdout.write ("Error: input matches no item in the list\n")
321 stdout.write ("Error: ambiguous input (matches several items \
324 value = string_list[matches.index (True)]
328 raise InteractionError ("%d retries limit exceeded" % retries)
331 def check_ref (manual, file, m):
332 global fixes_count, bad_refs_count
336 original_name = m.group (2)
337 name = whitespace_re.sub (' ', original_name). strip ()
338 newline_indices = manuals[manual]['newline_indices'][file]
339 line = which_line (m.start (), newline_indices)
340 linebroken = '\n' in m.group (2)
341 next_char = m.group (3)
342 commented_out = is_commented_out \
343 (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
344 useful_fix = not outdir in file
346 # check punctuation after x-ref
347 if options.check_punctuation and not next_char in '.,;:!?':
348 stdout.write ("Warning: %s: %d: `%s': x-ref \
349 not followed by punctuation\n" % (file, line, name))
355 if type != 'ref' and type == manual and not commented_out:
357 stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
358 % (file, line, name, type))
359 if options.auto_fix or yes_prompt ("Fix this?"):
363 explicit_type = manual
365 if not name in manuals[explicit_type]['nodes'] and not commented_out:
370 stdout.write ("%s: %d: `%s': wrong internal x-ref\n"
371 % (file, line, name))
373 stdout.write ("%s: %d: `%s': wrong external `%s' x-ref\n"
374 % (file, line, name, type))
376 stdout.write ('--\n' + manuals[manual]['contents'][file]
377 [newline_indices[max (0, line - 2)]:
378 newline_indices[min (line + 3,
379 len (newline_indices) - 1)]] +
382 # try to find the reference in other manuals
384 for k in [k for k in manuals if k != explicit_type]:
385 if name in manuals[k]['nodes']:
388 stdout.write (" found as internal x-ref\n")
392 stdout.write (" found as `%s' x-ref\n" % k)
395 and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
396 add_fix (type, name, found[0], name)
400 elif len (found) > 1 and useful_fix:
401 if options.interactive or options.auto_fix:
402 stdout.write ("* Several manuals contain this node name, \
403 cannot determine manual automatically.\n")
404 if options.interactive:
405 t = choose_in_numbered_list ("Choose manual for this x-ref by \
406 index number or beginning of name:\n", found)
408 add_fix (type, name, t, name)
413 # try to find a fix already made
414 found = lookup_fix (name)
417 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
418 if options.auto_fix or yes_prompt ("Apply this fix?"):
419 type, new_name = found[0]
422 elif len (found) > 1:
423 if options.interactive or options.auto_fix:
424 stdout.write ("* Several previous fixes match \
425 this node name, cannot fix automatically.\n")
426 if options.interactive:
427 concatened = choose_in_numbered_list ("Choose new manual \
428 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
432 type, new_name = concatenated.split (' ', 1)
436 # all previous automatic fix attempts failed,
437 # ask user for substring to look in node names
439 node_list = search_prompt ()
440 if node_list == None:
441 if options.interactive:
442 stdout.write (warn_not_fixed)
445 stdout.write ("No matched node names.\n")
447 concatenated = choose_in_numbered_list ("Choose \
448 node name and manual for this x-ref by index number or beginning of name:\n", \
449 [' '.join ([i[0], i[1], '(in %s)' % i[2]])
453 t, z = concatenated.split (' ', 1)
454 new_name = z.split (' (in ', 1)[0]
455 add_fix (type, name, t, new_name)
460 if fixed and type == manual:
462 bad_refs_count += int (bad_ref)
463 if bad_ref and not useful_fix:
464 stdout.write ("*** Warning: this file is automatically generated, \
465 please fix the code source instead of generated documentation.\n")
467 # compute returned string
469 return ('@%s{%s}' % (type, original_name)) + next_char
472 (ref, n) = preserve_linebreak (new_name, linebroken)
473 return ('@%s{%s}' % (type, ref)) + next_char + n
476 log.write ("Checking cross-references...\n")
480 for file in manuals[key]['contents']:
481 s = ref_re.sub (lambda m: check_ref (key, file, m),
482 manuals[key]['contents'][file])
483 if s != manuals[key]['contents'][file]:
484 open (file, 'w').write (s)
485 except KeyboardInterrupt:
486 log.write ("Operation interrupted, exiting.\n")
488 except InteractionError, instance:
489 log.write ("Operation refused by user: %s\nExiting.\n" % instance)
492 log.write ("Done: %d bad x-refs found, fixed %d.\n" %
493 (bad_refs_count, fixes_count))