5 Interactive Texinfo cross-references checking and fixing tool
21 file_not_found = 'file not found in include path'
23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
26 description='''Check and fix \
27 cross-references in a collection of Texinfo
28 documents heavily cross-referenced each other.
31 opt_parser.add_option ('-a', '--auto-fix',
32 help="Automatically fix cross-references whenever \
38 opt_parser.add_option ('-b', '--batch',
39 help="Do not run interactively",
44 opt_parser.add_option ('-c', '--check-comments',
45 help="Also check commented out x-refs",
47 dest='check_comments',
50 opt_parser.add_option ('-p', '--check-punctuation',
51 help="Check punctuation after x-refs",
53 dest='check_punctuation',
56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
58 action='append', dest='include_path',
59 default=[os.path.abspath (os.getcwd ())])
61 (options, files) = opt_parser.parse_args ()
63 class InteractionError (Exception):
67 manuals_defs = imp.load_source ('manuals_defs', files[0])
def find_file (name, prior_directory='.'):
    """Return the path of the first existing file called name.

    Candidates are tried in this order:
      1. prior_directory/name
      2. prior_directory/<outdir>/name
      3. <d>/name for every d in options.include_path
      4. <d>/<outdir>/name for every d in options.include_path

    Raise EnvironmentError (1, file_not_found, name) when no candidate
    is an existing regular file.
    """
    include_dirs = options.include_path
    candidates = [os.path.join (prior_directory, name),
                  os.path.join (prior_directory, outdir, name)]
    # plain include path first, `outdir' subdirs only as a last resort
    candidates += [os.path.join (d, name) for d in include_dirs]
    candidates += [os.path.join (d, outdir, name) for d in include_dirs]

    for candidate in candidates:
        if os.path.isfile (candidate):
            return candidate

    raise EnvironmentError (1, file_not_found, name)
def set_exit_code (n):
    """Bump the module-level exit_code to n when n is larger.

    exit_code never decreases: a smaller or equal n leaves it unchanged.
    """
    global exit_code
    if n > exit_code:
        exit_code = n
100 if options.interactive:
106 def yes_prompt (question, default=False, retries=3):
107 d = {True: 'y', False: 'n'}.get (default, False)
109 a = raw_input ('%s [default: %s]' % (question, d) + '\n')
110 if a.lower ().startswith ('y'):
112 if a.lower ().startswith ('n'):
114 if a == '' or retries < 0:
116 stdout.write ("Please answer yes or no.\n")
119 def search_prompt ():
120 """Prompt user for a substring to look for in node names.
122 If user input is empty or matches no node name, return None,
123 otherwise return a list of (manual, node name, file) tuples.
126 substring = raw_input ("Enter a substring to search in node names \
127 (press Enter to skip this x-ref):\n")
130 substring = substring.lower ()
133 matches += [(k, node, manuals[k]['nodes'][node][0])
134 for node in manuals[k]['nodes']
135 if substring in node.lower ()]
139 def yes_prompt (question, default=False, retries=3):
142 def search_prompt ():
146 ref_re = re.compile (r'@(ref|ruser|rlearning|rprogram|rglos)\{([^,\\]*?)\}(.)',
148 node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')
150 whitespace_re = re.compile (r'\s+')
151 line_start_re = re.compile ('(?m)^')
def which_line (index, newline_indices):
    """Calculate line number of a given string index

    Return the 1-based line number of string index index, where
    newline_indices is an ordered sequence holding the string index at
    which each line starts (so its first entry is the start of line 1).

    An index equal to a line start belongs to that line.  Positions on
    the last line are handled too, including when the text does not end
    with a newline, and a sequence with a single line start is accepted.
    """
    # Local import keeps this function self-contained.
    from bisect import bisect_right

    # The number of line starts located at or before index is exactly the
    # 1-based number of the line containing index.  Never report less than
    # line 1, even for an index preceding the first recorded line start.
    return max (1, bisect_right (newline_indices, index))
171 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
172 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
def calc_comments_boundaries (texinfo_doc):
    """Return the boundaries of every comment found in texinfo_doc.

    The result is a list of (start, end) string-index tuples, one per
    match of comments_re, in order of appearance in the document.
    """
    boundaries = []
    for comment in comments_re.finditer (texinfo_doc):
        # span () is the (start (), end ()) pair of the match
        boundaries.append (comment.span ())
    return boundaries
def is_commented_out (start, end, comments_boundaries):
    """Tell whether the text span from start to end lies inside a comment.

    comments_boundaries is a list of (start, end) string-index tuples
    sorted by increasing position, such as the one returned by
    calc_comments_boundaries.

    Return True when the span begins strictly after the start of a
    comment and ends no later than the end of that same comment,
    False otherwise (including when comments_boundaries is empty).
    """
    for (comment_start, comment_end) in comments_boundaries:
        if start > comment_start and end <= comment_end:
            return True
        if end <= comment_start:
            # Boundaries are sorted, so every remaining comment starts
            # after the span ends: none of them can contain it.
            return False
    return False
188 def read_file (f, d):
190 base = os.path.basename (f)
191 dir = os.path.dirname (f)
195 d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
196 if options.check_comments:
197 d['comments_boundaries'][f] = []
199 d['comments_boundaries'][f] = calc_comments_boundaries (s)
201 for m in node_include_re.finditer (s):
202 if m.group (1) == 'node':
203 line = which_line (m.start (), d['newline_indices'][f])
204 d['nodes'][m.group (2)] = (f, line)
206 elif m.group (1) == 'include':
208 p = find_file (m.group (2), dir)
209 except EnvironmentError, (errno, strerror):
210 if strerror == file_not_found:
217 def read_manual (name):
218 """Look for all node names and cross-references in a Texinfo document
220 Return a (manual, dictionary) tuple where manual is the cross-reference
221 macro name defined by references_dict[name], and dictionary
222 has the following keys:
224 'nodes' is a dictionary of `node name':(file name, line number),
226 'contents' is a dictionary of file:`full file contents',
228 'newline_indices' is a dictionary of
229 file:[list of beginning-of-line string indices],
'comments_boundaries' is a dictionary of
file:[list of (start, end) tuples], where each tuple holds the
string indices of the start and end of one comment.
234 Included files that can be found in the include path are processed too.
240 d['newline_indices'] = {}
241 d['comments_boundaries'] = {}
242 manual = manuals_defs.references_dict.get (name, '')
244 f = find_file (name + '.tely')
245 except EnvironmentError, (errno, strerror):
246 if not strerror == file_not_found:
250 f = find_file (name + '.texi')
251 except EnvironmentError, (errno, strerror):
252 if strerror == file_not_found:
253 sys.stderr.write (name + '.{texi,tely}: ' +
254 file_not_found + '\n')
259 log.write ("Processing manual %s (%s)\n" % (f, manual))
264 log.write ("Reading files...\n")
266 manuals = dict ([read_manual (name)
267 for name in manuals_defs.references_dict.keys ()])
def add_fix (old_type, old_ref, new_type, new_ref):
    """Remember a fix in ref_fixes.

    The fix is stored as the tuple (old_type, old_ref, new_type, new_ref).
    """
    fix = (old_type, old_ref, new_type, new_ref)
    ref_fixes.add (fix)
279 for (old_type, old_ref, new_type, new_ref) in ref_fixes:
281 found.append ((new_type, new_ref))
285 def preserve_linebreak (text, linebroken):
288 text = text.replace (' ', '\n', 1)
297 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
298 S = set (string_list)
300 string_list = list (S)
301 numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
302 for j in range (len (string_list))]) + '\n'
306 stdout.write (message +
307 "(press Enter to discard and start a new search)\n")
308 input = raw_input (numbered_list)
312 value = string_list[int (input) - 1]
314 stdout.write ("Error: index number out of range\n")
316 matches = [input in v for v in string_list]
317 n = matches.count (True)
319 stdout.write ("Error: input matches no item in the list\n")
321 stdout.write ("Error: ambiguous input (matches several items \
324 value = string_list[matches.index (True)]
328 raise InteractionError ("%d retries limit exceeded" % retries)
331 def check_ref (manual, file, m):
332 global fixes_count, bad_refs_count
335 original_name = m.group (2)
336 name = whitespace_re.sub (' ', original_name). strip ()
337 newline_indices = manuals[manual]['newline_indices'][file]
338 line = which_line (m.start (), newline_indices)
339 linebroken = '\n' in m.group (2)
340 next_char = m.group (3)
341 commented_out = is_commented_out \
342 (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
343 useful_fix = not outdir in file
# check punctuation after x-ref
346 if options.check_punctuation and not next_char in '.,;:!?':
347 stdout.write ("Warning: %s: %d: `%s': x-ref \
348 not followed by punctuation\n" % (file, line, name))
354 if type != 'ref' and type == manual and not commented_out:
356 stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
357 % (file, line, name, type))
358 if options.auto_fix or yes_prompt ("Fix this?"):
362 explicit_type = manual
364 if not name in manuals[explicit_type]['nodes'] and not commented_out:
369 stdout.write ("%s: %d: `%s': wrong internal x-ref\n"
370 % (file, line, name))
372 stdout.write ("%s: %d: `%s': wrong external `%s' x-ref\n"
373 % (file, line, name, type))
375 stdout.write ('--\n' + manuals[manual]['contents'][file]
376 [newline_indices[max (0, line - 2)]:
377 newline_indices[min (line + 3,
378 len (newline_indices) - 1)]] +
381 # try to find the reference in other manuals
383 for k in [k for k in manuals if k != explicit_type]:
384 if name in manuals[k]['nodes']:
387 stdout.write (" found as internal x-ref\n")
391 stdout.write (" found as `%s' x-ref\n" % k)
394 and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
395 add_fix (type, name, found[0], name)
399 elif len (found) > 1 and useful_fix:
400 if options.interactive or options.auto_fix:
401 stdout.write ("* Several manuals contain this node name, \
402 cannot determine manual automatically.\n")
403 if options.interactive:
404 t = choose_in_numbered_list ("Choose manual for this x-ref by \
405 index number or beginning of name:\n", found)
407 add_fix (type, name, t, name)
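# try to find a fix already made
# NOTE(review): in the "several previous fixes" branch below, the user's
# choice is stored in `concatened' but then read back as `concatenated';
# this looks like a typo that would raise NameError/UnboundLocalError
# when that branch runs -- confirm and use a single name.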
413 found = lookup_fix (name)
416 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
417 if options.auto_fix or yes_prompt ("Apply this fix?"):
418 type, new_name = found[0]
421 elif len (found) > 1:
422 if options.interactive or options.auto_fix:
423 stdout.write ("* Several previous fixes match \
424 this node name, cannot fix automatically.\n")
425 if options.interactive:
426 concatened = choose_in_numbered_list ("Choose new manual \
427 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
431 type, new_name = concatenated.split (' ', 1)
# all previous automatic fix attempts failed:
# ask the user for a substring to look for in node names
438 node_list = search_prompt ()
439 if node_list == None:
440 if options.interactive:
441 stdout.write (warn_not_fixed)
444 stdout.write ("No matched node names.\n")
446 concatenated = choose_in_numbered_list ("Choose \
447 node name and manual for this x-ref by index number or beginning of name:\n", \
448 [' '.join ([i[0], i[1], '(in %s)' % i[2]])
452 t, z = concatenated.split (' ', 1)
453 new_name = z.split (' (in ', 1)[0]
454 add_fix (type, name, t, new_name)
459 if fixed and type == manual:
461 bad_refs_count += int (bad_ref)
462 if bad_ref and not useful_fix:
463 stdout.write ("*** Warning: this file is automatically generated, \
464 please fix the code source manually.\n")
466 # compute returned string
468 return ('@%s{%s}' % (type, original_name)) + next_char
471 (ref, n) = preserve_linebreak (new_name, linebroken)
472 return ('@%s{%s}' % (type, ref)) + next_char + n
475 log.write ("Checking cross-references...\n")
479 for file in manuals[key]['contents']:
480 s = ref_re.sub (lambda m: check_ref (key, file, m),
481 manuals[key]['contents'][file])
482 if s != manuals[key]['contents'][file]:
483 open (file, 'w').write (s)
484 except KeyboardInterrupt:
485 log.write ("Operation interrupted, exiting.\n")
487 except InteractionError, instance:
488 log.write ("Operation refused by user: %s\nExiting.\n" % instance)
491 log.write ("Done, %d bad x-refs found, fixed %d.\n" %
492 (bad_refs_count, fixes_count))