buildscripts/check_texi_refs.py

   1 #!/usr/bin/env python
   2
   3 """
   4 check_texi_refs.py
   5 Interactive Texinfo cross-references checking and fixing tool
   6
   7 """
   8
   9
  10 import sys
  11 import re
  12 import os
  13 import optparse
  14 import imp
  15
  16 outdir = 'out-www'
  17
  18 log = sys.stderr
  19 stdout = sys.stdout
  20
  21 file_not_found = 'file not found in include path'
  22
  23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
  24
  25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
  26                                     description='''Check and fix \
  27 cross-references in a collection of Texinfo
  28 documents heavily cross-referenced each other.
  29 ''')
  30
  31 opt_parser.add_option ('-a', '--auto-fix',
  32                        help="Automatically fix cross-references whenever \
  33 it is possible",
  34                        action='store_true',
  35                        dest='auto_fix',
  36                        default=False)
  37
  38 opt_parser.add_option ('-b', '--batch',
  39                        help="Do not run interactively",
  40                        action='store_false',
  41                        dest='interactive',
  42                        default=True)
  43
  44 opt_parser.add_option ('-c', '--check-comments',
  45                        help="Also check commented out x-refs",
  46                        action='store_true',
  47                        dest='check_comments',
  48                        default=False)
  49
  50 opt_parser.add_option ('-p', '--check-punctuation',
  51                        help="Check punctuation after x-refs",
  52                        action='store_true',
  53                        dest='check_punctuation',
  54                        default=False)
  55
  56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
  57                        metavar="DIR",
  58                        action='append', dest='include_path',
  59                        default=[os.path.abspath (os.getcwd ())])
  60
  61 (options, files) = opt_parser.parse_args ()
  62
  63 class InteractionError (Exception):
  64     pass
  65
  66
  67 manuals_defs = imp.load_source ('manuals_defs', files[0])
  68 manuals = {}
  69
  70 def find_file (name, prior_directory='.'):
  71     p = os.path.join (prior_directory, name)
  72     out_p = os.path.join (prior_directory, outdir, name)
  73     if os.path.isfile (p):
  74         return p
  75     elif os.path.isfile (out_p):
  76         return out_p
  77
  78     # looking for file in include_path
  79     for d in options.include_path:
  80         p = os.path.join (d, name)
  81         if os.path.isfile (p):
  82             return p
  83
  84     # file not found in include_path: looking in `outdir' subdirs
  85     for d in options.include_path:
  86         p = os.path.join (d, outdir, name)
  87         if os.path.isfile (p):
  88             return p
  89
  90     raise EnvironmentError (1, file_not_found, name)
  91
  92
  93 exit_code = 0
  94
  95 def set_exit_code (n):
  96     global exit_code
  97     exit_code = max (exit_code, n)
  98
  99
 100 if options.interactive:
 101     try:
 102         import readline
 103     except:
 104         pass
 105
 106     def yes_prompt (question, default=False, retries=3):
 107         d = {True: 'y', False: 'n'}.get (default, False)
 108         while retries:
 109             a = raw_input ('%s [default: %s]' % (question, d) + '\n')
 110             if a.lower ().startswith ('y'):
 111                 return True
 112             if a.lower ().startswith ('n'):
 113                 return False
 114             if a == '' or retries < 0:
 115                 return default
 116             stdout.write ("Please answer yes or no.\n")
 117             retries -= 1
 118
 119     def search_prompt ():
 120         """Prompt user for a substring to look for in node names.
 121
 122 If user input is empty or matches no node name, return None,
 123 otherwise return a list of (manual, node name, file) tuples.
 124
 125 """
 126         substring = raw_input ("Enter a substring to search in node names \
 127 (press Enter to skip this x-ref):\n")
 128         if not substring:
 129             return None
 130         substring = substring.lower ()
 131         matches = []
 132         for k in manuals:
 133             matches += [(k, node, manuals[k]['nodes'][node][0])
 134                         for node in manuals[k]['nodes']
 135                         if substring in node.lower ()]
 136         return matches
 137
 138 else:
 139     def yes_prompt (question, default=False, retries=3):
 140         return default
 141
 142     def search_prompt ():
 143         return None
 144
 145
 146 ref_re = re.compile \
 147     ('@(ref|ruser|rlearning|rprogram|rglos)(?:\\{(?P<ref>[^,\\\\\\}]+?)|\
 148 named\\{(?P<refname>[^,\\\\]+?),(?P<display>[^,\\\\\\}]+?))\\}(?P<last>.)',
 149      re.DOTALL)
 150 node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')
 151
 152 whitespace_re = re.compile (r'\s+')
 153 line_start_re = re.compile ('(?m)^')
 154
 155 def which_line (index, newline_indices):
 156     """Calculate line number of a given string index
 157
 158 Return line number of string index index, where
 159 newline_indices is an ordered iterable of all newline indices.
 160 """
 161     inf = 0
 162     sup = len (newline_indices) - 1
 163     n = len (newline_indices)
 164     while inf + 1 != sup:
 165         m = (inf + sup) / 2
 166         if index >= newline_indices [m]:
 167             inf = m
 168         else:
 169             sup = m
 170     return inf + 1
 171
 172
 173 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
 174 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
 175
 176 def calc_comments_boundaries (texinfo_doc):
 177     return [(m.start (), m.end ()) for m in comments_re.finditer (texinfo_doc)]
 178
 179
 180 def is_commented_out (start, end, comments_boundaries):
 181     for k in range (len (comments_boundaries)):
 182         if (start > comments_boundaries[k][0]
 183             and end <= comments_boundaries[k][1]):
 184             return True
 185         elif end <= comments_boundaries[k][0]:
 186             return False
 187     return False
 188
 189
 190 def read_file (f, d):
 191     s = open (f).read ()
 192     base = os.path.basename (f)
 193     dir = os.path.dirname (f)
 194
 195     d['contents'][f] = s
 196
 197     d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
 198     if options.check_comments:
 199         d['comments_boundaries'][f] = []
 200     else:
 201         d['comments_boundaries'][f] = calc_comments_boundaries (s)
 202
 203     for m in node_include_re.finditer (s):
 204         if m.group (1) == 'node':
 205             line = which_line (m.start (), d['newline_indices'][f])
 206             d['nodes'][m.group (2)] = (f, line)
 207
 208         elif m.group (1) == 'include':
 209             try:
 210                 p = find_file (m.group (2), dir)
 211             except EnvironmentError, (errno, strerror):
 212                 if strerror == file_not_found:
 213                     continue
 214                 else:
 215                     raise
 216             read_file (p, d)
 217
 218
 219 def read_manual (name):
 220     """Look for all node names and cross-references in a Texinfo document
 221
 222 Return a (manual, dictionary) tuple where manual is the cross-reference
 223 macro name defined by references_dict[name], and dictionary
 224 has the following keys:
 225
 226   'nodes' is a dictionary of `node name':(file name, line number),
 227
 228   'contents' is a dictionary of file:`full file contents',
 229
 230   'newline_indices' is a dictionary of
 231 file:[list of beginning-of-line string indices],
 232
 233   'comments_boundaries' is a list of (start, end) tuples,
 234 which contain string indices of start and end of each comment.
 235
 236 Included files that can be found in the include path are processed too.
 237
 238 """
 239     d = {}
 240     d['nodes'] = {}
 241     d['contents'] = {}
 242     d['newline_indices'] = {}
 243     d['comments_boundaries'] = {}
 244     manual = manuals_defs.references_dict.get (name, '')
 245     try:
 246         f = find_file (name + '.tely')
 247     except EnvironmentError, (errno, strerror):
 248         if not strerror == file_not_found:
 249             raise
 250         else:
 251             try:
 252                 f = find_file (name + '.texi')
 253             except EnvironmentError, (errno, strerror):
 254                 if strerror == file_not_found:
 255                     sys.stderr.write (name + '.{texi,tely}: ' +
 256                                       file_not_found + '\n')
 257                     return (manual, d)
 258                 else:
 259                     raise
 260
 261     log.write ("Processing manual %s (%s)\n" % (f, manual))
 262     read_file (f, d)
 263     return (manual, d)
 264
 265
 266 log.write ("Reading files...\n")
 267
 268 manuals = dict ([read_manual (name)
 269                  for name in manuals_defs.references_dict.keys ()])
 270
 271 ref_fixes = set ()
 272 bad_refs_count = 0
 273 fixes_count = 0
 274
 275 def add_fix (old_type, old_ref, new_type, new_ref):
 276     ref_fixes.add ((old_type, old_ref, new_type, new_ref))
 277
 278
 279 def lookup_fix (r):
 280     found = []
 281     for (old_type, old_ref, new_type, new_ref) in ref_fixes:
 282         if r == old_ref:
 283             found.append ((new_type, new_ref))
 284     return found
 285
 286
 287 def preserve_linebreak (text, linebroken):
 288     if linebroken:
 289         if ' ' in text:
 290             text = text.replace (' ', '\n', 1)
 291             n = ''
 292         else:
 293             n = '\n'
 294     else:
 295         n = ''
 296     return (text, n)
 297
 298
 299 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
 300     S = set (string_list)
 301     S.discard ('')
 302     string_list = list (S)
 303     numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
 304                                for j in range (len (string_list))]) + '\n'
 305     t = retries
 306     while t > 0:
 307         value = ''
 308         stdout.write (message +
 309                       "(press Enter to discard and start a new search)\n")
 310         input = raw_input (numbered_list)
 311         if not input:
 312             return ''
 313         try:
 314             value = string_list[int (input) - 1]
 315         except IndexError:
 316             stdout.write ("Error: index number out of range\n")
 317         except ValueError:
 318             matches = [input in v for v in string_list]
 319             n = matches.count (True)
 320             if n == 0:
 321                 stdout.write ("Error: input matches no item in the list\n")
 322             elif n > 1:
 323                 stdout.write ("Error: ambiguous input (matches several items \
 324 in the list)\n")
 325             else:
 326                 value = string_list[matches.index (True)]
 327         if value:
 328             return value
 329         t -= 1
 330     raise InteractionError ("%d retries limit exceeded" % retries)
 331
 332 refs_count = 0
 333
 334 def check_ref (manual, file, m):
 335     global fixes_count, bad_refs_count, refs_count
 336     refs_count += 1
 337     bad_ref = False
 338     fixed = True
 339     type = m.group (1)
 340     original_name = m.group ('ref') or m.group ('refname')
 341     name = whitespace_re.sub (' ', original_name). strip ()
 342     newline_indices = manuals[manual]['newline_indices'][file]
 343     line = which_line (m.start (), newline_indices)
 344     linebroken = '\n' in original_name
 345     original_display_name = m.group ('display')
 346     next_char = m.group ('last')
 347     if original_display_name: # the xref has an explicit display name
 348         display_linebroken = '\n' in original_display_name
 349         display_name = whitespace_re.sub (' ', original_display_name). strip ()
 350     commented_out = is_commented_out \
 351         (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
 352     useful_fix = not outdir in file
 353
 354     # check puncuation after x-ref
 355     if options.check_punctuation and not next_char in '.,;:!?':
 356         stdout.write ("Warning: %s: %d: `%s': x-ref \
 357 not followed by punctuation\n" % (file, line, name))
 358
 359     # validate xref
 360     explicit_type = type
 361     new_name = name
 362
 363     if type != 'ref' and type == manual and not commented_out:
 364         if useful_fix:
 365             fixed = False
 366             bad_ref = True
 367             stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
 368                           % (file, line, name, type))
 369             if options.auto_fix or yes_prompt ("Fix this?"):
 370                 type = 'ref'
 371
 372     if type == 'ref':
 373         explicit_type = manual
 374
 375     if not name in manuals[explicit_type]['nodes'] and not commented_out:
 376         bad_ref = True
 377         fixed = False
 378         stdout.write ('\n')
 379         if type == 'ref':
 380             stdout.write ("\e[1;31m%s: %d: `%s': wrong internal x-ref\e[0m\n"
 381                           % (file, line, name))
 382         else:
 383             stdout.write ("\e[1;31m%s: %d: `%s': wrong external `%s' x-ref\e[0m\n"
 384                           % (file, line, name, type))
 385         # print context
 386         stdout.write ('--\n' + manuals[manual]['contents'][file]
 387                       [newline_indices[max (0, line - 2)]:
 388                        newline_indices[min (line + 3,
 389                                             len (newline_indices) - 1)]] +
 390                       '--\n')
 391
 392         # try to find the reference in other manuals
 393         found = []
 394         for k in [k for k in manuals if k != explicit_type]:
 395             if name in manuals[k]['nodes']:
 396                 if k == manual:
 397                     found = ['ref']
 398                     stdout.write ("\e[1;32m  found as internal x-ref\e[0m\n")
 399                     break
 400                 else:
 401                     found.append (k)
 402                     stdout.write ("\e[1;32m  found as `%s' x-ref\e[0m\n" % k)
 403
 404         if (len (found) == 1
 405             and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
 406             add_fix (type, name, found[0], name)
 407             type = found[0]
 408             fixed = True
 409
 410         elif len (found) > 1 and useful_fix:
 411             if options.interactive or options.auto_fix:
 412                 stdout.write ("* Several manuals contain this node name, \
 413 cannot determine manual automatically.\n")
 414             if options.interactive:
 415                 t = choose_in_numbered_list ("Choose manual for this x-ref by \
 416 index number or beginning of name:\n", found)
 417                 if t:
 418                     add_fix (type, name, t, name)
 419                     type = t
 420                     fixed = True
 421
 422         if not fixed:
 423             # try to find a fix already made
 424             found = lookup_fix (name)
 425
 426             if len (found) == 1:
 427                 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
 428                 if options.auto_fix or yes_prompt ("Apply this fix?"):
 429                     type, new_name = found[0]
 430                     fixed = True
 431
 432             elif len (found) > 1:
 433                 if options.interactive or options.auto_fix:
 434                     stdout.write ("* Several previous fixes match \
 435 this node name, cannot fix automatically.\n")
 436                 if options.interactive:
 437                     concatened = choose_in_numbered_list ("Choose new manual \
 438 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
 439                                                      for i in found],
 440                                                     sep='\n')
 441                     if concatened:
 442                         type, new_name = concatenated.split (' ', 1)
 443                         fixed = True
 444
 445         if not fixed:
 446             # all previous automatic fixing attempts failed,
 447             # ask user for substring to look in node names
 448             while True:
 449                 node_list = search_prompt ()
 450                 if node_list == None:
 451                     if options.interactive:
 452                         stdout.write (warn_not_fixed)
 453                     break
 454                 elif not node_list:
 455                     stdout.write ("No matched node names.\n")
 456                 else:
 457                     concatenated = choose_in_numbered_list ("Choose \
 458 node name and manual for this x-ref by index number or beginning of name:\n", \
 459                             [' '.join ([i[0], i[1], '(in %s)' % i[2]])
 460                              for i in node_list],
 461                                                             sep='\n')
 462                     if concatenated:
 463                         t, z = concatenated.split (' ', 1)
 464                         new_name = z.split (' (in ', 1)[0]
 465                         add_fix (type, name, t, new_name)
 466                         type = t
 467                         fixed = True
 468                         break
 469
 470     if fixed and type == manual:
 471         type = 'ref'
 472     bad_refs_count += int (bad_ref)
 473     if bad_ref and not useful_fix:
 474         stdout.write ("*** Warning: this file is automatically generated, \
 475 please fix the code source instead of generated documentation.\n")
 476
 477     # compute returned string
 478     if new_name == name:
 479         if bad_ref and (options.interactive or options.auto_fix):
 480             # only the type of the ref was fixed
 481             fixes_count += int (fixed)
 482         if original_display_name:
 483             return ('@%snamed{%s,%s}' % (type, original_name, original_display_name)) + next_char
 484         else:
 485             return ('@%s{%s}' % (type, original_name)) + next_char
 486     else:
 487         fixes_count += int (fixed)
 488         (ref, n) = preserve_linebreak (new_name, linebroken)
 489         if original_display_name:
 490             if bad_ref:
 491                 stdout.write ("Current display name is `%s'\n")
 492                 display_name = raw_input \
 493                     ("Enter a new display name or press enter to keep the existing name:\n") \
 494                     or display_name
 495                 (display_name, n) = preserve_linebreak (display_name, display_linebroken)
 496             else:
 497                 display_name = original_display_name
 498             return ('@%snamed{%s,%s}' % (type, ref, display_name)) + \
 499                 next_char + n
 500         else:
 501             return ('@%s{%s}' % (type, ref)) + next_char + n
 502
 503
 504 log.write ("Checking cross-references...\n")
 505
 506 try:
 507     for key in manuals:
 508         for file in manuals[key]['contents']:
 509             s = ref_re.sub (lambda m: check_ref (key, file, m),
 510                             manuals[key]['contents'][file])
 511             if s != manuals[key]['contents'][file]:
 512                 open (file, 'w').write (s)
 513 except KeyboardInterrupt:
 514     log.write ("Operation interrupted, exiting.\n")
 515     sys.exit (2)
 516 except InteractionError, instance:
 517     log.write ("Operation refused by user: %s\nExiting.\n" % instance)
 518     sys.exit (3)
 519
 520 log.write ("\e[1;36mDone: %d x-refs found, %d bad x-refs found, fixed %d.\e[0m\n" %
 521            (refs_count, bad_refs_count, fixes_count))