buildscripts/check_texi_refs.py

   1 #!/usr/bin/env python
   2
   3 """
   4 check_texi_refs.py
   5 Interactive Texinfo cross-references checking and fixing tool
   6
   7 """
   8
   9
  10 import sys
  11 import re
  12 import os
  13 import optparse
  14 import imp
  15
  16 outdir = 'out-www'
  17
  18 log = sys.stderr
  19 stdout = sys.stdout
  20
  21 file_not_found = 'file not found in include path'
  22
  23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
  24
  25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
  26                                     description='''Check and fix \
  27 cross-references in a collection of Texinfo
  28 documents heavily cross-referenced each other.
  29 ''')
  30
  31 opt_parser.add_option ('-a', '--auto-fix',
  32                        help="Automatically fix cross-references whenever \
  33 it is possible",
  34                        action='store_true',
  35                        dest='auto_fix',
  36                        default=False)
  37
  38 opt_parser.add_option ('-b', '--batch',
  39                        help="Do not run interactively",
  40                        action='store_false',
  41                        dest='interactive',
  42                        default=True)
  43
  44 opt_parser.add_option ('-c', '--check-comments',
  45                        help="Also check commented out x-refs",
  46                        action='store_true',
  47                        dest='check_comments',
  48                        default=False)
  49
  50 opt_parser.add_option ('-p', '--check-punctuation',
  51                        help="Check punctuation after x-refs",
  52                        action='store_true',
  53                        dest='check_punctuation',
  54                        default=False)
  55
  56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
  57                        metavar="DIR",
  58                        action='append', dest='include_path',
  59                        default=[os.path.abspath (os.getcwd ())])
  60
  61 (options, files) = opt_parser.parse_args ()
  62
  63 class InteractionError (Exception):
  64     pass
  65
  66
  67 manuals_defs = imp.load_source ('manuals_defs', files[0])
  68 manuals = {}
  69
  70 def find_file (name, prior_directory='.'):
  71     p = os.path.join (prior_directory, name)
  72     out_p = os.path.join (prior_directory, outdir, name)
  73     if os.path.isfile (p):
  74         return p
  75     elif os.path.isfile (out_p):
  76         return out_p
  77
  78     # looking for file in include_path
  79     for d in options.include_path:
  80         p = os.path.join (d, name)
  81         if os.path.isfile (p):
  82             return p
  83
  84     # file not found in include_path: looking in `outdir' subdirs
  85     for d in options.include_path:
  86         p = os.path.join (d, outdir, name)
  87         if os.path.isfile (p):
  88             return p
  89
  90     raise EnvironmentError (1, file_not_found, name)
  91
  92
  93 exit_code = 0
  94
  95 def set_exit_code (n):
  96     global exit_code
  97     exit_code = max (exit_code, n)
  98
  99
 100 if options.interactive:
 101     try:
 102         import readline
 103     except:
 104         pass
 105
 106     def yes_prompt (question, default=False, retries=3):
 107         d = {True: 'y', False: 'n'}.get (default, False)
 108         while retries:
 109             a = raw_input ('%s [default: %s]' % (question, d) + '\n')
 110             if a.lower ().startswith ('y'):
 111                 return True
 112             if a.lower ().startswith ('n'):
 113                 return False
 114             if a == '' or retries < 0:
 115                 return default
 116             stdout.write ("Please answer yes or no.\n")
 117             retries -= 1
 118
 119     def search_prompt ():
 120         """Prompt user for a substring to look for in node names.
 121
 122 If user input is empty or matches no node name, return None,
 123 otherwise return a list of (manual, node name, file) tuples.
 124
 125 """
 126         substring = raw_input ("Enter a substring to search in node names \
 127 (press Enter to skip this x-ref):\n")
 128         if not substring:
 129             return None
 130         substring = substring.lower ()
 131         matches = []
 132         for k in manuals:
 133             matches += [(k, node, manuals[k]['nodes'][node][0])
 134                         for node in manuals[k]['nodes']
 135                         if substring in node.lower ()]
 136         return matches
 137
 138 else:
 139     def yes_prompt (question, default=False, retries=3):
 140         return default
 141
 142     def search_prompt ():
 143         return None
 144
 145
# Cross-reference macro call.  Matches either `@MACRO{node}' or
# `@MACROnamed{node,display}', where MACRO is one of ref, ruser, rlearning,
# rprogram or rglos.  Groups:
#   1         the macro name without the `named' suffix,
#   ref       the node name of the plain form,
#   refname   the node name of the `named' form,
#   display   the display text of the `named' form,
#   last      the single character following the closing brace (any
#             character, newline included, because of re.DOTALL).
# Since `last' is mandatory, a cross-reference that is the very last thing
# in a file is not matched.
ref_re = re.compile \
    ('@(ref|ruser|rlearning|rprogram|rglos)(?:\\{(?P<ref>[^,\\\\\\}]+?)|\
named\\{(?P<refname>[^,\\\\]+?),(?P<display>[^,\\\\\\}]+?))\\}(?P<last>.)',
     re.DOTALL)
# A line starting with @node or @include; group 1 is the command and
# group 2 its argument (the rest of the line).
node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')

# Any run of whitespace; used to normalize names broken across lines.
whitespace_re = re.compile (r'\s+')
# Beginning of every line; used to collect line start indices.
line_start_re = re.compile ('(?m)^')
 154
 155 def which_line (index, newline_indices):
 156     """Calculate line number of a given string index
 157
 158 Return line number of string index index, where
 159 newline_indices is an ordered iterable of all newline indices.
 160 """
 161     inf = 0
 162     sup = len (newline_indices) - 1
 163     n = len (newline_indices)
 164     while inf + 1 != sup:
 165         m = (inf + sup) / 2
 166         if index >= newline_indices [m]:
 167             inf = m
 168         else:
 169             sup = m
 170     return inf + 1
 171
 172
 173 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
 174 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
 175
 176 def calc_comments_boundaries (texinfo_doc):
 177     return [(m.start (), m.end ()) for m in comments_re.finditer (texinfo_doc)]
 178
 179
 180 def is_commented_out (start, end, comments_boundaries):
 181     for k in range (len (comments_boundaries)):
 182         if (start > comments_boundaries[k][0]
 183             and end <= comments_boundaries[k][1]):
 184             return True
 185         elif end <= comments_boundaries[k][0]:
 186             return False
 187     return False
 188
 189
 190 def read_file (f, d):
 191     s = open (f).read ()
 192     base = os.path.basename (f)
 193     dir = os.path.dirname (f)
 194
 195     d['contents'][f] = s
 196
 197     d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
 198     if options.check_comments:
 199         d['comments_boundaries'][f] = []
 200     else:
 201         d['comments_boundaries'][f] = calc_comments_boundaries (s)
 202
 203     for m in node_include_re.finditer (s):
 204         if m.group (1) == 'node':
 205             line = which_line (m.start (), d['newline_indices'][f])
 206             d['nodes'][m.group (2)] = (f, line)
 207
 208         elif m.group (1) == 'include':
 209             try:
 210                 p = find_file (m.group (2), dir)
 211             except EnvironmentError, (errno, strerror):
 212                 if strerror == file_not_found:
 213                     continue
 214                 else:
 215                     raise
 216             read_file (p, d)
 217
 218
 219 def read_manual (name):
 220     """Look for all node names and cross-references in a Texinfo document
 221
 222 Return a (manual, dictionary) tuple where manual is the cross-reference
 223 macro name defined by references_dict[name], and dictionary
 224 has the following keys:
 225
 226   'nodes' is a dictionary of `node name':(file name, line number),
 227
 228   'contents' is a dictionary of file:`full file contents',
 229
 230   'newline_indices' is a dictionary of
 231 file:[list of beginning-of-line string indices],
 232
 233   'comments_boundaries' is a list of (start, end) tuples,
 234 which contain string indices of start and end of each comment.
 235
 236 Included files that can be found in the include path are processed too.
 237
 238 """
 239     d = {}
 240     d['nodes'] = {}
 241     d['contents'] = {}
 242     d['newline_indices'] = {}
 243     d['comments_boundaries'] = {}
 244     manual = manuals_defs.references_dict.get (name, '')
 245     try:
 246         f = find_file (name + '.tely')
 247     except EnvironmentError, (errno, strerror):
 248         if not strerror == file_not_found:
 249             raise
 250         else:
 251             try:
 252                 f = find_file (name + '.texi')
 253             except EnvironmentError, (errno, strerror):
 254                 if strerror == file_not_found:
 255                     sys.stderr.write (name + '.{texi,tely}: ' +
 256                                       file_not_found + '\n')
 257                     return (manual, d)
 258                 else:
 259                     raise
 260
 261     log.write ("Processing manual %s (%s)\n" % (f, manual))
 262     read_file (f, d)
 263     return (manual, d)
 264
 265
 266 log.write ("Reading files...\n")
 267
 268 manuals = dict ([read_manual (name)
 269                  for name in manuals_defs.references_dict.keys ()])
 270
 271 ref_fixes = set ()
 272 bad_refs_count = 0
 273 fixes_count = 0
 274
 275 def add_fix (old_type, old_ref, new_type, new_ref):
 276     ref_fixes.add ((old_type, old_ref, new_type, new_ref))
 277
 278
 279 def lookup_fix (r):
 280     found = []
 281     for (old_type, old_ref, new_type, new_ref) in ref_fixes:
 282         if r == old_ref:
 283             found.append ((new_type, new_ref))
 284     return found
 285
 286
 287 def preserve_linebreak (text, linebroken):
 288     if linebroken:
 289         if ' ' in text:
 290             text = text.replace (' ', '\n', 1)
 291             n = ''
 292         else:
 293             n = '\n'
 294     else:
 295         n = ''
 296     return (text, n)
 297
 298
 299 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
 300     S = set (string_list)
 301     S.discard ('')
 302     string_list = list (S)
 303     numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
 304                                for j in range (len (string_list))]) + '\n'
 305     t = retries
 306     while t > 0:
 307         value = ''
 308         stdout.write (message +
 309                       "(press Enter to discard and start a new search)\n")
 310         input = raw_input (numbered_list)
 311         if not input:
 312             return ''
 313         try:
 314             value = string_list[int (input) - 1]
 315         except IndexError:
 316             stdout.write ("Error: index number out of range\n")
 317         except ValueError:
 318             matches = [input in v for v in string_list]
 319             n = matches.count (True)
 320             if n == 0:
 321                 stdout.write ("Error: input matches no item in the list\n")
 322             elif n > 1:
 323                 stdout.write ("Error: ambiguous input (matches several items \
 324 in the list)\n")
 325             else:
 326                 value = string_list[matches.index (True)]
 327         if value:
 328             return value
 329         t -= 1
 330     raise InteractionError ("%d retries limit exceeded" % retries)
 331
 332 refs_count = 0
 333
 334 def check_ref (manual, file, m):
 335     global fixes_count, bad_refs_count, refs_count
 336     refs_count += 1
 337     bad_ref = False
 338     fixed = True
 339     type = m.group (1)
 340     original_name = m.group ('ref') or m.group ('refname')
 341     name = whitespace_re.sub (' ', original_name). strip ()
 342     newline_indices = manuals[manual]['newline_indices'][file]
 343     line = which_line (m.start (), newline_indices)
 344     linebroken = '\n' in original_name
 345     original_display_name = m.group ('display')
 346     next_char = m.group ('last')
 347     if original_display_name: # the xref has an explicit display name
 348         display_linebroken = '\n' in original_display_name
 349         display_name = whitespace_re.sub (' ', original_display_name). strip ()
 350     commented_out = is_commented_out \
 351         (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
 352     useful_fix = not outdir in file
 353
 354     # check puncuation after x-ref
 355     if options.check_punctuation and not next_char in '.,;:!?':
 356         stdout.write ("Warning: %s: %d: `%s': x-ref \
 357 not followed by punctuation\n" % (file, line, name))
 358
 359     # validate xref
 360     explicit_type = type
 361     new_name = name
 362
 363     if type != 'ref' and type == manual and not commented_out and useful_fix:
 364         bad_ref = True
 365         stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
 366                       % (file, line, name, type))
 367         if options.auto_fix or yes_prompt ("Fix this?"):
 368             type = 'ref'
 369
 370     if type == 'ref':
 371         explicit_type = manual
 372
 373     if not name in manuals[explicit_type]['nodes'] and not commented_out:
 374         bad_ref = True
 375         fixed = False
 376         stdout.write ('\n')
 377         if type == 'ref':
 378             stdout.write ("%s: %d: `%s': wrong internal x-ref\n"
 379                           % (file, line, name))
 380         else:
 381             stdout.write ("%s: %d: `%s': wrong external `%s' x-ref\n"
 382                           % (file, line, name, type))
 383         # print context
 384         stdout.write ('--\n' + manuals[manual]['contents'][file]
 385                       [newline_indices[max (0, line - 2)]:
 386                        newline_indices[min (line + 3,
 387                                             len (newline_indices) - 1)]] +
 388                       '--\n')
 389
 390         # try to find the reference in other manuals
 391         found = []
 392         for k in [k for k in manuals if k != explicit_type]:
 393             if name in manuals[k]['nodes']:
 394                 if k == manual:
 395                     found = ['ref']
 396                     stdout.write ("  found as internal x-ref\n")
 397                     break
 398                 else:
 399                     found.append (k)
 400                     stdout.write ("  found as `%s' x-ref\n" % k)
 401
 402         if (len (found) == 1
 403             and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
 404             add_fix (type, name, found[0], name)
 405             type = found[0]
 406             fixed = True
 407
 408         elif len (found) > 1 and useful_fix:
 409             if options.interactive or options.auto_fix:
 410                 stdout.write ("* Several manuals contain this node name, \
 411 cannot determine manual automatically.\n")
 412             if options.interactive:
 413                 t = choose_in_numbered_list ("Choose manual for this x-ref by \
 414 index number or beginning of name:\n", found)
 415                 if t:
 416                     add_fix (type, name, t, name)
 417                     type = t
 418                     fixed = True
 419
 420         if not fixed:
 421             # try to find a fix already made
 422             found = lookup_fix (name)
 423
 424             if len (found) == 1:
 425                 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
 426                 if options.auto_fix or yes_prompt ("Apply this fix?"):
 427                     type, new_name = found[0]
 428                     fixed = True
 429
 430             elif len (found) > 1:
 431                 if options.interactive or options.auto_fix:
 432                     stdout.write ("* Several previous fixes match \
 433 this node name, cannot fix automatically.\n")
 434                 if options.interactive:
 435                     concatened = choose_in_numbered_list ("Choose new manual \
 436 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
 437                                                      for i in found],
 438                                                     sep='\n')
 439                     if concatened:
 440                         type, new_name = concatenated.split (' ', 1)
 441                         fixed = True
 442
 443         if not fixed:
 444             # all previous automatic fixing attempts failed,
 445             # ask user for substring to look in node names
 446             while True:
 447                 node_list = search_prompt ()
 448                 if node_list == None:
 449                     if options.interactive:
 450                         stdout.write (warn_not_fixed)
 451                     break
 452                 elif not node_list:
 453                     stdout.write ("No matched node names.\n")
 454                 else:
 455                     concatenated = choose_in_numbered_list ("Choose \
 456 node name and manual for this x-ref by index number or beginning of name:\n", \
 457                             [' '.join ([i[0], i[1], '(in %s)' % i[2]])
 458                              for i in node_list],
 459                                                             sep='\n')
 460                     if concatenated:
 461                         t, z = concatenated.split (' ', 1)
 462                         new_name = z.split (' (in ', 1)[0]
 463                         add_fix (type, name, t, new_name)
 464                         type = t
 465                         fixed = True
 466                         break
 467
 468     if fixed and type == manual:
 469         type = 'ref'
 470     bad_refs_count += int (bad_ref)
 471     if bad_ref and not useful_fix:
 472         stdout.write ("*** Warning: this file is automatically generated, \
 473 please fix the code source instead of generated documentation.\n")
 474
 475     # compute returned string
 476     if new_name == name:
 477         if bad_ref and (options.interactive or options.auto_fix):
 478             # only the type of the ref was fixed
 479             fixes_count += 1
 480         if original_display_name:
 481             return ('@%snamed{%s,%s}' % (type, original_name, original_display_name)) + next_char
 482         else:
 483             return ('@%s{%s}' % (type, original_name)) + next_char
 484     else:
 485         fixes_count += 1
 486         (ref, n) = preserve_linebreak (new_name, linebroken)
 487         if original_display_name:
 488             if bad_ref:
 489                 stdout.write ("Current display name is `%s'\n")
 490                 display_name = raw_input \
 491                     ("Enter a new display name or press enter to keep the existing name:\n") \
 492                     or display_name
 493                 (display_name, n) = preserve_linebreak (display_name, display_linebroken)
 494             else:
 495                 display_name = original_display_name
 496             return ('@%snamed{%s,%s}' % (type, ref, display_name)) + \
 497                 next_char + n
 498         else:
 499             return ('@%s{%s}' % (type, ref)) + next_char + n
 500
 501
 502 log.write ("Checking cross-references...\n")
 503
 504 try:
 505     for key in manuals:
 506         for file in manuals[key]['contents']:
 507             s = ref_re.sub (lambda m: check_ref (key, file, m),
 508                             manuals[key]['contents'][file])
 509             if s != manuals[key]['contents'][file]:
 510                 open (file, 'w').write (s)
 511 except KeyboardInterrupt:
 512     log.write ("Operation interrupted, exiting.\n")
 513     sys.exit (2)
 514 except InteractionError, instance:
 515     log.write ("Operation refused by user: %s\nExiting.\n" % instance)
 516     sys.exit (3)
 517
 518 log.write ("Done: %d x-refs found, %d bad x-refs found, fixed %d.\n" %
 519            (refs_count, bad_refs_count, fixes_count))