buildscripts/check_texi_refs.py

   1 #!/usr/bin/env python
   2
   3 """
   4 check_texi_refs.py
   5 Interactive Texinfo cross-references checking and fixing tool
   6
   7 """
   8
   9
  10 import sys
  11 import re
  12 import os
  13 import optparse
  14 import imp
  15
  16 outdir = 'out-www'
  17
  18 log = sys.stderr
  19 stdout = sys.stdout
  20
  21 file_not_found = 'file not found in include path'
  22
  23 warn_not_fixed = '*** Warning: this broken x-ref has not been fixed!\n'
  24
  25 opt_parser = optparse.OptionParser (usage='check_texi_refs.py [OPTION]... FILE',
  26                                     description='''Check and fix \
  27 cross-references in a collection of Texinfo
  28 documents heavily cross-referenced each other.
  29 ''')
  30
  31 opt_parser.add_option ('-a', '--auto-fix',
  32                        help="Automatically fix cross-references whenever \
  33 it is possible",
  34                        action='store_true',
  35                        dest='auto_fix',
  36                        default=False)
  37
  38 opt_parser.add_option ('-b', '--batch',
  39                        help="Do not run interactively",
  40                        action='store_false',
  41                        dest='interactive',
  42                        default=True)
  43
  44 opt_parser.add_option ('-c', '--check-comments',
  45                        help="Also check commented out x-refs",
  46                        action='store_true',
  47                        dest='check_comments',
  48                        default=False)
  49
  50 opt_parser.add_option ('-p', '--check-punctuation',
  51                        help="Check punctuation after x-refs",
  52                        action='store_true',
  53                        dest='check_punctuation',
  54                        default=False)
  55
  56 opt_parser.add_option ("-I", '--include', help="add DIR to include path",
  57                        metavar="DIR",
  58                        action='append', dest='include_path',
  59                        default=[os.path.abspath (os.getcwd ())])
  60
  61 (options, files) = opt_parser.parse_args ()
  62
  63 class InteractionError (Exception):
  64     pass
  65
  66
  67 manuals_defs = imp.load_source ('manuals_defs', files[0])
  68 manuals = {}
  69
  70 def find_file (name, prior_directory='.'):
  71     p = os.path.join (prior_directory, name)
  72     out_p = os.path.join (prior_directory, outdir, name)
  73     if os.path.isfile (p):
  74         return p
  75     elif os.path.isfile (out_p):
  76         return out_p
  77
  78     # looking for file in include_path
  79     for d in options.include_path:
  80         p = os.path.join (d, name)
  81         if os.path.isfile (p):
  82             return p
  83
  84     # file not found in include_path: looking in `outdir' subdirs
  85     for d in options.include_path:
  86         p = os.path.join (d, outdir, name)
  87         if os.path.isfile (p):
  88             return p
  89
  90     raise EnvironmentError (1, file_not_found, name)
  91
  92
  93 exit_code = 0
  94
  95 def set_exit_code (n):
  96     global exit_code
  97     exit_code = max (exit_code, n)
  98
  99
 100 if options.interactive:
 101     try:
 102         import readline
 103     except:
 104         pass
 105
 106     def yes_prompt (question, default=False, retries=3):
 107         d = {True: 'y', False: 'n'}.get (default, False)
 108         while retries:
 109             a = raw_input ('%s [default: %s]' % (question, d) + '\n')
 110             if a.lower ().startswith ('y'):
 111                 return True
 112             if a.lower ().startswith ('n'):
 113                 return False
 114             if a == '' or retries < 0:
 115                 return default
 116             stdout.write ("Please answer yes or no.\n")
 117             retries -= 1
 118
 119     def search_prompt ():
 120         """Prompt user for a substring to look for in node names.
 121
 122 If user input is empty or matches no node name, return None,
 123 otherwise return a list of (manual, node name, file) tuples.
 124
 125 """
 126         substring = raw_input ("Enter a substring to search in node names \
 127 (press Enter to skip this x-ref):\n")
 128         if not substring:
 129             return None
 130         substring = substring.lower ()
 131         matches = []
 132         for k in manuals:
 133             matches += [(k, node, manuals[k]['nodes'][node][0])
 134                         for node in manuals[k]['nodes']
 135                         if substring in node.lower ()]
 136         return matches
 137
 138 else:
 139     def yes_prompt (question, default=False, retries=3):
 140         return default
 141
 142     def search_prompt ():
 143         return None
 144
 145
 146 ref_re = re.compile (r'@(ref|ruser|rlearning|rprogram|rglos)\{([^,\\]*?)\}(.)',
 147                      re.DOTALL)
 148 node_include_re = re.compile (r'(?m)^@(node|include)\s+(.+?)$')
 149
 150 whitespace_re = re.compile (r'\s+')
 151 line_start_re = re.compile ('(?m)^')
 152
 153 def which_line (index, newline_indices):
 154     """Calculate line number of a given string index
 155
 156 Return line number of string index index, where
 157 newline_indices is an ordered iterable of all newline indices.
 158 """
 159     inf = 0
 160     sup = len (newline_indices) - 1
 161     n = len (newline_indices)
 162     while inf + 1 != sup:
 163         m = (inf + sup) / 2
 164         if index >= newline_indices [m]:
 165             inf = m
 166         else:
 167             sup = m
 168     return inf + 1
 169
 170
 171 comments_re = re.compile ('(?<!@)(@c(?:omment)? \
 172 .*?\\n|^@ignore\\n.*?\\n@end ignore\\n)', re.M | re.S)
 173
 174 def calc_comments_boundaries (texinfo_doc):
 175     return [(m.start (), m.end ()) for m in comments_re.finditer (texinfo_doc)]
 176
 177
 178 def is_commented_out (start, end, comments_boundaries):
 179     for k in range (len (comments_boundaries)):
 180         if (start > comments_boundaries[k][0]
 181             and end <= comments_boundaries[k][1]):
 182             return True
 183         elif end <= comments_boundaries[k][0]:
 184             return False
 185     return False
 186
 187
 188 def read_file (f, d):
 189     s = open (f).read ()
 190     base = os.path.basename (f)
 191     dir = os.path.dirname (f)
 192
 193     d['contents'][f] = s
 194
 195     d['newline_indices'][f] = [m.end () for m in line_start_re.finditer (s)]
 196     if options.check_comments:
 197         d['comments_boundaries'][f] = []
 198     else:
 199         d['comments_boundaries'][f] = calc_comments_boundaries (s)
 200
 201     for m in node_include_re.finditer (s):
 202         if m.group (1) == 'node':
 203             line = which_line (m.start (), d['newline_indices'][f])
 204             d['nodes'][m.group (2)] = (f, line)
 205
 206         elif m.group (1) == 'include':
 207             try:
 208                 p = find_file (m.group (2), dir)
 209             except EnvironmentError, (errno, strerror):
 210                 if strerror == file_not_found:
 211                     continue
 212                 else:
 213                     raise
 214             read_file (p, d)
 215
 216
 217 def read_manual (name):
 218     """Look for all node names and cross-references in a Texinfo document
 219
 220 Return a (manual, dictionary) tuple where manual is the cross-reference
 221 macro name defined by references_dict[name], and dictionary
 222 has the following keys:
 223
 224   'nodes' is a dictionary of `node name':(file name, line number),
 225
 226   'contents' is a dictionary of file:`full file contents',
 227
 228   'newline_indices' is a dictionary of
 229 file:[list of beginning-of-line string indices],
 230
 231   'comments_boundaries' is a list of (start, end) tuples,
 232 which contain string indices of start and end of each comment.
 233
 234 Included files that can be found in the include path are processed too.
 235
 236 """
 237     d = {}
 238     d['nodes'] = {}
 239     d['contents'] = {}
 240     d['newline_indices'] = {}
 241     d['comments_boundaries'] = {}
 242     manual = manuals_defs.references_dict.get (name, '')
 243     try:
 244         f = find_file (name + '.tely')
 245     except EnvironmentError, (errno, strerror):
 246         if not strerror == file_not_found:
 247             raise
 248         else:
 249             try:
 250                 f = find_file (name + '.texi')
 251             except EnvironmentError, (errno, strerror):
 252                 if strerror == file_not_found:
 253                     sys.stderr.write (name + '.{texi,tely}: ' +
 254                                       file_not_found + '\n')
 255                     return (manual, d)
 256                 else:
 257                     raise
 258
 259     log.write ("Processing manual %s (%s)\n" % (f, manual))
 260     read_file (f, d)
 261     return (manual, d)
 262
 263
 264 log.write ("Reading files...\n")
 265
 266 manuals = dict ([read_manual (name)
 267                  for name in manuals_defs.references_dict.keys ()])
 268
 269 ref_fixes = set ()
 270 bad_refs_count = 0
 271 fixes_count = 0
 272
 273 def add_fix (old_type, old_ref, new_type, new_ref):
 274     ref_fixes.add ((old_type, old_ref, new_type, new_ref))
 275
 276
 277 def lookup_fix (r):
 278     found = []
 279     for (old_type, old_ref, new_type, new_ref) in ref_fixes:
 280         if r == old_ref:
 281             found.append ((new_type, new_ref))
 282     return found
 283
 284
 285 def preserve_linebreak (text, linebroken):
 286     if linebroken:
 287         if ' ' in text:
 288             text = text.replace (' ', '\n', 1)
 289             n = ''
 290         else:
 291             n = '\n'
 292     else:
 293         n = ''
 294     return (text, n)
 295
 296
 297 def choose_in_numbered_list (message, string_list, sep=' ', retries=3):
 298     S = set (string_list)
 299     S.discard ('')
 300     string_list = list (S)
 301     numbered_list = sep.join ([str (j + 1) + '. ' + string_list[j]
 302                                for j in range (len (string_list))]) + '\n'
 303     t = retries
 304     while t > 0:
 305         value = ''
 306         stdout.write (message +
 307                       "(press Enter to discard and start a new search)\n")
 308         input = raw_input (numbered_list)
 309         if not input:
 310             return ''
 311         try:
 312             value = string_list[int (input) - 1]
 313         except IndexError:
 314             stdout.write ("Error: index number out of range\n")
 315         except ValueError:
 316             matches = [input in v for v in string_list]
 317             n = matches.count (True)
 318             if n == 0:
 319                 stdout.write ("Error: input matches no item in the list\n")
 320             elif n > 1:
 321                 stdout.write ("Error: ambiguous input (matches several items \
 322 in the list)\n")
 323             else:
 324                 value = string_list[matches.index (True)]
 325         if value:
 326             return value
 327         t -= 1
 328     raise InteractionError ("%d retries limit exceeded" % retries)
 329
 330
 331 def check_ref (manual, file, m):
 332     global fixes_count, bad_refs_count
 333     bad_ref = False
 334     type = m.group (1)
 335     original_name = m.group (2)
 336     name = whitespace_re.sub (' ', original_name). strip ()
 337     newline_indices = manuals[manual]['newline_indices'][file]
 338     line = which_line (m.start (), newline_indices)
 339     linebroken = '\n' in m.group (2)
 340     next_char = m.group (3)
 341     commented_out = is_commented_out \
 342         (m.start (), m.end (), manuals[manual]['comments_boundaries'][file])
 343     useful_fix = not outdir in file
 344
 345     # check puncuation after x-ref
 346     if options.check_punctuation and not next_char in '.,;:!?':
 347         stdout.write ("Warning: %s: %d: `%s': x-ref \
 348 not followed by punctuation\n" % (file, line, name))
 349
 350     # validate xref
 351     explicit_type = type
 352     new_name = name
 353
 354     if type != 'ref' and type == manual and not commented_out:
 355         bad_ref = True
 356         stdout.write ("\n%s: %d: `%s': external %s x-ref should be internal\n"
 357                       % (file, line, name, type))
 358         if options.auto_fix or yes_prompt ("Fix this?"):
 359             type = 'ref'
 360
 361     if type == 'ref':
 362         explicit_type = manual
 363
 364     if not name in manuals[explicit_type]['nodes'] and not commented_out:
 365         bad_ref = True
 366         fixed = False
 367         stdout.write ('\n')
 368         if type == 'ref':
 369             stdout.write ("%s: %d: `%s': wrong internal x-ref\n"
 370                           % (file, line, name))
 371         else:
 372             stdout.write ("%s: %d: `%s': wrong external `%s' x-ref\n"
 373                           % (file, line, name, type))
 374         # print context
 375         stdout.write ('--\n' + manuals[manual]['contents'][file]
 376                       [newline_indices[max (0, line - 2)]:
 377                        newline_indices[min (line + 3,
 378                                             len (newline_indices) - 1)]] +
 379                       '--\n')
 380
 381         # try to find the reference in other manuals
 382         found = []
 383         for k in [k for k in manuals if k != explicit_type]:
 384             if name in manuals[k]['nodes']:
 385                 if k == manual:
 386                     found = ['ref']
 387                     stdout.write ("  found as internal x-ref\n")
 388                     break
 389                 else:
 390                     found.append (k)
 391                     stdout.write ("  found as `%s' x-ref\n" % k)
 392
 393         if (len (found) == 1
 394             and (options.auto_fix or yes_prompt ("Fix this x-ref?"))):
 395             add_fix (type, name, found[0], name)
 396             type = found[0]
 397             fixed = True
 398
 399         elif len (found) > 1 and useful_fix:
 400             if options.interactive or options.auto_fix:
 401                 stdout.write ("* Several manuals contain this node name, \
 402 cannot determine manual automatically.\n")
 403             if options.interactive:
 404                 t = choose_in_numbered_list ("Choose manual for this x-ref by \
 405 index number or beginning of name:\n", found)
 406                 if t:
 407                     add_fix (type, name, t, name)
 408                     type = t
 409                     fixed = True
 410
 411         if not fixed:
 412             # try to find a fix already made
 413             found = lookup_fix (name)
 414
 415             if len (found) == 1:
 416                 stdout.write ("Found one previous fix: %s `%s'\n" % found[0])
 417                 if options.auto_fix or yes_prompt ("Apply this fix?"):
 418                     type, new_name = found[0]
 419                     fixed = True
 420
 421             elif len (found) > 1:
 422                 if options.interactive or options.auto_fix:
 423                     stdout.write ("* Several previous fixes match \
 424 this node name, cannot fix automatically.\n")
 425                 if options.interactive:
 426                     concatened = choose_in_numbered_list ("Choose new manual \
 427 and x-ref by index number or beginning of name:\n", [''.join ([i[0], ' ', i[1]])
 428                                                      for i in found],
 429                                                     sep='\n')
 430                     if concatened:
 431                         type, new_name = concatenated.split (' ', 1)
 432                         fixed = True
 433
 434         if not fixed:
 435             # all previous automatic fixes attempts failed,
 436             # ask user for substring to look in node names
 437             while True:
 438                 node_list = search_prompt ()
 439                 if node_list == None:
 440                     if options.interactive:
 441                         stdout.write (warn_not_fixed)
 442                     break
 443                 elif not node_list:
 444                     stdout.write ("No matched node names.\n")
 445                 else:
 446                     concatenated = choose_in_numbered_list ("Choose \
 447 node name and manual for this x-ref by index number or beginning of name:\n", \
 448                             [' '.join ([i[0], i[1], '(in %s)' % i[2]])
 449                              for i in node_list],
 450                                                             sep='\n')
 451                     if concatenated:
 452                         t, z = concatenated.split (' ', 1)
 453                         new_name = z.split (' (in ', 1)[0]
 454                         add_fix (type, name, t, new_name)
 455                         type = t
 456                         fixed = True
 457                         break
 458
 459     if fixed and type == manual:
 460         type = 'ref'
 461     bad_refs_count += int (bad_ref)
 462     if bad_ref and not useful_fix:
 463         stdout.write ("*** Warning: this file is automatically generated, \
 464 please fix the code source manually.\n")
 465
 466     # compute returned string
 467     if new_name == name:
 468         return ('@%s{%s}' % (type, original_name)) + next_char
 469     else:
 470         fixes_count += 1
 471         (ref, n) = preserve_linebreak (new_name, linebroken)
 472         return ('@%s{%s}' % (type, ref)) + next_char + n
 473
 474
 475 log.write ("Checking cross-references...\n")
 476
 477 try:
 478     for key in manuals:
 479         for file in manuals[key]['contents']:
 480             s = ref_re.sub (lambda m: check_ref (key, file, m),
 481                             manuals[key]['contents'][file])
 482             if s != manuals[key]['contents'][file]:
 483                 open (file, 'w').write (s)
 484 except KeyboardInterrupt:
 485     log.write ("Operation interrupted, exiting.\n")
 486     sys.exit (2)
 487 except InteractionError, instance:
 488     log.write ("Operation refused by user: %s\nExiting.\n" % instance)
 489     sys.exit (3)
 490
 491 log.write ("Done, %d bad x-refs found, fixed %d.\n" %
 492            (bad_refs_count, fixes_count))