buildscripts/fixcc.py

   1 #!/usr/bin/python
   2
   3 # fixcc -- nitpick lily's c++ code
   4
   5 # TODO
   6 #  * check lexer, parser
   7 #  * rewrite in elisp, add to cc-mode
   8 #  * ?
   9 #  * profit
  10
  11 import __main__
  12 import getopt
  13 import os
  14 import re
  15 import string
  16 import sys
  17 import time
  18
  19 COMMENT = 'COMMENT'
  20 CXX = 'C++'
  21
  22 rules = {
  23         CXX:
  24         [
  25         # space before parenthesis open
  26         ('([^\( \]])[ \t]*\(', '\\1 ('),
  27         # space after comma
  28         (',[ \t]*', ', '),
  29         # delete inline tabs
  30         ('(\w)\t+', '\\1 '),
  31         # delete inline double spaces
  32         ('   *', ' '),
  33         # delete space after parenthesis open
  34         ('\([ \t]*', '('),
  35         # delete space before parenthesis close
  36         ('[ \t]*\)', ')'),
  37         # delete spaces after prefix
  38         ('(--|\+\+)[ \t]*([\w\)])', '\\1\\2'),
  39         # delete spaces before postfix
  40         ('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
  41         # delete space after parenthesis close
  42         #('\)[ \t]*([^\w])', ')\\1'),
  43         # delete superflous space around operator
  44         ('([\w\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|\+|-|=|/|&|\|\*)([ \t]+)([\w\(])', '\\1 \\3 \\5'),
  45         # space around operator
  46         ('([\w\)\]])(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|=|/|&|\|\*)([\w\(])', '\\1 \\2 \\3'),
  47         # space around +/-; exponent
  48         ('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
  49         ('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
  50         # trailing operator
  51         (' (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|\+|-|=|/|\*XXX)[ \t]*\n([ \t]*)',
  52          '\n\\2\\1 '),
  53         # pointer
  54         ('(bool|char|const|int|unsigned|void|([A-Z]\w*))[ \t]*(\*|&)[ \t]*',
  55          '\\1 \\3'),
  56         # unary pointer, minus, not
  57         ('(return|=) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
  58         # delete space after `operator'
  59         #('(\Woperator) (\W)', '\\1\\2'),
  60         # space after `operator'
  61         ('(\Woperator) *(\W)', '\\1 \\2'),
  62         # dangling newline
  63         ('\n[ \t]*\n[ \t]*\n', '\n\n'),
  64         # dangling parenthesis open
  65         #('[ \t]*\n[ \t]*\([ \t]*\n', '('),
  66         ('\([ \t]*\n', '('),
  67         # dangling parenthesis close
  68         ('\n[ \t]*\)', ')'),
  69         # dangling comma
  70         ('\n[ \t]*,', ','),
  71         # dangling semicolon
  72         ('\n[ \t]*;', ';'),
  73         # brace open
  74         ('(\w[^\n]*){[ \t]*\n', '\\1\n{\n'),
  75         # brace open backslash
  76         ('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
  77         # brace close
  78         ('}[ \t]*([^\n]*\w[^\n\\\]*\n)', '}\n\\1\n'),
  79         # brace close backslash
  80         ('}[ \t]*([^\n]*\w[^\n]*?\\\\\n)', '}\\\n\\1\n'),
  81         # delete space before comma
  82         ('[ \t]*,', ','),
  83         # delete space before semicolon
  84         ('[ \t]*;', ';'),
  85         # delete space before eol-backslash
  86         ('[ \t]*\\\\\n', '\\\n'),
  87         # delete trailing whitespace
  88         ('[ \t]*\n', '\n'),
  89
  90         ## Massage code that gets broken by rules above.
  91         # delete spaces around template brackets
  92         # Note that this does not work for PQueue_event.  Fix C++ name?
  93         ('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*((bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)))[ \t]*?(| \*)[ \t]*>',
  94          '\\1<\\3\\7>'),
  95         # delete space before #define x()
  96         ('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
  97         # add space in #define x ()
  98         ('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
  99          '#define \\1 \\2'),
 100         # delete space in #include <>
 101         ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
 102         '#include <\\1\\2\\3>'),
 103         # delete backslash before empty line (emacs' indent region is broken)
 104         ('\\\\\n\n', '\n\n'),
 105         ],
 106
 107         COMMENT:
 108         [
 109         # delete trailing whitespace
 110         ('[ \t]*\n', '\n'),
 111         # delete empty first lines
 112         ('(/\*\n)\n*', '\\1'),
 113         # delete empty last lines
 114         ('\n*(\n\*/)', '\\1'),
 115         ## delete newline after start?
 116         #('/(\*)\n', '\\1'),
 117         ## delete newline before end?
 118         #('\n(\*/)', '\\1'),
 119         ],
 120         }
 121
 122
 123 # Recognize special sequences in the input.
 124 #
 125 #   (?P<name>regex) -- Assign result of REGEX to NAME.
 126 #   *? -- Match non-greedily.
 127 #   (?m) -- Multiline regex: Make ^ and $ match at each line.
 128 #   (?s) -- Make the dot match all characters including newline.
 129 #   (?x) -- Ignore whitespace in patterns.
 130 no_match = 'a\ba'
 131 snippet_res = {
 132         CXX: {
 133                 'include':
 134                   no_match,
 135
 136                 'multiline_comment':
 137                   r'''(?sx)
 138                     (?P<match>
 139                     (?P<code>
 140                     [ \t]*/\*.*?\*/))''',
 141
 142                 'singleline_comment':
 143                   r'''(?mx)
 144                     ^.*
 145                     (?P<match>
 146                       (?P<code>
 147                       [ \t]*//([ \t][^\n]*|)\n))''',
 148
 149                 'string':
 150                   r'''(?x)
 151                     (?P<match>
 152                     (?P<code>
 153                     "([^"]|(([^\\]|(\\\\))\\"))*"))''',
 154
 155                 'char':
 156                   r'''(?x)
 157                     (?P<match>
 158                     (?P<code>
 159                     '([^']+|\')))''',
 160
 161                 'include':
 162                   r'''(?x)
 163                     (?P<match>
 164                     (?P<code>
 165                     "#[ \t]*include[ \t]*<[^>]*>''',
 166         },
 167         }
 168
 169 class Chunk:
 170         def replacement_text (self):
 171                 return ''
 172
 173         def filter_text (self):
 174                 return self.replacement_text ()
 175
 176         def ly_is_outdated (self):
 177                 return 0
 178
 179         def png_is_outdated (self):
 180                 return 0
 181
 182 class Substring (Chunk):
 183         def __init__ (self, source, start, end):
 184                 self.source = source
 185                 self.start = start
 186                 self.end = end
 187
 188         def replacement_text (self):
 189                 s = self.source[self.start:self.end]
 190                 for i in rules[CXX]:
 191                         s = re.sub (i[0], i[1], s)
 192                 return s
 193
 194
 195 class Snippet (Chunk):
 196         def __init__ (self, type, match, format):
 197                 self.type = type
 198                 self.match = match
 199                 self.hash = 0
 200                 self.options = []
 201                 self.format = format
 202
 203         def replacement_text (self):
 204                 return self.match.group ('match')
 205
 206         def substring (self, s):
 207                 return self.match.group (s)
 208
 209         def __repr__ (self):
 210                 return `self.__class__` + ' type = ' + self.type
 211
 212 class Multiline_comment (Snippet):
 213         def __init__ (self, source, match, format):
 214                 self.type = type
 215                 self.match = match
 216                 self.hash = 0
 217                 self.options = []
 218                 self.format = format
 219
 220         def replacement_text (self):
 221                 s = self.match.group ('match')
 222                 for i in rules[COMMENT]:
 223                         s = re.sub (i[0], i[1], s)
 224                 return s
 225
 226 snippet_type_to_class = {
 227         'multiline_comment': Multiline_comment,
 228 #       'lilypond_block': Lilypond_snippet,
 229 #       'lilypond': Lilypond_snippet,
 230 #       'include': Include_snippet,
 231 }
 232
 233 def find_toplevel_snippets (s, types):
 234         res = {}
 235         for i in types:
 236                 res[i] = re.compile (snippet_res[format][i])
 237
 238         snippets = []
 239         index = 0
 240         ## found = dict (map (lambda x: (x, None),
 241         ##                    types))
 242         ## urg python2.1
 243         found = {}
 244         map (lambda x, f = found: f.setdefault (x, None),
 245              types)
 246
 247         # We want to search for multiple regexes, without searching
 248         # the string multiple times for one regex.
 249         # Hence, we use earlier results to limit the string portion
 250         # where we search.
 251         # Since every part of the string is traversed at most once for
 252         # every type of snippet, this is linear.
 253
 254         while 1:
 255                 first = None
 256                 endex = 1 << 30
 257                 for type in types:
 258                         if not found[type] or found[type][0] < index:
 259                                 found[type] = None
 260                                 m = res[type].search (s[index:endex])
 261                                 if not m:
 262                                         continue
 263
 264                                 cl = Snippet
 265                                 if snippet_type_to_class.has_key (type):
 266                                         cl = snippet_type_to_class[type]
 267                                 snip = cl (type, m, format)
 268                                 start = index + m.start ('match')
 269                                 found[type] = (start, snip)
 270
 271                         if found[type] \
 272                            and (not first \
 273                                 or found[type][0] < found[first][0]):
 274                                 first = type
 275
 276                                 # FIXME.
 277
 278                                 # Limiting the search space is a cute
 279                                 # idea, but this *requires* to search
 280                                 # for possible containing blocks
 281                                 # first, at least as long as we do not
 282                                 # search for the start of blocks, but
 283                                 # always/directly for the entire
 284                                 # @block ... @end block.
 285
 286                                 endex = found[first][0]
 287
 288                 if not first:
 289                         snippets.append (Substring (s, index, len (s)))
 290                         break
 291
 292                 (start, snip) = found[first]
 293                 snippets.append (Substring (s, index, start))
 294                 snippets.append (snip)
 295                 found[first] = None
 296                 index = start + len (snip.match.group ('match'))
 297
 298         return snippets
 299
 300 def nitpick_file (outdir, file):
 301         s = open (file).read ()
 302
 303         # FIXME: Containing blocks must be first, see
 304         #        find_toplevel_snippets.
 305         snippet_types = (
 306                 'multiline_comment',
 307                 'singleline_comment',
 308                 'string',
 309                 'char',
 310                 )
 311
 312         chunks = find_toplevel_snippets (s, snippet_types)
 313         #code = filter (lambda x: is_derived_class (x.__class__, Substring),
 314         #              chunks)
 315
 316         t = string.join (map (lambda x: x.filter_text (), chunks), '')
 317         fixt = file
 318         if s != t:
 319                 if not outdir:
 320                         os.system ('mv %s %s~' % (file, file))
 321                 else:
 322                         fixt = os.path.join (outdir,
 323                                              os.path.basename (file))
 324                 h = open (fixt, "w")
 325                 h.write (t)
 326                 h.close ()
 327         indent_file (fixt)
 328
 329 def indent_file (file):
 330         emacs = '''emacs\
 331         --no-window-system\
 332         --batch\
 333         --no-site-file\
 334         --no-init-file\
 335         %(file)s\
 336         --eval '(let ((error nil)
 337                       (version-control nil))
 338                  (load-library "cc-mode")
 339                  (c++-mode)
 340                  (indent-region (point-min) (point-max))
 341                  (if (buffer-modified-p (current-buffer))
 342                   (save-buffer)))' ''' % vars ()
 343         emacsclient = '''emacsclient\
 344         --socket-name=%(socketdir)s/%(socketname)s\
 345         --no-wait\
 346         --eval '(let ((error nil)
 347                       (version-control nil))
 348                  (load-library "cc-mode")
 349                  (find-file "%(file)s")
 350                  (c++-mode)
 351                  (indent-region (point-min) (point-max))
 352                  (if (buffer-modified-p (current-buffer))
 353                   (save-buffer)))' ''' \
 354                   % { 'file': file,
 355                       'socketdir' : socketdir,
 356                       'socketname' : socketname, }
 357         os.system (emacs)
 358
 359
 360 def usage ():
 361         sys.stdout.write (r'''
 362 Usage:
 363 fixcc [--outdir=DIR] FILE...
 364
 365 Typical use with LilyPond:
 366
 367    fixcc $(find flower kpath-guile lily -name '*cc' -o -name '*hh' | grep -v /out)
 368
 369 This script is licensed under the GNU GPL
 370 ''')
 371
 372 def do_options ():
 373         global outdir
 374         (options, files) = getopt.getopt (sys.argv[1:], '',
 375                                           ['help', 'outdir='])
 376         for (o, a) in options:
 377                 if o == '--help':
 378                         usage ()
 379                         sys.exit (0)
 380                 elif o == '--outdir':
 381                         outdir = a
 382                 else:
 383                         assert unimplemented
 384         if not files:
 385                 usage ()
 386                 sys.exit (2)
 387         return files
 388
 389
 390 outdir = 0
 391 format = CXX
 392 socketdir = '/tmp/fixcc'
 393 socketname = 'fixcc%d' % os.getpid ()
 394
 395 def setup_client ():
 396         #--no-window-system\
 397         #--batch\
 398         os.unlink (os.path.join (socketdir, socketname))
 399         os.mkdir (socketdir, 0700)
 400         emacs='''emacs\
 401                 --no-site-file\
 402                 --no-init-file\
 403                 --eval '(let ((error nil)
 404                               (version-control nil))
 405                          (load-library "server")
 406                          (setq server-socket-dir "%(socketdir)s")
 407                          (setq server-name "%(socketname)s")
 408                          (server-start)
 409                          (while t) (sleep 1000))' ''' \
 410                          % { 'socketdir' : socketdir,
 411                              'socketname' : socketname, }
 412
 413         if not os.fork ():
 414                 os.system (emacs)
 415                 sys.exit (0)
 416         while not os.path.exists (os.path.join (socketdir, socketname)):
 417                 time.sleep (1)
 418
 419 def main ():
 420         #emacsclient should be faster, but this does not work yet
 421         #setup_client ()
 422         files = do_options ()
 423         if outdir and not os.path.isdir (outdir):
 424                 os.makedirs (outdir)
 425         for i in files:
 426                 sys.stderr.write ('%s...\n' % i)
 427                 nitpick_file (outdir, i)
 428
 429 if __name__ == '__main__':
 430         main ()
 431