buildscripts/fixcc.py

   1 #!/usr/bin/python
   2
   3 # fixcc -- nitpick lily's c++ code
   4
   5 # TODO
   6 #  * check lexer, parser
   7 #  * rewrite in elisp, add to cc-mode
   8 #  * ?
   9 #  * profit
  10
  11 import __main__
  12 import getopt
  13 import os
  14 import re
  15 import string
  16 import sys
  17 import time
  18
  19 COMMENT = 'COMMENT'
  20 CXX = 'C++'
  21
  22 rules = {
  23         CXX:
  24         [
  25         # space before parenthesis open
  26         ('([^\( \]])[ \t]*\(', '\\1 ('),
  27         # space after comma
  28         (',[ \t]*', ', '),
  29         # delete inline tabs
  30         ('(\w)\t+', '\\1 '),
  31         # delete inline double spaces
  32         ('   *', ' '),
  33         # delete space after parenthesis open
  34         ('\([ \t]*', '('),
  35         # delete space before parenthesis close
  36         ('[ \t]*\)', ')'),
  37         # delete spaces after prefix
  38         ('(--|\+\+)[ \t]*([\w\)])', '\\1\\2'),
  39         # delete spaces before postfix
  40         ('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
  41         # delete space after parenthesis close
  42         #('\)[ \t]*([^\w])', ')\\1'),
  43         # delete superflous space around operator
  44         ('([\w\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|\+|-|=|/|&|\|\*)([ \t]+)([\w\(])', '\\1 \\3 \\5'),
  45         # space around operator
  46         ('([\w\)\]])(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|=|/|&|\|\*)([\w\(])', '\\1 \\2 \\3'),
  47         # space around +/-; exponent
  48         ('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
  49         ('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
  50         # trailing operator
  51         (' (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|\+|-|=|/|\*XXX)[ \t]*\n([ \t]*)',
  52          '\n\\2\\1 '),
  53         # pointer
  54         ('(bool|char|const|int|unsigned|void|([A-Z]\w*))[ \t]*(\*|&)[ \t]*',
  55          '\\1 \\3'),
  56         # unary pointer, minus, not
  57         ('(return|=) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
  58         # space after `operator'
  59         ('(\Woperator) (\W)', '\\1\\2'),
  60         # dangling newline
  61         ('\n[ \t]*\n[ \t]*\n', '\n\n'),
  62         # dangling parenthesis open
  63         #('[ \t]*\n[ \t]*\([ \t]*\n', '('),
  64         ('\([ \t]*\n', '('),
  65         # dangling parenthesis close
  66         ('\n[ \t]*\)', ')'),
  67         # dangling comma
  68         ('\n[ \t]*,', ','),
  69         # dangling semicolon
  70         ('\n[ \t]*;', ';'),
  71         # brace open
  72         ('(\w[^\n]*){[ \t]*\n', '\\1\n{\n'),
  73         # brace open backslash
  74         ('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
  75         # brace close
  76         ('}[ \t]*([^\n]*\w[^\n\\\]*\n)', '}\n\\1\n'),
  77         # brace close backslash
  78         ('}[ \t]*([^\n]*\w[^\n]*?\\\\\n)', '}\\\n\\1\n'),
  79         # delete space before comma
  80         ('[ \t]*,', ','),
  81         # delete space before semicolon
  82         ('[ \t]*;', ';'),
  83         # delete space before eol-backslash
  84         ('[ \t]*\\\\\n', '\\\n'),
  85         # delete trailing whitespace
  86         ('[ \t]*\n', '\n'),
  87
  88         ## Massage code that gets broken by rules above.
  89         # delete spaces around template brackets
  90         ('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*((bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)))[ \t]*?(| \*)[ \t]*>',
  91          '\\1<\\3\\7>'),
  92         # delete space before #define x()
  93         ('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
  94         # add space in #define x ()
  95         ('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
  96          '#define \\1 \\2'),
  97         # delete space in #include <>
  98         ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
  99         '#include <\\1\\2\\3>'),
 100         # delete backslash before empty line (emacs' indent region is broken)
 101         ('\\\\\n\n', '\n\n'),
 102         ],
 103
 104         COMMENT:
 105         [
 106         # delete trailing whitespace
 107         ('[ \t]*\n', '\n'),
 108         # delete empty first lines
 109         ('(/\*\n)\n*', '\\1'),
 110         # delete empty last lines
 111         ('\n*(\n\*/)', '\\1'),
 112         ## delete newline after start?
 113         #('/(\*)\n', '\\1'),
 114         ## delete newline before end?
 115         #('\n(\*/)', '\\1'),
 116         ],
 117         }
 118
 119
 120 # Recognize special sequences in the input.
 121 #
 122 #   (?P<name>regex) -- Assign result of REGEX to NAME.
 123 #   *? -- Match non-greedily.
 124 #   (?m) -- Multiline regex: Make ^ and $ match at each line.
 125 #   (?s) -- Make the dot match all characters including newline.
 126 #   (?x) -- Ignore whitespace in patterns.
 127 no_match = 'a\ba'
 128 snippet_res = {
 129         CXX: {
 130                 'include':
 131                   no_match,
 132
 133                 'multiline_comment':
 134                   r'''(?sx)
 135                     (?P<match>
 136                     (?P<code>
 137                     [ \t]*/\*.*?\*/))''',
 138
 139                 'singleline_comment':
 140                   r'''(?mx)
 141                     ^.*
 142                     (?P<match>
 143                       (?P<code>
 144                       [ \t]*//([ \t][^\n]*|)\n))''',
 145
 146                 'string':
 147                   r'''(?x)
 148                     (?P<match>
 149                     (?P<code>
 150                     "([^"]|(([^\\]|(\\\\))\\"))*"))''',
 151
 152                 'char':
 153                   r'''(?x)
 154                     (?P<match>
 155                     (?P<code>
 156                     '([^']+|\')))''',
 157
 158                 'include':
 159                   r'''(?x)
 160                     (?P<match>
 161                     (?P<code>
 162                     "#[ \t]*include[ \t]*<[^>]*>''',
 163         },
 164         }
 165
 166 class Chunk:
 167         def replacement_text (self):
 168                 return ''
 169
 170         def filter_text (self):
 171                 return self.replacement_text ()
 172
 173         def ly_is_outdated (self):
 174                 return 0
 175
 176         def png_is_outdated (self):
 177                 return 0
 178
 179 class Substring (Chunk):
 180         def __init__ (self, source, start, end):
 181                 self.source = source
 182                 self.start = start
 183                 self.end = end
 184
 185         def replacement_text (self):
 186                 s = self.source[self.start:self.end]
 187                 for i in rules[CXX]:
 188                         s = re.sub (i[0], i[1], s)
 189                 return s
 190
 191
 192 class Snippet (Chunk):
 193         def __init__ (self, type, match, format):
 194                 self.type = type
 195                 self.match = match
 196                 self.hash = 0
 197                 self.options = []
 198                 self.format = format
 199
 200         def replacement_text (self):
 201                 return self.match.group ('match')
 202
 203         def substring (self, s):
 204                 return self.match.group (s)
 205
 206         def __repr__ (self):
 207                 return `self.__class__` + ' type = ' + self.type
 208
 209 class Multiline_comment (Snippet):
 210         def __init__ (self, source, match, format):
 211                 self.type = type
 212                 self.match = match
 213                 self.hash = 0
 214                 self.options = []
 215                 self.format = format
 216
 217         def replacement_text (self):
 218                 s = self.match.group ('match')
 219                 for i in rules[COMMENT]:
 220                         s = re.sub (i[0], i[1], s)
 221                 return s
 222
# Maps a snippet type name to the class instantiated for a match of that
# type.  find_toplevel_snippets uses plain Snippet for every type that is
# not listed here.  The commented-out entries name classes that are not
# defined in this file.
snippet_type_to_class = {
        'multiline_comment': Multiline_comment,
#       'lilypond_block': Lilypond_snippet,
#       'lilypond': Lilypond_snippet,
#       'include': Include_snippet,
}
 229
 230 def find_toplevel_snippets (s, types):
 231         res = {}
 232         for i in types:
 233                 res[i] = re.compile (snippet_res[format][i])
 234
 235         snippets = []
 236         index = 0
 237         ## found = dict (map (lambda x: (x, None),
 238         ##                    types))
 239         ## urg python2.1
 240         found = {}
 241         map (lambda x, f = found: f.setdefault (x, None),
 242              types)
 243
 244         # We want to search for multiple regexes, without searching
 245         # the string multiple times for one regex.
 246         # Hence, we use earlier results to limit the string portion
 247         # where we search.
 248         # Since every part of the string is traversed at most once for
 249         # every type of snippet, this is linear.
 250
 251         while 1:
 252                 first = None
 253                 endex = 1 << 30
 254                 for type in types:
 255                         if not found[type] or found[type][0] < index:
 256                                 found[type] = None
 257                                 m = res[type].search (s[index:endex])
 258                                 if not m:
 259                                         continue
 260
 261                                 cl = Snippet
 262                                 if snippet_type_to_class.has_key (type):
 263                                         cl = snippet_type_to_class[type]
 264                                 snip = cl (type, m, format)
 265                                 start = index + m.start ('match')
 266                                 found[type] = (start, snip)
 267
 268                         if found[type] \
 269                            and (not first \
 270                                 or found[type][0] < found[first][0]):
 271                                 first = type
 272
 273                                 # FIXME.
 274
 275                                 # Limiting the search space is a cute
 276                                 # idea, but this *requires* to search
 277                                 # for possible containing blocks
 278                                 # first, at least as long as we do not
 279                                 # search for the start of blocks, but
 280                                 # always/directly for the entire
 281                                 # @block ... @end block.
 282
 283                                 endex = found[first][0]
 284
 285                 if not first:
 286                         snippets.append (Substring (s, index, len (s)))
 287                         break
 288
 289                 (start, snip) = found[first]
 290                 snippets.append (Substring (s, index, start))
 291                 snippets.append (snip)
 292                 found[first] = None
 293                 index = start + len (snip.match.group ('match'))
 294
 295         return snippets
 296
 297 def nitpick_file (outdir, file):
 298         s = open (file).read ()
 299
 300         # FIXME: Containing blocks must be first, see
 301         #        find_toplevel_snippets.
 302         snippet_types = (
 303                 'multiline_comment',
 304                 'singleline_comment',
 305                 'string',
 306                 'char',
 307                 )
 308
 309         chunks = find_toplevel_snippets (s, snippet_types)
 310         #code = filter (lambda x: is_derived_class (x.__class__, Substring),
 311         #              chunks)
 312
 313         t = string.join (map (lambda x: x.filter_text (), chunks), '')
 314         fixt = file
 315         if s != t:
 316                 if not outdir:
 317                         os.system ('mv %s %s~' % (file, file))
 318                 else:
 319                         fixt = os.path.join (outdir,
 320                                              os.path.basename (file))
 321                 h = open (fixt, "w")
 322                 h.write (t)
 323                 h.close ()
 324         indent_file (fixt)
 325
 326 def indent_file (file):
 327         emacs = '''emacs\
 328         --no-window-system\
 329         --batch\
 330         --no-site-file\
 331         --no-init-file\
 332         %(file)s\
 333         --eval '(let ((error nil)
 334                       (version-control nil))
 335                  (load-library "cc-mode")
 336                  (c++-mode)
 337                  (indent-region (point-min) (point-max))
 338                  (if (buffer-modified-p (current-buffer))
 339                   (save-buffer)))' ''' % vars ()
 340         emacsclient = '''emacsclient\
 341         --socket-name=%(socketdir)s/%(socketname)s\
 342         --no-wait\
 343         --eval '(let ((error nil)
 344                       (version-control nil))
 345                  (load-library "cc-mode")
 346                  (find-file "%(file)s")
 347                  (c++-mode)
 348                  (indent-region (point-min) (point-max))
 349                  (if (buffer-modified-p (current-buffer))
 350                   (save-buffer)))' ''' \
 351                   % { 'file': file,
 352                       'socketdir' : socketdir,
 353                       'socketname' : socketname, }
 354         os.system (emacs)
 355
 356
 357 def usage ():
 358         sys.stdout.write (r'''
 359 Usage:
 360 fixcc [--outdir=DIR] FILE...
 361
 362 Typical use with LilyPond:
 363
 364    fixcc $(find flower kpath-guile lily -name '*cc' -o -name '*hh' | grep -v /out)
 365
 366 This script is licensed under the GNU GPL
 367 ''')
 368
 369 def do_options ():
 370         global outdir
 371         (options, files) = getopt.getopt (sys.argv[1:], '',
 372                                           ['help', 'outdir='])
 373         for (o, a) in options:
 374                 if o == '--help':
 375                         usage ()
 376                         sys.exit (0)
 377                 elif o == '--outdir':
 378                         outdir = a
 379                 else:
 380                         assert unimplemented
 381         if not files:
 382                 usage ()
 383                 sys.exit (2)
 384         return files
 385
 386
# Directory for the fixed files, set by --outdir in do_options; while it
# is false, nitpick_file rewrites the input files in place.
outdir = 0
# Key of the regex table in snippet_res used by find_toplevel_snippets.
format = CXX
# Directory and name of the Emacs server socket used by setup_client;
# the name contains the pid of this process.
socketdir = '/tmp/fixcc'
socketname = 'fixcc%d' % os.getpid ()
 391
 392 def setup_client ():
 393         #--no-window-system\
 394         #--batch\
 395         os.unlink (os.path.join (socketdir, socketname))
 396         os.mkdir (socketdir, 0700)
 397         emacs='''emacs\
 398                 --no-site-file\
 399                 --no-init-file\
 400                 --eval '(let ((error nil)
 401                               (version-control nil))
 402                          (load-library "server")
 403                          (setq server-socket-dir "%(socketdir)s")
 404                          (setq server-name "%(socketname)s")
 405                          (server-start)
 406                          (while t) (sleep 1000))' ''' \
 407                          % { 'socketdir' : socketdir,
 408                              'socketname' : socketname, }
 409
 410         if not os.fork ():
 411                 os.system (emacs)
 412                 sys.exit (0)
 413         while not os.path.exists (os.path.join (socketdir, socketname)):
 414                 time.sleep (1)
 415
 416 def main ():
 417         #emacsclient should be faster, but this does not work yet
 418         #setup_client ()
 419         files = do_options ()
 420         if outdir and not os.path.isdir (outdir):
 421                 os.makedirs (outdir)
 422         for i in files:
 423                 sys.stderr.write ('%s...\n' % i)
 424                 nitpick_file (outdir, i)
 425
# Run main only when this file is executed as a script.
if __name__ == '__main__':
        main ()
 428