buildscripts/fixcc.py

   1 #!/usr/bin/python
   2
   3 # fixcc -- nitpick lily's c++ code
   4
   5 # TODO
   6 #  * maintainable rules: regexp's using whitespace (?x) and match names
   7 #    <identifier>)
   8 #  * trailing * vs function definition
   9 #  * check lexer, parser
  10 #  * rewrite in elisp, add to cc-mode
  11 #  * ?
  12 #  * profit
  13
  14 import __main__
  15 import getopt
  16 import os
  17 import re
  18 import string
  19 import sys
  20 import time
  21
# Keys of the `rules' table below; CXX is also the key used in
# `snippet_res' and the value of the module-level `format'.
COMMENT = 'COMMENT'
CXX = 'C++'
# Set to 1 by --verbose: progress is reported on stderr.
verbose_p = 0
# Set to 1 by --indent: run the indenter even on files no rule changed.
indent_p = 0
  26
# Rewrite rules, keyed by the kind of text they apply to.
#
# Each entry is a (regex, replacement) pair.  The pairs of a list are
# applied one after another, in list order, with re.sub: rules[CXX] by
# Substring.replacement_text and rules[COMMENT] by
# Multiline_comment.replacement_text.  Order matters: later rules
# repair what earlier ones break (see the `Fix code' section).
rules = {
        CXX:
        [
        # space before parenthesis open
        ('([^\( \]])[ \t]*\(', '\\1 ('),
        # space after comma
        (',[ \t]*', ', '),
        # delete inline tabs
        ('(\w)\t+', '\\1 '),
        # delete inline double spaces
        ('   *', ' '),
        # delete space after parenthesis open
        ('\([ \t]*', '('),
        # delete space before parenthesis close
        ('[ \t]*\)', ')'),
        # delete spaces after prefix
        ('(--|\+\+)[ \t]*([\w\)])', '\\1\\2'),
        # delete spaces before postfix
        ('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
        # delete space after parenthesis close
        #('\)[ \t]*([^\w])', ')\\1'),
        # delete space around operator
        # ('([\w\(\)\]])([ \t]*)(::|\.)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
        ('([\w\(\)\]])([ \t]*)(\.)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
        # delete space after operator
        ('(::)([ \t]*)([\w\(\)])', '\\1\\3'),
        # delete superfluous space around operator
        ('([\w\(\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*)([ \t]+)([\w\(\)])', '\\1 \\3 \\5'),
        # space around operator1
        ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
        # space around operator2
        ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) ([^\w\s])', '\\1 \\2 \\3'),
        # space around operator3
        ('([^\w\s]) (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
        # space around +/-; exponent
        ('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
        ('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
        # trailing operator
        (' (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&XXX|\||\*XXX)[ \t]*\n([ \t]*)',  '\n\\2\\1 '),
        #breaks function definitions
        #to#(' (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|\+|-|=|/|&|\||\*)[ \t]*\n([ \t]*)',         '\n\\2\\1 '),
        # pointer
        ('(bool|char|const|delete|int|stream|unsigned|void|(struct \w+)|([A-Z]\w*)|[,]|&&|\|\|)[ \t]*(\*|&)[ \t]*', '\\1 \\4'),
        #to#('(bool|char|const|delete|int|stream|unsigned|void|([A-Z]\w*)|[,])[ \n\t]*(\*|&)[ \t]*', '\\1 \\3'),
        # pointer with template
        ('(( *((bool|char|delete|int|stream|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)|[,])[ \*&],*)+)>) *(\*|&) *', '\\1 \\7'),
        #to#('(( *((bool|char|delete|int|stream|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)|[,])[ \*&],*)+)>)[ \t\n]*(\*|&) *', '\\1 \\7'),
        # unary pointer, minus, not
        ('(return|=) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
        # space after `operator'
        ('(\Woperator) *([^\w\s])', '\\1 \\2'),
        # dangling newline
        ('\n[ \t]*\n[ \t]*\n', '\n\n'),
        # dangling parenthesis open
        #('[ \t]*\n[ \t]*\([ \t]*\n', '('),
        ('\([ \t]*\n', '('),
        # dangling parenthesis close
        ('\n[ \t]*\)', ')'),
        # dangling comma
        ('\n[ \t]*,', ','),
        # dangling semicolon
        ('\n[ \t]*;', ';'),
        # brace open
        ('(\w)[ \t]*([^\s]*){([ \t]*\n)', '\\1\\2\n{\n'),
        # brace open backslash
        ('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
        # brace close
        ('}[ \t]*([^\n]*\w[^\n\\\]*)\n', '}\n\\1\n'),
        # brace close backslash
        ('}[ \t]*([^\n]*\w[^\n\\\]*)', '\n}\n\\1'),
        # delete space after `operator'
        #('(\Woperator) (\W)', '\\1\\2'),
        # delete space after case, label
        ('(\W(case|label) ([\w]+)) :', '\\1:'),
        # delete space before comma
        ('[ \t]*,', ','),
        # delete space before semicolon
        ('[ \t]*;', ';'),
        # delete space before eol-backslash
        ('[ \t]*\\\\\n', '\\\n'),
        # delete trailing whitespace
        ('[ \t]*\n', '\n'),

        ## Deuglify code that also gets ugly by rules above.
        # delete newline after typedef struct
        ('(typedef struct\s+([\w]*\s){([^}]|{[^}]*})*})\s*\n\s*(\w[\w\d]*;)', '\\1 \\4'),
        # delete spaces around template brackets
        #('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*(( *(bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)),?)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\3\\8>'),
        ('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*(( *(bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*))[,\*&]*)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\3\\8>'),
        ('((if|while)\s+\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\n;'),
        ('(for\s+\(([^;]*;[^;]*;([^\)]|\([^\)]*\))*)\))\s*;', '\\1\n;'),
        # do .. while
        ('(\Wdo\s*{([^}]|{[^}]*})*}\s*while\s*)(\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\\3;\n'),
        ## Fix code that gets broken by rules above.
        # delete space before #define x()
        ('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
        # add space in #define x ()
        ('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
         '#define \\1 \\2'),
        # delete space in #include <>
        ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
        '#include <\\1\\2\\3>'),
        # delete backslash before empty line (emacs' indent region is broken)
        ('\\\\\n\n', '\n\n'),
        ],

        COMMENT:
        [
        # delete trailing whitespace
        ('[ \t]*\n', '\n'),
        # delete empty first lines
        ('(/\*\n)\n*', '\\1'),
        # delete empty last lines
        ('\n*(\n\*/)', '\\1'),
        ## delete newline after start?
        #('/(\*)\n', '\\1'),
        ## delete newline before end?
        #('\n(\*/)', '\\1'),
        ],
        }
 147
 148 # Recognize special sequences in the input.
 149 #
 150 #   (?P<name>regex) -- Assign result of REGEX to NAME.
 151 #   *? -- Match non-greedily.
 152 #   (?m) -- Multiline regex: Make ^ and $ match at each line.
 153 #   (?s) -- Make the dot match all characters including newline.
 154 #   (?x) -- Ignore whitespace in patterns.
 155 no_match = 'a\ba'
 156 snippet_res = {
 157         CXX: {
 158                 'include':
 159                   no_match,
 160
 161                 'multiline_comment':
 162                   r'''(?sx)
 163                     (?P<match>
 164                     (?P<code>
 165                     [ \t]*/\*.*?\*/))''',
 166
 167                 'singleline_comment':
 168                   r'''(?mx)
 169                     ^.*
 170                     (?P<match>
 171                       (?P<code>
 172                       [ \t]*//([ \t][^\n]*|)\n))''',
 173
 174                 'string':
 175                   r'''(?x)
 176                     (?P<match>
 177                     (?P<code>
 178                     "([^"]|(([^\\]|(\\\\))\\"))*"))''',
 179
 180                 'char':
 181                   r'''(?x)
 182                     (?P<match>
 183                     (?P<code>
 184                     '([^']+|\')))''',
 185
 186                 'include':
 187                   r'''(?x)
 188                     (?P<match>
 189                     (?P<code>
 190                     "#[ \t]*include[ \t]*<[^>]*>''',
 191         },
 192         }
 193
 194 class Chunk:
 195         def replacement_text (self):
 196                 return ''
 197
 198         def filter_text (self):
 199                 return self.replacement_text ()
 200
 201         def ly_is_outdated (self):
 202                 return 0
 203
 204         def png_is_outdated (self):
 205                 return 0
 206
 207 class Substring (Chunk):
 208         def __init__ (self, source, start, end):
 209                 self.source = source
 210                 self.start = start
 211                 self.end = end
 212
 213         def replacement_text (self):
 214                 s = self.source[self.start:self.end]
 215                 if verbose_p:
 216                         sys.stderr.write ('CXX Rules')
 217                 for i in rules[CXX]:
 218                         if verbose_p:
 219                                 sys.stderr.write ('.')
 220                                 #sys.stderr.write ('\n*********\n')
 221                                 #sys.stderr.write (i[0])
 222                                 #sys.stderr.write ('\n=========\n')
 223                                 #sys.stderr.write (s)
 224                                 #sys.stderr.write ('\n*********\n')
 225                         s = re.sub (i[0], i[1], s)
 226                 if verbose_p:
 227                         sys.stderr.write ('done\n')
 228                 return s
 229
 230
 231 class Snippet (Chunk):
 232         def __init__ (self, type, match, format):
 233                 self.type = type
 234                 self.match = match
 235                 self.hash = 0
 236                 self.options = []
 237                 self.format = format
 238
 239         def replacement_text (self):
 240                 return self.match.group ('match')
 241
 242         def substring (self, s):
 243                 return self.match.group (s)
 244
 245         def __repr__ (self):
 246                 return `self.__class__` + ' type = ' + self.type
 247
 248 class Multiline_comment (Snippet):
 249         def __init__ (self, source, match, format):
 250                 self.type = type
 251                 self.match = match
 252                 self.hash = 0
 253                 self.options = []
 254                 self.format = format
 255
 256         def replacement_text (self):
 257                 s = self.match.group ('match')
 258                 if verbose_p:
 259                         sys.stderr.write ('COMMENT Rules')
 260                 for i in rules[COMMENT]:
 261                         if verbose_p:
 262                                 sys.stderr.write ('.')
 263                         s = re.sub (i[0], i[1], s)
 264                 return s
 265
# Snippet types that need a dedicated class; find_toplevel_snippets
# falls back to plain Snippet for every type not listed here.
snippet_type_to_class = {
        'multiline_comment': Multiline_comment,
#       'lilypond_block': Lilypond_snippet,
#       'lilypond': Lilypond_snippet,
#       'include': Include_snippet,
}
 272
 273 def find_toplevel_snippets (s, types):
 274         if verbose_p:
 275                 sys.stderr.write ('Dissecting')
 276
 277         res = {}
 278         for i in types:
 279                 res[i] = re.compile (snippet_res[format][i])
 280
 281         snippets = []
 282         index = 0
 283         ## found = dict (map (lambda x: (x, None),
 284         ##                    types))
 285         ## urg python2.1
 286         found = {}
 287         map (lambda x, f = found: f.setdefault (x, None),
 288              types)
 289
 290         # We want to search for multiple regexes, without searching
 291         # the string multiple times for one regex.
 292         # Hence, we use earlier results to limit the string portion
 293         # where we search.
 294         # Since every part of the string is traversed at most once for
 295         # every type of snippet, this is linear.
 296
 297         while 1:
 298                 if verbose_p:
 299                         sys.stderr.write ('.')
 300                 first = None
 301                 endex = 1 << 30
 302                 for type in types:
 303                         if not found[type] or found[type][0] < index:
 304                                 found[type] = None
 305                                 m = res[type].search (s[index:endex])
 306                                 if not m:
 307                                         continue
 308
 309                                 cl = Snippet
 310                                 if snippet_type_to_class.has_key (type):
 311                                         cl = snippet_type_to_class[type]
 312                                 snip = cl (type, m, format)
 313                                 start = index + m.start ('match')
 314                                 found[type] = (start, snip)
 315
 316                         if found[type] \
 317                            and (not first \
 318                                 or found[type][0] < found[first][0]):
 319                                 first = type
 320
 321                                 # FIXME.
 322
 323                                 # Limiting the search space is a cute
 324                                 # idea, but this *requires* to search
 325                                 # for possible containing blocks
 326                                 # first, at least as long as we do not
 327                                 # search for the start of blocks, but
 328                                 # always/directly for the entire
 329                                 # @block ... @end block.
 330
 331                                 endex = found[first][0]
 332
 333                 if not first:
 334                         snippets.append (Substring (s, index, len (s)))
 335                         break
 336
 337                 (start, snip) = found[first]
 338                 snippets.append (Substring (s, index, start))
 339                 snippets.append (snip)
 340                 found[first] = None
 341                 index = start + len (snip.match.group ('match'))
 342
 343         return snippets
 344
 345 def nitpick_file (outdir, file):
 346         s = open (file).read ()
 347
 348         # FIXME: Containing blocks must be first, see
 349         #        find_toplevel_snippets.
 350         snippet_types = (
 351                 'multiline_comment',
 352                 'singleline_comment',
 353                 'string',
 354                 'char',
 355                 )
 356
 357         chunks = find_toplevel_snippets (s, snippet_types)
 358         #code = filter (lambda x: is_derived_class (x.__class__, Substring),
 359         #              chunks)
 360
 361         t = string.join (map (lambda x: x.filter_text (), chunks), '')
 362         fixt = file
 363         if s != t:
 364                 if not outdir:
 365                         os.system ('mv %s %s~' % (file, file))
 366                 else:
 367                         fixt = os.path.join (outdir,
 368                                              os.path.basename (file))
 369                 h = open (fixt, "w")
 370                 h.write (t)
 371                 h.close ()
 372         if s != t or indent_p:
 373                 indent_file (fixt)
 374
 375 def indent_file (file):
 376         emacs = '''emacs\
 377         --no-window-system\
 378         --batch\
 379         --no-site-file\
 380         --no-init-file\
 381         %(file)s\
 382         --eval '(let ((error nil)
 383                       (version-control nil))
 384                  (load-library "cc-mode")
 385                  (c++-mode)
 386                  (indent-region (point-min) (point-max))
 387                  (if (buffer-modified-p (current-buffer))
 388                   (save-buffer)))' ''' % vars ()
 389         emacsclient = '''emacsclient\
 390         --socket-name=%(socketdir)s/%(socketname)s\
 391         --no-wait\
 392         --eval '(let ((error nil)
 393                       (version-control nil))
 394                  (load-library "cc-mode")
 395                  (find-file "%(file)s")
 396                  (c++-mode)
 397                  (indent-region (point-min) (point-max))
 398                  (if (buffer-modified-p (current-buffer))
 399                   (save-buffer)))' ''' \
 400                   % { 'file': file,
 401                       'socketdir' : socketdir,
 402                       'socketname' : socketname, }
 403         if verbose_p:
 404                 sys.stderr.write (emacs)
 405                 sys.stderr.write ('\n')
 406         os.system (emacs)
 407
 408
 409 def usage ():
 410         sys.stdout.write (r'''
 411 Usage:
 412 fixcc [OPTION]... FILE...
 413
 414 Options:
 415    --help
 416    --indent   reindent, even if no changes
 417    --verbose
 418    --test
 419
 420 Typical use with LilyPond:
 421
 422    fixcc $(find flower kpath-guile lily -name '*cc' -o -name '*hh' | grep -v /out)
 423
 424 This script is licensed under the GNU GPL
 425 ''')
 426
 427 def do_options ():
 428         global indent_p, outdir, verbose_p
 429         (options, files) = getopt.getopt (sys.argv[1:], '',
 430                                           ['help', 'indent', 'outdir=',
 431                                            'test', 'verbose'])
 432         for (o, a) in options:
 433                 if o == '--help':
 434                         usage ()
 435                         sys.exit (0)
 436                 elif o == '--indent':
 437                         indent_p = 1
 438                 elif o == '--outdir':
 439                         outdir = a
 440                 elif o == '--verbose':
 441                         verbose_p = 1
 442                 elif o == '--test':
 443                         test ()
 444                         sys.exit (0)
 445                 else:
 446                         assert unimplemented
 447         if not files:
 448                 usage ()
 449                 sys.exit (2)
 450         return files
 451
 452
# Output directory; set by --outdir.  A false value means that files
# are fixed in place.
outdir = 0
# Key into snippet_res selecting the set of snippet patterns.
format = CXX
# Directory and name of the emacs server socket used by setup_client;
# the name contains the process id.
socketdir = '/tmp/fixcc'
socketname = 'fixcc%d' % os.getpid ()
 457
 458 def setup_client ():
 459         #--no-window-system\
 460         #--batch\
 461         os.unlink (os.path.join (socketdir, socketname))
 462         os.mkdir (socketdir, 0700)
 463         emacs='''emacs\
 464                 --no-site-file\
 465                 --no-init-file\
 466                 --eval '(let ((error nil)
 467                               (version-control nil))
 468                          (load-library "server")
 469                          (setq server-socket-dir "%(socketdir)s")
 470                          (setq server-name "%(socketname)s")
 471                          (server-start)
 472                          (while t) (sleep 1000))' ''' \
 473                          % { 'socketdir' : socketdir,
 474                              'socketname' : socketname, }
 475
 476         if not os.fork ():
 477                 os.system (emacs)
 478                 sys.exit (0)
 479         while not os.path.exists (os.path.join (socketdir, socketname)):
 480                 time.sleep (1)
 481
 482 def main ():
 483         #emacsclient should be faster, but this does not work yet
 484         #setup_client ()
 485         files = do_options ()
 486         if outdir and not os.path.isdir (outdir):
 487                 os.makedirs (outdir)
 488         for i in files:
 489                 sys.stderr.write ('%s...\n' % i)
 490                 nitpick_file (outdir, i)
 491
 492
# Deliberately badly formatted C++; test () writes it to a file and
# runs nitpick_file on that file.
TEST = '''
ostream &
operator << (ostream & os, String d);

typedef struct _t_ligature
{
  char *succ, *lig;
  struct _t_ligature *next;
  struct _t_ligature * next;
}  AFM_Ligature;

char *
Bar:: foe ()
{
  char* a= ++ 3  ;
  a [x] = foe (*i, &bar) *
  2;
  int operator double ();
  int x =foe(1 ,3);
  Interval_t<T> &operator*= (T r);
  int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
  delete *p;
  if (abs (f)*2 > abs (d) *FUDGE)
    ;
  while (0);
  for (; i < x (); foo > bar);
  for (; i < x > y;
  foo > bar)
;
  do {
  ..
  }
  while (foe);

  squiggle. extent;

  1 && * unsmob_moment (lf);

  line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->self_scm
(): SCM_EOL);

  case foo: k;

  typedef struct
  {
    ...
  } cookie_io_functions_t;


  if (0) {a=b;} else {
   c=d;
  }

  cookie_io_functions_t Memory_out_stream::functions_ = {
    Memory_out_stream::reader,
    ...
  };

}
'''
 553
 554 def test ():
 555         test_file = 'fixcc.cc'
 556         open (test_file, 'w').write (TEST)
 557         nitpick_file (outdir, test_file)
 558         sys.stdout.write (open (test_file).read ())
 559
# Run the fixer only when executed as a script, not when imported.
if __name__ == '__main__':
        main ()
 562