buildscripts/fixcc.py

   1 #!/usr/bin/python
   2
   3 # fixcc -- nitpick lily's c++ code
   4
   5 # TODO
   6 #  * maintainable rules: regexp's using whitespace (?x) and match names
   7 #    <identifier>)
   8 #  * trailing `*' vs. function definition
   9 #  * do not break/change indentation of fixcc-clean files
  10 #  * check lexer, parser
  11 #  * rewrite in elisp, add to cc-mode
  12 #  * using regexes is broken by design
  13 #  * ?
  14 #  * profit
  15
  16 import __main__
  17 import getopt
  18 import os
  19 import re
  20 import string
  21 import sys
  22 import time
  23
# Keys of the `rules' table (CXX is also the key used in `snippet_res').
COMMENT = 'COMMENT'
CXX = 'C++'
# Set to 1 by --verbose: progress output is written to stderr.
verbose_p = 0
# Set to 1 by --indent: reindent files even if no rule changed them.
indent_p = 0

# Rewrite rules.  Each entry is a (pattern, replacement) pair that is
# handed to re.sub.  The lists are applied top to bottom, so the order
# of the entries matters: later rules see the output of earlier ones.
# The CXX list is applied by Substring.replacement_text, the COMMENT
# list by Multiline_comment.replacement_text.
#
# The patterns are deliberately NOT raw strings: `\t' and `\n' are real
# tab and newline characters and `\\1' is the back-reference \1.
rules = {
        CXX:
        [
        # space before parenthesis open
        ('([^\( \]])[ \t]*\(', '\\1 ('),
        # space after comma
        (',[ \t]*', ', '),
        # delete inline tabs
        ('(\w)\t+', '\\1 '),
        # delete inline double spaces
        # (two spaces followed by any number of further spaces)
        ('   *', ' '),
        # delete space after parenthesis open
        ('\([ \t]*', '('),
        # delete space before parenthesis close
        ('[ \t]*\)', ')'),
        # delete spaces after prefix
        ('(--|\+\+)[ \t]*([\w\)])', '\\1\\2'),
        # delete spaces before postfix
        ('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
        # delete space after parenthesis close
        #('\)[ \t]*([^\w])', ')\\1'),
        # delete space around operator
        # ('([\w\(\)\]])([ \t]*)(::|\.)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
        ('([\w\(\)\]])([ \t]*)(\.|->)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
        # delete space after operator
        ('(::)([ \t]*)([\w\(\)])', '\\1\\3'),
        # delete superfluous space around operator
        ('([\w\(\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*)([ \t]+)([\w\(\)])', '\\1 \\3 \\5'),
        # space around operator1
        ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
        # space around operator2
        ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) ([^\w\s])', '\\1 \\2 \\3'),
        # space around operator3
        ('([^\w\s]) (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|[^-]>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
        # space around operator4
        ('([\w\(\)\]]) (\*|/|\+|-) *([-:])', '\\1 \\2 \\3'),
        # space around +/-; exponent
        ('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
        ('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
        # trailing operator: move it to the start of the next line
        # NOTE(review): the `XXX' suffixes on `&' and `\*' presumably
        # disable those two alternatives on purpose -- confirm.
        (' (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&XXX|\||\*XXX)[ \t]*\n([ \t]*)',  '\n\\2\\1 '),
        # pointer
        ##('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
        ('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|:|=|\?\)|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
        #to#('(bool|char|const|delete|int|stream|unsigned|void|([A-Z]\w*)|[,])[ \n\t]*(\*|&)[ \t]*', '\\1 \\3'),
        # pointer with template
        ('(( *((bool|char|const|delete|int|stream|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*|\w+::\w+|[,])[ \*&],*)+)>) *(\*|&) *', '\\1 \\5'),
        #to#('(( *((bool|char|delete|int|stream|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)|[,])[ \*&],*)+)>)[ \t\n]*(\*|&) *', '\\1 \\7'),
        # unary pointer, minus, not
        ('(return|=) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
        # space after `operator'
        ('(\Woperator) *([^\w\s])', '\\1 \\2'),
        # dangling brace close
        ('\n[ \t]*(\n[ \t]*})', '\\1'),
        # dangling newline
        ('\n[ \t]*\n[ \t]*\n', '\n\n'),
        # dangling parenthesis open
        #('[ \t]*\n[ \t]*\([ \t]*\n', '('),
        ('\([ \t]*\n', '('),
        # dangling parenthesis close
        ('\n[ \t]*\)', ')'),
        # dangling comma
        ('\n[ \t]*,', ','),
        # dangling semicolon
        ('\n[ \t]*;', ';'),
        # brace open
        ('(\w)[ \t]*([^\s]*){([ \t]*\n)', '\\1\\2\n{\n'),
        # brace open backslash
        ('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
        # brace close
        ('}[ \t]*([^\n]*\w[^\n\\\]*)\n', '}\n\\1\n'),
        # brace close backslash
        ('}[ \t]*([^\n]*\w[^\n\\\]*)', '\n}\n\\1'),
        # delete space after `operator'
        #('(\Woperator) (\W)', '\\1\\2'),
        # delete space after case, label
        ('(\W(case|label) ([\w]+)) :', '\\1:'),
        # delete space before comma
        ('[ \t]*,', ','),
        # delete space before semicolon
        ('[ \t]*;', ';'),
        # delete space before eol-backslash
        ('[ \t]*\\\\\n', '\\\n'),
        # delete trailing whitespace
        ('[ \t]*\n', '\n'),

        ## Deuglify code that also gets ugly by rules above.
        # delete newline after typedef struct
        ('(typedef struct\s+([\w]*\s){([^}]|{[^}]*})*})\s*\n\s*(\w[\w\d]*;)', '\\1 \\4'),
        # delete spaces around template brackets
        #('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*(( *(bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)),?)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\3\\8>'),
        ('(dynamic_cast|template|typedef|\w+::\w+|[A-Z]\w*)[ \t]*<[ \t]*(( *(bool|char|const|int|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*)( *[\*&]?,|[\*&])*)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\2\\6>'),
        ('(\w+::\w+|[A-Z]\w*) < ((\w+::\w+|[A-Z]\w*)<[A-Z]\w*>) >', '\\1<\\2 >'),
        # put the empty statement of if/while/for on a line of its own
        ('((if|while)\s+\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\n;'),
        ('(for\s+\(([^;]*;[^;]*;([^\)]|\([^\)]*\))*)\))\s*;', '\\1\n;'),
        # do {..} while
        ('(}\s*while\s*)(\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\\2;'),

        ## Fix code that gets broken by rules above.
        ##('->\s+\*', '->*'),
        # delete space before #define x()
        ('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
        # add space in #define x ()
        ('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
         '#define \\1 \\2'),
        # delete space in #include <>
        ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
        '#include <\\1\\2\\3>'),
        # delete backslash before empty line (emacs' indent region is broken)
        ('\\\\\n\n', '\n\n'),
        ],

        COMMENT:
        [
        # delete trailing whitespace
        ('[ \t]*\n', '\n'),
        # delete empty first lines
        ('(/\*\n)\n*', '\\1'),
        # delete empty last lines
        ('\n*(\n\*/)', '\\1'),
        ## delete newline after start?
        #('/(\*)\n', '\\1'),
        ## delete newline before end?
        #('\n(\*/)', '\\1'),
        ],
        }
 155
 156 # Recognize special sequences in the input.
 157 #
 158 #   (?P<name>regex) -- Assign result of REGEX to NAME.
 159 #   *? -- Match non-greedily.
 160 #   (?m) -- Multiline regex: Make ^ and $ match at each line.
 161 #   (?s) -- Make the dot match all characters including newline.
 162 #   (?x) -- Ignore whitespace in patterns.
 163 no_match = 'a\ba'
 164 snippet_res = {
 165         CXX: {
 166                 'include':
 167                   no_match,
 168
 169                 'multiline_comment':
 170                   r'''(?sx)
 171                     (?P<match>
 172                     (?P<code>
 173                     [ \t]*/\*.*?\*/))''',
 174
 175                 'singleline_comment':
 176                   r'''(?mx)
 177                     ^.*
 178                     (?P<match>
 179                       (?P<code>
 180                       [ \t]*//([ \t][^\n]*|)\n))''',
 181
 182                 'string':
 183                   r'''(?x)
 184                     (?P<match>
 185                     (?P<code>
 186                     "([^"]|(([^\\]|(\\\\))\\"))*"))''',
 187
 188                 'char':
 189                   r'''(?x)
 190                     (?P<match>
 191                     (?P<code>
 192                     '([^']+|\')))''',
 193
 194                 'include':
 195                   r'''(?x)
 196                     (?P<match>
 197                     (?P<code>
 198                     "#[ \t]*include[ \t]*<[^>]*>''',
 199         },
 200         }
 201
 202 class Chunk:
 203         def replacement_text (self):
 204                 return ''
 205
 206         def filter_text (self):
 207                 return self.replacement_text ()
 208
 209         def ly_is_outdated (self):
 210                 return 0
 211
 212         def png_is_outdated (self):
 213                 return 0
 214
 215 class Substring (Chunk):
 216         def __init__ (self, source, start, end):
 217                 self.source = source
 218                 self.start = start
 219                 self.end = end
 220
 221         def replacement_text (self):
 222                 s = self.source[self.start:self.end]
 223                 if verbose_p:
 224                         sys.stderr.write ('CXX Rules')
 225                 for i in rules[CXX]:
 226                         if verbose_p:
 227                                 sys.stderr.write ('.')
 228                                 #sys.stderr.write ('\n*********\n')
 229                                 #sys.stderr.write (i[0])
 230                                 #sys.stderr.write ('\n=========\n')
 231                                 #sys.stderr.write (s)
 232                                 #sys.stderr.write ('\n*********\n')
 233                         s = re.sub (i[0], i[1], s)
 234                 if verbose_p:
 235                         sys.stderr.write ('done\n')
 236                 return s
 237
 238
 239 class Snippet (Chunk):
 240         def __init__ (self, type, match, format):
 241                 self.type = type
 242                 self.match = match
 243                 self.hash = 0
 244                 self.options = []
 245                 self.format = format
 246
 247         def replacement_text (self):
 248                 return self.match.group ('match')
 249
 250         def substring (self, s):
 251                 return self.match.group (s)
 252
 253         def __repr__ (self):
 254                 return `self.__class__` + ' type = ' + self.type
 255
 256 class Multiline_comment (Snippet):
 257         def __init__ (self, source, match, format):
 258                 self.type = type
 259                 self.match = match
 260                 self.hash = 0
 261                 self.options = []
 262                 self.format = format
 263
 264         def replacement_text (self):
 265                 s = self.match.group ('match')
 266                 if verbose_p:
 267                         sys.stderr.write ('COMMENT Rules')
 268                 for i in rules[COMMENT]:
 269                         if verbose_p:
 270                                 sys.stderr.write ('.')
 271                         s = re.sub (i[0], i[1], s)
 272                 return s
 273
 274 snippet_type_to_class = {
 275         'multiline_comment': Multiline_comment,
 276 #       'lilypond_block': Lilypond_snippet,
 277 #       'lilypond': Lilypond_snippet,
 278 #       'include': Include_snippet,
 279 }
 280
 281 def find_toplevel_snippets (s, types):
 282         if verbose_p:
 283                 sys.stderr.write ('Dissecting')
 284
 285         res = {}
 286         for i in types:
 287                 res[i] = re.compile (snippet_res[format][i])
 288
 289         snippets = []
 290         index = 0
 291         ## found = dict (map (lambda x: (x, None),
 292         ##                    types))
 293         ## urg python2.1
 294         found = {}
 295         map (lambda x, f = found: f.setdefault (x, None),
 296              types)
 297
 298         # We want to search for multiple regexes, without searching
 299         # the string multiple times for one regex.
 300         # Hence, we use earlier results to limit the string portion
 301         # where we search.
 302         # Since every part of the string is traversed at most once for
 303         # every type of snippet, this is linear.
 304
 305         while 1:
 306                 if verbose_p:
 307                         sys.stderr.write ('.')
 308                 first = None
 309                 endex = 1 << 30
 310                 for type in types:
 311                         if not found[type] or found[type][0] < index:
 312                                 found[type] = None
 313                                 m = res[type].search (s[index:endex])
 314                                 if not m:
 315                                         continue
 316
 317                                 cl = Snippet
 318                                 if snippet_type_to_class.has_key (type):
 319                                         cl = snippet_type_to_class[type]
 320                                 snip = cl (type, m, format)
 321                                 start = index + m.start ('match')
 322                                 found[type] = (start, snip)
 323
 324                         if found[type] \
 325                            and (not first \
 326                                 or found[type][0] < found[first][0]):
 327                                 first = type
 328
 329                                 # FIXME.
 330
 331                                 # Limiting the search space is a cute
 332                                 # idea, but this *requires* to search
 333                                 # for possible containing blocks
 334                                 # first, at least as long as we do not
 335                                 # search for the start of blocks, but
 336                                 # always/directly for the entire
 337                                 # @block ... @end block.
 338
 339                                 endex = found[first][0]
 340
 341                 if not first:
 342                         snippets.append (Substring (s, index, len (s)))
 343                         break
 344
 345                 (start, snip) = found[first]
 346                 snippets.append (Substring (s, index, start))
 347                 snippets.append (snip)
 348                 found[first] = None
 349                 index = start + len (snip.match.group ('match'))
 350
 351         return snippets
 352
 353 def nitpick_file (outdir, file):
 354         s = open (file).read ()
 355
 356         # FIXME: Containing blocks must be first, see
 357         #        find_toplevel_snippets.
 358         snippet_types = (
 359                 'multiline_comment',
 360                 'singleline_comment',
 361                 'string',
 362                 'char',
 363                 )
 364
 365         chunks = find_toplevel_snippets (s, snippet_types)
 366         #code = filter (lambda x: is_derived_class (x.__class__, Substring),
 367         #              chunks)
 368
 369         t = string.join (map (lambda x: x.filter_text (), chunks), '')
 370         fixt = file
 371         if s != t:
 372                 if not outdir:
 373                         os.system ('mv %s %s~' % (file, file))
 374                 else:
 375                         fixt = os.path.join (outdir,
 376                                              os.path.basename (file))
 377                 h = open (fixt, "w")
 378                 h.write (t)
 379                 h.close ()
 380         if s != t or indent_p:
 381                 indent_file (fixt)
 382
 383 def indent_file (file):
 384         emacs = '''emacs\
 385         --no-window-system\
 386         --batch\
 387         --no-site-file\
 388         --no-init-file\
 389         %(file)s\
 390         --eval '(let ((error nil)
 391                       (version-control nil))
 392                  (load-library "cc-mode")
 393                  (c++-mode)
 394                  (indent-region (point-min) (point-max))
 395                  (if (buffer-modified-p (current-buffer))
 396                   (save-buffer)))' ''' % vars ()
 397         emacsclient = '''emacsclient\
 398         --socket-name=%(socketdir)s/%(socketname)s\
 399         --no-wait\
 400         --eval '(let ((error nil)
 401                       (version-control nil))
 402                  (load-library "cc-mode")
 403                  (find-file "%(file)s")
 404                  (c++-mode)
 405                  (indent-region (point-min) (point-max))
 406                  (if (buffer-modified-p (current-buffer))
 407                   (save-buffer)))' ''' \
 408                   % { 'file': file,
 409                       'socketdir' : socketdir,
 410                       'socketname' : socketname, }
 411         if verbose_p:
 412                 sys.stderr.write (emacs)
 413                 sys.stderr.write ('\n')
 414         os.system (emacs)
 415
 416
 417 def usage ():
 418         sys.stdout.write (r'''
 419 Usage:
 420 fixcc [OPTION]... FILE...
 421
 422 Options:
 423    --help
 424    --indent   reindent, even if no changes
 425    --verbose
 426    --test
 427
 428 Typical use with LilyPond:
 429
 430    fixcc $(find flower kpath-guile lily -name '*cc' -o -name '*hh' | grep -v /out)
 431
 432 This script is licensed under the GNU GPL
 433 ''')
 434
 435 def do_options ():
 436         global indent_p, outdir, verbose_p
 437         (options, files) = getopt.getopt (sys.argv[1:], '',
 438                                           ['help', 'indent', 'outdir=',
 439                                            'test', 'verbose'])
 440         for (o, a) in options:
 441                 if o == '--help':
 442                         usage ()
 443                         sys.exit (0)
 444                 elif o == '--indent':
 445                         indent_p = 1
 446                 elif o == '--outdir':
 447                         outdir = a
 448                 elif o == '--verbose':
 449                         verbose_p = 1
 450                 elif o == '--test':
 451                         test ()
 452                         sys.exit (0)
 453                 else:
 454                         assert unimplemented
 455         if not files:
 456                 usage ()
 457                 sys.exit (2)
 458         return files
 459
 460
# Directory for the fixed files (--outdir); while this is false,
# nitpick_file rewrites the files in place.
outdir = 0
# Key into snippet_res: selects the regexes used to dissect the input.
format = CXX
# Directory and name of the Emacs server socket, see setup_client.
socketdir = '/tmp/fixcc'
socketname = 'fixcc%d' % os.getpid ()
 465
 466 def setup_client ():
 467         #--no-window-system\
 468         #--batch\
 469         os.unlink (os.path.join (socketdir, socketname))
 470         os.mkdir (socketdir, 0700)
 471         emacs='''emacs\
 472                 --no-site-file\
 473                 --no-init-file\
 474                 --eval '(let ((error nil)
 475                               (version-control nil))
 476                          (load-library "server")
 477                          (setq server-socket-dir "%(socketdir)s")
 478                          (setq server-name "%(socketname)s")
 479                          (server-start)
 480                          (while t) (sleep 1000))' ''' \
 481                          % { 'socketdir' : socketdir,
 482                              'socketname' : socketname, }
 483
 484         if not os.fork ():
 485                 os.system (emacs)
 486                 sys.exit (0)
 487         while not os.path.exists (os.path.join (socketdir, socketname)):
 488                 time.sleep (1)
 489
 490 def main ():
 491         #emacsclient should be faster, but this does not work yet
 492         #setup_client ()
 493         files = do_options ()
 494         if outdir and not os.path.isdir (outdir):
 495                 os.makedirs (outdir)
 496         for i in files:
 497                 sys.stderr.write ('%s...\n' % i)
 498                 nitpick_file (outdir, i)
 499
 500
# Badly formatted C++ fragments; test () writes this text to a file and
# runs nitpick_file on it.  It is input for the rules only and is not
# valid C++ as it stands.
## TODO: make this compilable and check with g++
TEST = '''
#include <libio.h>
#include <map>
class
ostream ;

class Foo {
public: static char* foo ();
std::map<char*,int>* bar (char, char) { return 0; }
};
typedef struct
{
  Foo **bar;
} String;

ostream &
operator << (ostream & os, String d);

typedef struct _t_ligature
{
  char *succ, *lig;
  struct _t_ligature * next;
}  AFM_Ligature;

typedef std::map < AFM_Ligature const *, int > Bar;

/*      ||
 *      vv
 * !OK  OK
 */
/*     ||
       vv
  !OK  OK
 */
char *
Foo:: foo ()
{
int
i
;
  char* a= &++ i ;
  a [*++ a] = (char*) foe (*i, &bar) *
  2;
  int operator double ();
  std::map<char*,int> y =*bar(-*a ,*b);
  Interval_t<T> & operator*= (T r);
  Foo<T>*c;
  int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
  delete *p;
  if (abs (f)*2 > abs (d) *FUDGE)
    ;
  while (0);
  for (; i<x foo(); foo>bar);
  for (; *p && > y;
       foo > bar)
;
  do {
  ;;;
  }
  while (foe);

  squiggle. extent;
  1 && * unsmob_moment (lf);
  line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->*self_scm
(): SCM_EOL);
  case foo: k;

  if (0) {a=b;} else {
   c=d;
  }

  cookie_io_functions_t Memory_out_stream::functions_ = {
    Memory_out_stream::reader,
    ...
  };

  int compare (Array < Pitch> *, Array < Pitch> *);
  original_ = (Grob *) & s;
  Drul_array< Link_array<Grob> > o;
}

  header_.char_info_pos = (6 + header_length) * 4;
  return ly_bool2scm (*ma < * mb);

  1 *::sign(2);

  (shift) *-d;

  a = 0 ? *x : *y;
'''
 592
 593 def test ():
 594         test_file = 'fixcc.cc'
 595         open (test_file, 'w').write (TEST)
 596         nitpick_file (outdir, test_file)
 597         sys.stdout.write (open (test_file).read ())
 598
 599 if __name__ == '__main__':
 600         main ()
 601