# fixcc -- nitpick lily's c++ code
# TODO
+# * maintainable rules: regexps using whitespace (?x) and named groups
+# (?P<identifier>)
+# * trailing `*' vs. function definition
+# * do not break/change indentation of fixcc-clean files
# * check lexer, parser
# * rewrite in elisp, add to cc-mode
+# * using regexes is broken by design
# * ?
# * profit
import time
COMMENT = 'COMMENT'
+STRING = 'STRING'
+GLOBAL_CXX = 'GC++'
CXX = 'C++'
+verbose_p = 0
+indent_p = 0
rules = {
+ GLOBAL_CXX:
+ [
+ # delete gratuitous block
+ ('''\n( |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
+ '\n\\2;'),
+ ],
CXX:
[
# space before parenthesis open
('([^\( \]])[ \t]*\(', '\\1 ('),
# space after comma
- (',[ \t]*', ', '),
+ # (the comma must not follow a quote, so the char literal ',' is left alone)
+ ("([^'],)[ \t]*", '\\1 '),
+ # delete gratuitous block
+ ('''\n( |\t)\s*{\n\s*(.*?)(?![{}]|\b(do|for|else|if|switch|while)\b);\n\s*}''',
+ '\n\\2;'),
# delete inline tabs
('(\w)\t+', '\\1 '),
# delete inline double spaces
('([\w\)\]])[ \t]*(--|\+\+)', '\\1\\2'),
# delete space after parenthesis close
#('\)[ \t]*([^\w])', ')\\1'),
+ # delete space around operator
+ # ('([\w\(\)\]])([ \t]*)(::|\.)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
+ ('([\w\(\)\]])([ \t]*)(\.|->)([ \t]*)([\w\(\)])', '\\1\\3\\5'),
+ # delete space after operator
+ ('(::)([ \t]*)([\w\(\)])', '\\1\\3'),
# delete superfluous space around operator
- ('([\w\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|\+|-|=|/|&|\|\*)([ \t]+)([\w\(])', '\\1 \\3 \\5'),
- # space around operator
- ('([\w\)\]])(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|=|/|&|\|\*)([\w\(])', '\\1 \\2 \\3'),
+ ('([\w\(\)\]])([ \t]+)(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*)([ \t]+)([\w\(\)])', '\\1 \\3 \\5'),
+ # space around operator1
+ ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
+ # space around operator2
+ ('([\w\)\]]) *(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|=|/|:|&|\||\*) ([^\w\s])', '\\1 \\2 \\3'),
+ # space around operator3
+ ('([^\w\s]) (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|[^-]>|=|/|:|&|\||\*) *([\w\(])', '\\1 \\2 \\3'),
+ # space around operator4
+ ('([\w\(\)\]]) (\*|/|\+|-) *([-:])', '\\1 \\2 \\3'),
# space around +/-; exponent
('([\w\)\]])(\+|-)([_A-Za-z\(])', '\\1 \\2 \\3'),
('([_\dA-Za-df-z\)\]])(\+|-)([\w\(])', '\\1 \\2 \\3'),
# trailing operator
- (' (&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|<|>|\+|-|=|/|\*XXX)[ \t]*\n([ \t]*)',
- '\n\\2\\1 '),
+ (' (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&XXX|\||\*XXX)[ \t]*\n([ \t]*)', '\n\\2\\1 '),
# pointer
- ('(bool|char|const|int|unsigned|void|([A-Z]\w*))[ \t]*(\*|&)[ \t]*',
- '\\1 \\3'),
+ ##('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
+ ('(bool|char|const|delete|int|stream|unsigned|void|size_t|struct \w+|[A-Z]\w*|,|;|:|=|\?\)|&&|<|[^-]>|\|\||-|\+)[ \t]*(\*|&)[ \t]*', '\\1 \\2'),
+ #to#('(bool|char|const|delete|int|stream|unsigned|void|([A-Z]\w*)|[,])[ \n\t]*(\*|&)[ \t]*', '\\1 \\3'),
+ # pointer with template
+ ('(( *((bool|char|const|delete|int|stream|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*|\w+::\w+|[,])[ \*&],*)+)>) *(\*|&) *', '\\1 \\5'),
+ #to#('(( *((bool|char|delete|int|stream|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)|[,])[ \*&],*)+)>)[ \t\n]*(\*|&) *', '\\1 \\7'),
# unary pointer, minus, not
('(return|=) (\*|&|-|!) ([\w\(])', '\\1 \\2\\3'),
- # delete space after `operator'
- #('(\Woperator) (\W)', '\\1\\2'),
# space after `operator'
- ('(\Woperator) *(\W)', '\\1 \\2'),
+ ('(\Woperator) *([^\w\s])', '\\1 \\2'),
+ # dangling brace close
+ ('\n[ \t]*(\n[ \t]*})', '\\1'),
# dangling newline
('\n[ \t]*\n[ \t]*\n', '\n\n'),
# dangling parenthesis open
# dangling semicolon
('\n[ \t]*;', ';'),
# brace open
- ('(\w[^\n]*){[ \t]*\n', '\\1\n{\n'),
+ ('(\w)[ \t]*([^\s]*){([ \t]*\n)', '\\1\\2\n{\n'),
# brace open backslash
('(\w[^\n]*){[ \t]*\\\\\n', '\\1\\\n{\\\n'),
# brace close
- ('}[ \t]*([^\n]*\w[^\n\\\]*\n)', '}\n\\1\n'),
+ ("}[ \t]*([^'\n]*\w[^\n\\\]*)\n", '}\n\\1\n'),
# brace close backslash
- ('}[ \t]*([^\n]*\w[^\n]*?\\\\\n)', '}\\\n\\1\n'),
+ ("}[ \t]*([^'\n]*\w[^\n\\\]*)", '\n}\n\\1'),
+ # delete space after `operator'
+ #('(\Woperator) (\W)', '\\1\\2'),
+ # delete space after case, label
+ ('(\W(case|label) ([\w]+)) :', '\\1:'),
# delete space before comma
('[ \t]*,', ','),
# delete space before semicolon
# delete trailing whitespace
('[ \t]*\n', '\n'),
- ## Massage code that gets broken by rules above.
+ ## Deuglify code that is made ugly by the rules above.
+ # delete newline after typedef struct
+ ('(typedef struct\s+([\w]*\s){([^}]|{[^}]*})*})\s*\n\s*(\w[\w\d]*;)', '\\1 \\4'),
# delete spaces around template brackets
- # Note that this does not work for PQueue_event. Fix C++ name?
- ('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*((bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)))[ \t]*?(| \*)[ \t]*>',
- '\\1<\\3\\7>'),
+ #('(dynamic_cast|template|([A-Z]\w*))[ \t]*<[ \t]*(( *(bool|char|int|unsigned|void|(class[ \t]+\w*)|([A-Z]\w*)),?)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\3\\8>'),
+ ('(dynamic_cast|template|typedef|\w+::\w+|[A-Z]\w*)[ \t]*<[ \t]*(( *(bool|char|const|int|unsigned|void|size_t|class[ \t]+\w*|[A-Z]\w*)( *[\*&]?,|[\*&])*)+)[ \t]?(| [\*&])[ \t]*>', '\\1<\\2\\6>'),
+ ('(\w+::\w+|[A-Z]\w*) < ((\w+::\w+|[A-Z]\w*)<[A-Z]\w*>) >', '\\1<\\2 >'),
+ ('((if|while)\s+\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\n;'),
+ ('(for\s+\(([^;]*;[^;]*;([^\)]|\([^\)]*\))*)\))\s*;', '\\1\n;'),
+ # do {..} while
+ ('(}\s*while\s*)(\(([^\)]|\([^\)]*\))*\))\s*;', '\\1\\2;'),
+
+ ## Fix code that gets broken by rules above.
+ ##('->\s+\*', '->*'),
# delete space before #define x()
('#[ \t]*define (\w*)[ \t]*\(', '#define \\1('),
# add space in #define x ()
('#[ \t]*define (\w*)(\(([^\(\)]|\([^\(\)]*\))*\)\\n)',
'#define \\1 \\2'),
# delete space in #include <>
- ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
+ ('#[ \t]*include[ \t]*<[ \t]*([^ \t>]*)[ \t]*(/?)[ \t]*([^ \t>]*)[ \t]*>',
'#include <\\1\\2\\3>'),
# delete backslash before empty line (emacs' indent region is broken)
('\\\\\n\n', '\n\n'),
],
}
-
# Recognize special sequences in the input.
#
# (?P<name>regex) -- Assign result of REGEX to NAME.
no_match = 'a\ba'
snippet_res = {
CXX: {
- 'include':
- no_match,
-
- 'multiline_comment':
- r'''(?sx)
- (?P<match>
- (?P<code>
- [ \t]*/\*.*?\*/))''',
-
- 'singleline_comment':
- r'''(?mx)
- ^.*
- (?P<match>
- (?P<code>
- [ \t]*//([ \t][^\n]*|)\n))''',
-
- 'string':
- r'''(?x)
- (?P<match>
- (?P<code>
- "([^"]|(([^\\]|(\\\\))\\"))*"))''',
-
- 'char':
- r'''(?x)
- (?P<match>
- (?P<code>
- '([^']+|\')))''',
-
- 'include':
- r'''(?x)
- (?P<match>
- (?P<code>
- "#[ \t]*include[ \t]*<[^>]*>''',
- },
- }
+ 'multiline_comment':
+ r'''(?sx)
+ (?P<match>
+ (?P<code>
+ [ \t]*/\*.*?\*/))''',
+
+ 'singleline_comment':
+ r'''(?mx)
+ ^.*
+ (?P<match>
+ (?P<code>
+ [ \t]*//([ \t][^\n]*|)\n))''',
+
+ 'string':
+ r'''(?x)
+ (?P<match>
+ (?P<code>
+ "([^\"\n](\")*)*"))''',
+
+ 'char':
+ r'''(?x)
+ (?P<match>
+ (?P<code>
+ '([^']+|\')))''',
+
+ 'include':
+ r'''(?x)
+ (?P<match>
+ (?P<code>
+ "#[ \t]*include[ \t]*<[^>]*>''',
+ },
+ }
class Chunk:
def replacement_text (self):
def filter_text (self):
return self.replacement_text ()
- def ly_is_outdated (self):
- return 0
-
- def png_is_outdated (self):
- return 0
-
class Substring (Chunk):
def __init__ (self, source, start, end):
self.source = source
def replacement_text (self):
s = self.source[self.start:self.end]
+ if verbose_p:
+ sys.stderr.write ('CXX Rules')
for i in rules[CXX]:
+ if verbose_p:
+ sys.stderr.write ('.')
+ #sys.stderr.write ('\n\n***********\n')
+ #sys.stderr.write (i[0])
+ #sys.stderr.write ('\n***********\n')
+ #sys.stderr.write ('\n=========>>\n')
+ #sys.stderr.write (s)
+ #sys.stderr.write ('\n<<=========\n')
s = re.sub (i[0], i[1], s)
+ if verbose_p:
+ sys.stderr.write ('done\n')
return s
def replacement_text (self):
s = self.match.group ('match')
+ if verbose_p:
+ sys.stderr.write ('COMMENT Rules')
for i in rules[COMMENT]:
+ if verbose_p:
+ sys.stderr.write ('.')
s = re.sub (i[0], i[1], s)
return s
snippet_type_to_class = {
'multiline_comment': Multiline_comment,
-# 'lilypond_block': Lilypond_snippet,
-# 'lilypond': Lilypond_snippet,
+# 'string': Multiline_comment,
# 'include': Include_snippet,
}
def find_toplevel_snippets (s, types):
+ if verbose_p:
+ sys.stderr.write ('Dissecting')
+
res = {}
for i in types:
res[i] = re.compile (snippet_res[format][i])
# every type of snippet, this is linear.
while 1:
+ if verbose_p:
+ sys.stderr.write ('.')
first = None
endex = 1 << 30
for type in types:
def nitpick_file (outdir, file):
s = open (file).read ()
+ for i in rules[GLOBAL_CXX]:
+ s = re.sub (i[0], i[1], s)
+
# FIXME: Containing blocks must be first, see
# find_toplevel_snippets.
+ # We let simple strings remain part of the code
snippet_types = (
'multiline_comment',
'singleline_comment',
'string',
- 'char',
+# 'char',
)
chunks = find_toplevel_snippets (s, snippet_types)
h = open (fixt, "w")
h.write (t)
h.close ()
- indent_file (fixt)
+ if s != t or indent_p:
+ indent_file (fixt)
def indent_file (file):
emacs = '''emacs\
% { 'file': file,
'socketdir' : socketdir,
'socketname' : socketname, }
+ if verbose_p:
+ sys.stderr.write (emacs)
+ sys.stderr.write ('\n')
os.system (emacs)
def usage ():
sys.stdout.write (r'''
Usage:
-fixcc [--outdir=DIR] FILE...
+fixcc [OPTION]... FILE...
+
+Options:
+ --help
+ --indent reindent, even if no changes
+ --verbose
+ --test
Typical use with LilyPond:
''')
def do_options ():
- global outdir
+ global indent_p, outdir, verbose_p
(options, files) = getopt.getopt (sys.argv[1:], '',
- ['help', 'outdir='])
+ ['help', 'indent', 'outdir=',
+ 'test', 'verbose'])
for (o, a) in options:
if o == '--help':
usage ()
sys.exit (0)
+ elif o == '--indent':
+ indent_p = 1
elif o == '--outdir':
outdir = a
+ elif o == '--verbose':
+ verbose_p = 1
+ elif o == '--test':
+ test ()
+ sys.exit (0)
else:
assert unimplemented
if not files:
sys.stderr.write ('%s...\n' % i)
nitpick_file (outdir, i)
+
+## TODO: make this compilable and check with g++
+TEST = '''
+#include <libio.h>
+#include <map>
+class
+ostream ;
+
+class Foo {
+public: static char* foo ();
+std::map<char*,int>* bar (char, char) { return 0; }
+};
+typedef struct
+{
+ Foo **bar;
+} String;
+
+ostream &
+operator << (ostream & os, String d);
+
+typedef struct _t_ligature
+{
+ char *succ, *lig;
+ struct _t_ligature * next;
+} AFM_Ligature;
+
+typedef std::map < AFM_Ligature const *, int > Bar;
+
+ /**
+ (c) 1997--2006 Han-Wen Nienhuys <hanwen@cs.uu.nl>
+ */
+
+/* ||
+ * vv
+ * !OK OK
+ */
+/* ||
+ vv
+ !OK OK
+ */
+char *
+Foo:: foo ()
+{
+int
+i
+;
+ char* a= &++ i ;
+ a [*++ a] = (char*) foe (*i, &bar) *
+ 2;
+ int operator double ();
+ std::map<char*,int> y =*bar(-*a ,*b);
+ Interval_t<T> & operator*= (T r);
+ Foo<T>*c;
+ int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
+ delete *p;
+ if (abs (f)*2 > abs (d) *FUDGE)
+ ;
+ while (0);
+ for (; i<x foo(); foo>bar);
+ for (; *p && > y;
+ foo > bar)
+;
+ do {
+ ;;;
+ }
+ while (foe);
+
+ squiggle. extent;
+ 1 && * unsmob_moment (lf);
+ line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->*self_scm
+(): SCM_EOL);
+ case foo: k;
+
+ if (0) {a=b;} else {
+ c=d;
+ }
+
+ cookie_io_functions_t Memory_out_stream::functions_ = {
+ Memory_out_stream::reader,
+ ...
+ };
+
+ int compare (Array < Pitch> *, Array < Pitch> *);
+ original_ = (Grob *) & s;
+ Drul_array< Link_array<Grob> > o;
+}
+
+ header_.char_info_pos = (6 + header_length) * 4;
+ return ly_bool2scm (*ma < * mb);
+
+ 1 *::sign(2);
+
+ (shift) *-d;
+
+ a = 0 ? *x : *y;
+
+a = "foo() 2,2,4";
+{
+ if (!span_)
+ {
+ span_ = make_spanner ("StaffSymbol", SCM_EOL);
+ }
+}
+{
+ if (!span_)
+ {
+ span_ = make_spanner (StaffSymbol, SCM_EOL);
+ }
+}
+'''
+
+def test ():
+    '''Write the TEST sample to a scratch file, run nitpick_file () on it
+    and print the file's contents afterwards to stdout.
+
+    Both file handles are closed explicitly, so the sample is flushed to
+    disk before nitpick_file () opens the same path for reading.
+    '''
+    test_file = 'fixcc.cc'
+    h = open (test_file, 'w')
+    try:
+        h.write (TEST)
+    finally:
+        h.close ()
+    nitpick_file (outdir, test_file)
+    h = open (test_file)
+    try:
+        sys.stdout.write (h.read ())
+    finally:
+        h.close ()
+
if __name__ == '__main__':
main ()