self.msg = msg
def __str__(self):
- s = u'cannot tokenize data'
+ s = 'cannot tokenize data'
line, pos = self.place
- return u'%s: %d,%d: "%s"' % (s, line, pos, self.msg)
+ return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)
class Token(object):
self.end = end
def __repr__(self):
- return u'Token(%r, %r)' % (self.type, self.value)
+ return 'Token(%r, %r)' % (self.type, self.value)
def __eq__(self, other):
# FIXME: Case sensitivity is assumed here
else:
sl, sp = self.start
el, ep = self.end
- return u'%d,%d-%d,%d:' % (sl, sp, el, ep)
+ return '%d,%d-%d,%d:' % (sl, sp, el, ep)
def __str__(self):
- s = u"%s %s '%s'" % (self._pos_str(), self.type, self.value)
+ s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
return s.strip()
@property
return self.value
def pformat(self):
- return u"%s %s '%s'" % (self._pos_str().ljust(20),
+ return "%s %s '%s'" % (self._pos_str().ljust(20),
self.type.ljust(14),
self.value)
m = regexp.match(str, i)
if m is not None:
value = m.group()
- nls = value.count(u'\n')
+ nls = value.count('\n')
n_line = line + nls
if nls == 0:
n_pos = pos + len(value)
else:
- n_pos = len(value) - value.rfind(u'\n') - 1
+ n_pos = len(value) - value.rfind('\n') - 1
return Token(type, value, (line, pos + 1), (n_line, n_pos))
else:
errline = str.splitlines()[line - 1]
Runs a parser wrapped into this object.
"""
if debug:
- log.debug(u'trying %s' % self.name)
+ log.debug('trying %s' % self.name)
return self._run(tokens, s)
def _run(self, tokens, s):
- raise NotImplementedError(u'you must define() a parser')
+ raise NotImplementedError('you must define() a parser')
def parse(self, tokens):
"""Sequence(a) -> b
if len(tokens) > max:
tok = tokens[max]
else:
- tok = u'<EOF>'
- raise NoParseError(u'%s: %s' % (e.msg, tok), e.state)
+ tok = '<EOF>'
+ raise NoParseError('%s: %s' % (e.msg, tok), e.state)
def __add__(self, other):
"""Parser(a, b), Parser(a, c) -> Parser(a, _Tuple(b, c))
# or in terms of bind and pure:
# _add = self.bind(lambda x: other.bind(lambda y: pure(magic(x, y))))
- _add.name = u'(%s , %s)' % (self.name, other.name)
+ _add.name = '(%s , %s)' % (self.name, other.name)
return _add
def __or__(self, other):
except NoParseError as e:
return other.run(tokens, State(s.pos, e.state.max))
- _or.name = u'(%s | %s)' % (self.name, other.name)
+ _or.name = '(%s | %s)' % (self.name, other.name)
return _or
def __rshift__(self, f):
# or in terms of bind and pure:
# _shift = self.bind(lambda x: pure(f(x)))
- _shift.name = u'(%s)' % (self.name,)
+ _shift.name = '(%s)' % (self.name,)
return _shift
def bind(self, f):
(v, s2) = self.run(tokens, s)
return f(v).run(tokens, s2)
- _bind.name = u'(%s >>=)' % (self.name,)
+ _bind.name = '(%s >>=)' % (self.name,)
return _bind
return unicode((self.pos, self.max))
def __repr__(self):
- return u'State(%r, %r)' % (self.pos, self.max)
+ return 'State(%r, %r)' % (self.pos, self.max)
class NoParseError(Exception):
- def __init__(self, msg=u'', state=None):
+ def __init__(self, msg='', state=None):
self.msg = msg
self.state = state
self.value = value
def __repr__(self):
- return u'_Ignored(%s)' % repr(self.value)
+ return '_Ignored(%s)' % repr(self.value)
@Parser
if s.pos >= len(tokens):
return None, s
else:
- raise NoParseError(u'should have reached <EOF>', s)
+ raise NoParseError('should have reached <EOF>', s)
-finished.name = u'finished'
+finished.name = 'finished'
def many(p):
except NoParseError as e:
return res, State(s.pos, e.state.max)
- _many.name = u'{ %s }' % p.name
+ _many.name = '{ %s }' % p.name
return _many
@Parser
def _some(tokens, s):
if s.pos >= len(tokens):
- raise NoParseError(u'no tokens left in the stream', s)
+ raise NoParseError('no tokens left in the stream', s)
else:
t = tokens[s.pos]
if pred(t):
pos = s.pos + 1
s2 = State(pos, max(pos, s.max))
if debug:
- log.debug(u'*matched* "%s", new state = %s' % (t, s2))
+ log.debug('*matched* "%s", new state = %s' % (t, s2))
return t, s2
else:
if debug:
- log.debug(u'failed "%s", state = %s' % (t, s))
- raise NoParseError(u'got unexpected token', s)
+ log.debug('failed "%s", state = %s' % (t, s))
+ raise NoParseError('got unexpected token', s)
- _some.name = u'(some)'
+ _some.name = '(some)'
return _some
Returns a parser that parses a token that is equal to the given `value`.
"""
name = getattr(value, 'name', value)
- return some(lambda t: t == value).named(u'(a "%s")' % (name,))
+ return some(lambda t: t == value).named('(a "%s")' % (name,))
def pure(x):
def _pure(_, s):
return x, s
- _pure.name = u'(pure %r)' % (x,)
+ _pure.name = '(pure %r)' % (x,)
return _pure
NOTE: In a statically typed language, the type Maybe b could be more
appropriate.
"""
- return (p | pure(None)).named(u'[ %s ]' % (p.name,))
+ return (p | pure(None)).named('[ %s ]' % (p.name,))
def skip(p):
Returns a parser that applies the parser p one or more times.
"""
q = p + many(p) >> (lambda x: [x[0]] + x[1])
- return q.named(u'(%s , { %s })' % (p.name, p.name))
+ return q.named('(%s , { %s })' % (p.name, p.name))
def with_forward_decls(suspension):
@Parser
def f(tokens, s):
- raise NotImplementedError(u'you must define() a forward_decl somewhere')
+ raise NotImplementedError('you must define() a forward_decl somewhere')
return f
Returns a pseudographic tree representation of x similar to the tree command
in Unix.
"""
- (MID, END, CONT, LAST, ROOT) = (u'|-- ', u'`-- ', u'| ', u' ', u'')
+ (MID, END, CONT, LAST, ROOT) = ('|-- ', '`-- ', '| ', ' ', '')
def rec(x, indent, sym):
line = indent + sym + show(x)
next_indent = indent + LAST
syms = [MID] * (len(xs) - 1) + [END]
lines = [rec(x, next_indent, sym) for x, sym in zip(xs, syms)]
- return u'\n'.join([line] + lines)
+ return '\n'.join([line] + lines)
- return rec(x, u'', ROOT)
+ return rec(x, '', ROOT)