-- Copyright 2006-2021 Mitchell. See LICENSE.
-- Python LPeg lexer.

local lexer = require('lexer')
local token = lexer.token
local word_match = lexer.word_match
local P = lpeg.P
local S = lpeg.S

-- The lexer object every rule below is registered on; it is created with the
-- `fold_by_indentation` option enabled.
local lex = lexer.new('python', {fold_by_indentation = true})

-- Whitespace.
-- One or more consecutive `lexer.space` characters form a single token.
local whitespace = token(lexer.WHITESPACE, lexer.space^1)
lex:add_rule('whitespace', whitespace)

-- Keywords.
-- Language keywords followed by groups of special double-underscore names.
-- The `--` group headings are inside the string literal and are handed to
-- `word_match` together with the words (presumably it ignores them; verify
-- against the lexer module).
local keyword_words = [[
  and as assert async await break class continue def del elif else except exec
  finally for from global if import in is lambda nonlocal not or pass print
  raise return try while with yield
  -- Descriptors/attr access.
  __get__ __set__ __delete__ __slots__
  -- Class.
  __new__ __init__ __del__ __repr__ __str__ __cmp__ __index__ __lt__ __le__
  __gt__ __ge__ __eq__ __ne__ __hash__ __nonzero__ __getattr__ __getattribute__
  __setattr__ __delattr__ __call__
  -- Operator.
  __add__ __sub__ __mul__ __div__ __floordiv__ __mod__ __divmod__ __pow__
  __and__ __xor__ __or__ __lshift__ __rshift__ __nonzero__ __neg__ __pos__
  __abs__ __invert__ __iadd__ __isub__ __imul__ __idiv__ __ifloordiv__ __imod__
  __ipow__ __iand__ __ixor__ __ior__ __ilshift__ __irshift__
  -- Conversions.
  __int__ __long__ __float__ __complex__ __oct__ __hex__ __coerce__
  -- Containers.
  __len__ __getitem__ __missing__ __setitem__ __delitem__ __contains__ __iter__
  __getslice__ __setslice__ __delslice__
  -- Module and class attribs.
  __doc__ __name__ __dict__ __file__ __path__ __module__ __bases__ __class__
  __self__
  -- Stdlib/sys.
  __builtin__ __future__ __main__ __import__ __stdin__ __stdout__ __stderr__
  -- Other.
  __debug__ __doc__ __import__ __name__
]]
lex:add_rule('keyword', token(lexer.KEYWORD, word_match(keyword_words)))

-- Functions.
lex:add_rule('function', token(lexer.FUNCTION, word_match[[
  abs all any apply basestring bool buffer callable chr classmethod cmp coerce
  compile complex copyright credits delattr dict dir divmod enumerate eval
  execfile exit file filter float frozenset getattr globals hasattr hash help
  hex id input int intern isinstance issubclass iter len license list locals
  long map max min object oct open ord pow property quit range raw_input reduce
  reload repr reversed round set setattr slice sorted staticmethod str sum super
  tuple type unichr unicode vars xrange zip
]]))

-- Constants.
-- Built-in constant names and exception/warning class names.
lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
  ArithmeticError AssertionError AttributeError BaseException DeprecationWarning
  EOFError Ellipsis EnvironmentError Exception False FloatingPointError
  FutureWarning GeneratorExit IOError ImportError ImportWarning IndentationError
  IndexError KeyError KeyboardInterrupt LookupError MemoryError NameError None
  NotImplemented NotImplementedError OSError OverflowError
  PendingDeprecationWarning ReferenceError RuntimeError RuntimeWarning
  StandardError StopIteration SyntaxError SyntaxWarning SystemError SystemExit
  TabError True TypeError UnboundLocalError UnicodeDecodeError
  UnicodeEncodeError UnicodeError UnicodeTranslateError UnicodeWarning
  UserWarning ValueError Warning ZeroDivisionError
]]))

-- Self.
-- The negative lookahead requires that 'self' is not followed by another
-- identifier character, so names that merely begin with those four letters
-- (e.g. 'selfish', 'self_test') are not matched by this rule.
lex:add_rule('self', token('self', P('self') * -(lexer.alnum + '_')))
lex:add_style('self', lexer.styles.type)

-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))

-- Comments.
-- No escape handling is requested from `to_eol`: in Python a trailing
-- backslash does not continue a comment onto the next line.
lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))

-- Strings.
-- Both quote styles accept an optional unicode prefix in either case.
local sq_str = S('uU')^-1 * lexer.range("'", true)
local dq_str = S('uU')^-1 * lexer.range('"', true)
local tq_str = lexer.range("'''") + lexer.range('"""')
-- TODO: raw_strs cannot end in single \.
-- Raw strings: optional unicode prefix, then the raw marker, each accepted in
-- either case for both quote styles. Backslash escapes are not recognized
-- inside the range (last argument `false`).
local raw_sq_str = S('uU')^-1 * S('rR') * lexer.range("'", false, false)
local raw_dq_str = S('uU')^-1 * S('rR') * lexer.range('"', false, false)
-- Triple-quoted strings are tried first so that ''' / """ are not consumed as
-- an empty single-line string.
lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str +
  raw_sq_str + raw_dq_str))

-- Numbers.
-- Decimal integer with an optional long suffix.
local dec = lexer.dec_num * S('Ll')^-1
-- Binary literal: 0b / 0B, digits optionally grouped with single underscores.
local bin_digits = S('01')^1
local bin = '0' * S('bB') * bin_digits * ('_' * bin_digits)^0
-- Octal literal: either the 0o / 0O form (digits optionally grouped with
-- single underscores) or the lexer module's `oct_num` with an optional long
-- suffix.
local oct_digits = S('01234567')^1
local oct = '0' * S('oO') * oct_digits * ('_' * oct_digits)^0 +
  lexer.oct_num * S('Ll')^-1
-- An optional sign followed by one of the integer forms; the prefixed forms
-- are tried before plain decimal so the leading '0' is not taken on its own.
local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))

-- Decorators.
-- Everything from '@' to the end of the line is a single token.
lex:add_rule('decorator', token('decorator', lexer.to_eol('@')))
lex:add_style('decorator', lexer.styles.preprocessor)

-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))

return lex