1# Copyright 2020 the authors.
2# This file is part of Hy, which is free software licensed under the Expat
3# license. See the LICENSE.
4
5from rply import LexerGenerator
6
7
# Rule collector for the Hy tokenizer: the `lg.add(...)` and `lg.ignore(...)`
# calls in this module register patterns on it, and `lg.build()` at the end
# of the module turns it into `lexer`.
lg = LexerGenerator()
9
10
# Characters that may NOT directly follow a quoting/unquoting operator:
# whitespace or a closing paren/bracket/curly.  Kept as a bare character-class
# body so other patterns can extend it with extra characters.
end_quote_set = r'\s\)\]\}'
# Negative lookahead over that set; it is appended to each operator's pattern.
end_quote = r'(?![' + end_quote_set + r'])'

# One or more characters, none of which is a paren, bracket, curly, single or
# double quote, whitespace, or the comment character `;`.
identifier = r'[^()\[\]{}\'"\s;]+'
17
# Token rules for delimiters, quoting operators and `#` forms, registered in
# exactly this sequence.  The sequence is significant if, as rply documents,
# rules are tried in the order they were added: `~@` has to be seen before
# `~`, and the specific `#...` rules before the catch-all HASHOTHER.
for _rule in (
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACKET', r'\['),
    ('RBRACKET', r'\]'),
    ('LCURLY', r'\{'),
    ('RCURLY', r'\}'),
    ('HLCURLY', r'#\{'),
    # Quoting operators must not be followed by whitespace or a closer.
    ('QUOTE', r"\'" + end_quote),
    ('QUASIQUOTE', r'`' + end_quote),
    ('UNQUOTESPLICE', r'~@' + end_quote),
    ('UNQUOTE', r'~' + end_quote),
    # `^` is additionally not an annotation when followed by `=`.
    ('ANNOTATION', r'\^(?![=' + end_quote_set + r'])'),
    ('DISCARD', r'#_'),
    ('HASHSTARS', r'#\*+'),
    # `#[DELIM[ ... ]DELIM]`; the backreference makes the closing delimiter
    # repeat whatever text appeared between the two opening brackets.
    ('BRACKETSTRING', r'''(?x)
    \# \[ ( [^\[\]]* ) \[    # Opening delimiter
    \n?                      # A single leading newline will be ignored
    ((?:\n|.)*?)             # Content of the string
    \] \1 \]                 # Closing delimiter
    '''),
    ('HASHOTHER', r'#' + identifier),
):
    lg.add(*_rule)
del _rule
39
# A regexp which matches an incomplete string: an optional prefix, the
# opening double quote, and as much string content as can be consumed, but no
# closing quote.  It is used to support multi-line strings in the
# interpreter; appending `"` to it gives the pattern for a complete string.
#
# NOTE: the repeated group must not begin with an empty alternative
# (`(?: | ...)*`).  Python's `re` tries that empty branch first and ends the
# repetition on a zero-width match, so the pattern on its own would match
# only the opening quote instead of the incomplete string.
partial_string = r'''(?x)
    (?:u|r|ur|ru|b|br|rb|f|fr|rf)? # prefix
    "  # start string
    (?:
         [^"\\]             # any character except a quote or a backslash
       | \\(.|\n)           # or escaped single character or newline
       | \\x[0-9a-fA-F]{2}  # or escaped raw character
       | \\u[0-9a-fA-F]{4}  # or unicode escape
       | \\U[0-9a-fA-F]{8}  # or long unicode escape
    )* # zero or more times
'''
53
# String literals.  STRING is `partial_string` plus the closing quote;
# PARTIAL_STRING is the same pattern without it and is registered afterwards,
# for a string whose closing quote is missing.
lg.add('STRING', partial_string + '"')
lg.add('PARTIAL_STRING', partial_string)

# Registered after every other token rule.
lg.add('IDENTIFIER', identifier)


# Input that is skipped rather than turned into tokens: `;` comments running
# up to (but not including) the end of the line, and runs of whitespace.
for _skipped in (r';.*(?=\r|\n|$)', r'\s+'):
    lg.ignore(_skipped)
del _skipped


# The finished lexer, built from all rules registered on `lg`.
lexer = lg.build()
65