# Copyright 2020 the authors.
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.

from rply import LexerGenerator


lg = LexerGenerator()


# Characters that must not come directly after a quoting/unquoting
# operator: whitespace or a closing paren/bracket/curly.  `end_quote` is
# the negative lookahead built from that character set.
end_quote_set = r'\s\)\]\}'
end_quote = r'(?![' + end_quote_set + r'])'

# One or more characters that are not a delimiter, a quote character,
# whitespace, or a semicolon.
identifier = r'[^()\[\]{}\'"\s;]+'

# Bracket string: `#[DELIM[ ... ]DELIM]`, where the text captured between
# the two opening brackets must be repeated between the closing brackets.
_bracket_string = r'''(?x)
    \# \[ ( [^\[\]]* ) \[    # Opening delimiter
    \n?                      # A single leading newline will be ignored
    ((?:\n|.)*?)             # Content of the string
    \] \1 \]                 # Closing delimiter
    '''

# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter.  A complete string is this
# pattern followed by the closing double quote.
partial_string = r'''(?x)
    (?:u|r|ur|ru|b|br|rb|f|fr|rf)? # prefix
    "  # start string
    (?:
       | [^"\\]             # non-quote or backslash
       | \\(.|\n)           # or escaped single character or newline
       | \\x[0-9a-fA-F]{2}  # or escaped raw character
       | \\u[0-9a-fA-F]{4}  # or unicode escape
       | \\U[0-9a-fA-F]{8}  # or long unicode escape
    )* # one or more times
'''

# Token rules as (name, pattern) pairs, registered in exactly this order.
# NOTE(review): the order is presumably significant to rply when several
# patterns can match at the same position (e.g. STRING ahead of
# PARTIAL_STRING, HASHOTHER after the more specific `#...` rules) --
# confirm against the rply documentation before reordering.
_TOKEN_RULES = (
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACKET', r'\['),
    ('RBRACKET', r'\]'),
    ('LCURLY', r'\{'),
    ('RCURLY', r'\}'),
    ('HLCURLY', r'#\{'),
    ('QUOTE', r"\'" + end_quote),
    ('QUASIQUOTE', '`' + end_quote),
    ('UNQUOTESPLICE', '~@' + end_quote),
    ('UNQUOTE', '~' + end_quote),
    # A caret that is not followed by `=` or by an end-of-quote character.
    ('ANNOTATION', r'\^(?![=' + end_quote_set + r'])'),
    ('DISCARD', r'#_'),
    ('HASHSTARS', r'#\*+'),
    ('BRACKETSTRING', _bracket_string),
    ('HASHOTHER', '#' + identifier),
    ('STRING', partial_string + '"'),
    ('PARTIAL_STRING', partial_string),
    ('IDENTIFIER', identifier),
)

for _rule_name, _rule_pattern in _TOKEN_RULES:
    lg.add(_rule_name, _rule_pattern)


# Input dropped by the lexer: `;` comments running to the end of the
# line (or end of input), and runs of whitespace.
for _ignored_pattern in (r';.*(?=\r|\n|$)', r'\s+'):
    lg.ignore(_ignored_pattern)


lexer = lg.build()