1# ----------------------------------------------------------------------
2# clex.py
3#
4# A lexer for ANSI C.
5# ----------------------------------------------------------------------
6
7import sys
8sys.path.insert(0, "../..")
9
10import ply.lex as lex
11
# Reserved words: token names of the C keywords this lexer recognises.  The
# keyword as written in C source is the lower-cased form of each name.
reserved = tuple(
    """
    AUTO BREAK CASE CHAR CONST CONTINUE DEFAULT DO DOUBLE
    ELSE ENUM EXTERN FLOAT FOR GOTO IF INT LONG REGISTER
    RETURN SHORT SIGNED SIZEOF STATIC STRUCT SWITCH TYPEDEF
    UNION UNSIGNED VOID VOLATILE WHILE
    """.split()
)
19
# Complete token list handed to PLY: the reserved words followed by every
# literal, operator and delimiter token, one per line with its C spelling.
tokens = reserved + (
    # Literals
    'ID',            # identifier
    'TYPEID',        # NOTE(review): no t_TYPEID rule is visible in this file
    'ICONST',        # integer constant
    'FCONST',        # float constant
    'SCONST',        # string constant
    'CCONST',        # char constant

    # Arithmetic and bitwise operators
    'PLUS',          # +
    'MINUS',         # -
    'TIMES',         # *
    'DIVIDE',        # /
    'MOD',           # %
    'OR',            # |
    'AND',           # &
    'NOT',           # ~
    'XOR',           # ^
    'LSHIFT',        # <<
    'RSHIFT',        # >>

    # Logical operators
    'LOR',           # ||
    'LAND',          # &&
    'LNOT',          # !

    # Comparison operators
    'LT',            # <
    'LE',            # <=
    'GT',            # >
    'GE',            # >=
    'EQ',            # ==
    'NE',            # !=

    # Assignment operators
    'EQUALS',        # =
    'TIMESEQUAL',    # *=
    'DIVEQUAL',      # /=
    'MODEQUAL',      # %=
    'PLUSEQUAL',     # +=
    'MINUSEQUAL',    # -=
    'LSHIFTEQUAL',   # <<=
    'RSHIFTEQUAL',   # >>=
    'ANDEQUAL',      # &=
    'XOREQUAL',      # ^=
    'OREQUAL',       # |=

    # Increment / decrement
    'PLUSPLUS',      # ++
    'MINUSMINUS',    # --

    # Structure dereference
    'ARROW',         # ->

    # Conditional operator
    'CONDOP',        # ?

    # Delimiters
    'LPAREN',        # (
    'RPAREN',        # )
    'LBRACKET',      # [
    'RBRACKET',      # ]
    'LBRACE',        # {
    'RBRACE',        # }
    'COMMA',         # ,
    'PERIOD',        # .
    'SEMI',          # ;
    'COLON',         # :

    # Ellipsis
    'ELLIPSIS',      # ...
)
53
# Characters skipped between tokens: space, horizontal tab and form feed.
t_ignore = " \t\f"
56
57# Newlines
58
59
def t_NEWLINE(t):
    r'\n+'
    # The docstring above is the rule's regex, not documentation.
    # Nothing is returned, so a run of newlines yields no token; it only
    # moves the lexer's line counter forward by the number of newlines.
    consumed = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + consumed
63
# Operators.
# These are string-defined rules; per the PLY documentation such rules are
# tried in order of decreasing pattern length, so e.g. `<<` is attempted
# before `<` regardless of the order in which they are written here.

# Arithmetic
t_PLUS = r"\+"
t_MINUS = r"-"
t_TIMES = r"\*"
t_DIVIDE = r"/"
t_MOD = r"%"

# Bitwise
t_OR = r"\|"
t_AND = r"&"
t_NOT = r"~"
t_XOR = r"\^"
t_LSHIFT = r"<<"
t_RSHIFT = r">>"

# Logical
t_LOR = r"\|\|"
t_LAND = r"&&"
t_LNOT = r"!"

# Comparison
t_LT = r"<"
t_GT = r">"
t_LE = r"<="
t_GE = r">="
t_EQ = r"=="
t_NE = r"!="
85
# Assignment operators: plain `=` plus the compound forms.
t_EQUALS = r"="

# Arithmetic compound assignment
t_TIMESEQUAL = r"\*="
t_DIVEQUAL = r"/="
t_MODEQUAL = r"%="
t_PLUSEQUAL = r"\+="
t_MINUSEQUAL = r"-="

# Shift and bitwise compound assignment
t_LSHIFTEQUAL = r"<<="
t_RSHIFTEQUAL = r">>="
t_ANDEQUAL = r"&="
t_OREQUAL = r"\|="
t_XOREQUAL = r"\^="
99
# Increment / decrement (++, --)
t_PLUSPLUS = r"\+\+"
t_MINUSMINUS = r"--"

# Structure dereference (->)
t_ARROW = r"->"

# Conditional operator (?)
t_CONDOP = r"\?"
109
# Delimiters
# Parentheses, brackets and braces
t_LPAREN = r"\("
t_RPAREN = r"\)"
t_LBRACKET = r"\["
t_RBRACKET = r"\]"
t_LBRACE = r"\{"
t_RBRACE = r"\}"

# Punctuation
t_COMMA = r","
t_PERIOD = r"\."
t_SEMI = r";"
t_COLON = r":"

# Ellipsis (...)
t_ELLIPSIS = r"\.\.\."
122
# Identifiers and reserved words

# Maps each keyword as it appears in C source (lower case) to its token name
# (upper case), so the identifier rule can reclassify keywords with a single
# dictionary lookup.  Built with a comprehension so that no loop variable is
# left behind in the module namespace.
reserved_map = {name.lower(): name for name in reserved}
128
129
def t_ID(t):
    r"(?!L')[A-Za-z_][A-Za-z0-9_]*"
    # The docstring above is the rule's regex, not documentation.
    #
    # * `(?!L')` refuses to start an identifier at an `L` that is directly
    #   followed by a single quote.  PLY tries function rules before
    #   string-defined rules, so without this guard the wide character
    #   constant L'c' would be split into ID `L` followed by CCONST 'c'.
    # * The character classes are spelled out in ASCII because `\w` also
    #   matches non-ASCII letters and digits on Python 3 str patterns, which
    #   are not valid in ANSI C identifiers.
    #
    # Keywords match the same pattern; they are reclassified here by looking
    # the matched text up in reserved_map, falling back to a plain "ID".
    t.type = reserved_map.get(t.value, "ID")
    return t
134
# Integer literal: a 0x/0X hexadecimal literal or a run of decimal digits
# (which also covers octal), followed by an optional unsigned/long suffix.
# The two-letter suffixes are listed first because regex alternation takes
# the first alternative that matches: with the single letters first, "10UL"
# lexed as ICONST "10U" followed by ID "L".  Digits are written as [0-9]
# because \d also matches non-ASCII digits on Python 3 str patterns.
t_ICONST = r'(0[xX][0-9a-fA-F]+|[0-9]+)([uU][lL]|[lL][uU]|[uU]|[lL])?'

# Floating literal: either a fractional form (`1.5`, `.5`, `1.`) with an
# optional exponent, or digits with a mandatory exponent (`1e5`), followed by
# an optional l/L/f/F suffix.  The exponent marker may be `e` or `E`.
# `(?!\.)` keeps `1...3` lexing as ICONST ELLIPSIS ICONST as it did before
# `1.` was accepted.  The spaces around `|` are insignificant because PLY
# compiles rules in verbose mode.
# NOTE: PLY tries string-defined rules longest pattern first, so this pattern
# must stay longer than t_ICONST, otherwise "1.5" would lex as ICONST "1".
t_FCONST = r'(([0-9]*\.[0-9]+|[0-9]+\.(?!\.))([eE][+-]?[0-9]+)? | [0-9]+[eE][+-]?[0-9]+)([lL]|[fF])?'
140
# String literal: a double quote, then the shortest run of ordinary
# characters or backslash escapes, then the closing double quote.  A raw
# newline or a lone backslash is not accepted inside the literal.
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c': same body as a string literal but between
# single quotes.  The optional L prefix can only take effect if no earlier
# rule (such as an identifier rule) has already consumed the `L`.
t_CCONST = r"(L)?\'([^\\\n]|(\\.))*?\'"
146
147# Comments
148
149
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # The docstring above is the rule's regex, not documentation.
    # A /* ... */ comment yields no token (nothing is returned); the only
    # effect is to keep the line counter in step with the newlines it spans.
    newline_total = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + newline_total
153
154# Preprocessor directive (ignored)
155
156
def t_preprocessor(t):
    r'\#([^\\\n]|\\(.|\n))*\n?'
    # The docstring above is the rule's regex, not documentation.
    #
    # A directive runs from `#` to the end of the logical line and yields no
    # token (nothing is returned):
    # * `\\(.|\n)` consumes a backslash together with the character after it,
    #   so a backslash-newline continuation stays inside the directive instead
    #   of letting the continuation lines be lexed as C code.
    # * The trailing newline is optional, so a directive on the last line of
    #   a file without a final newline is still recognised.
    #
    # A continued directive spans several lines, so count every newline that
    # was consumed rather than assuming exactly one.
    t.lexer.lineno += t.value.count('\n')
160
161
def t_error(t):
    # Called with the unmatched input in t.value.  Report the first
    # offending character, then skip past it so that lexing can continue.
    bad_char = t.value[0]
    message = "Illegal character %s" % repr(bad_char)
    print(message)
    t.lexer.skip(1)
165
# Build the lexer.  lex.lex() is called with no arguments at module level;
# per the PLY documentation it then collects `tokens` and the t_* rules from
# the calling module, so this call must stay at module scope, after all the
# rules above have been defined.
lexer = lex.lex()
# When run as a script, hand the lexer to PLY's runmain() test driver.
# NOTE(review): per the PLY docs it tokenizes a file named on the command
# line, or standard input, and prints the tokens -- confirm for the bundled
# PLY version.
if __name__ == "__main__":
    lex.runmain(lexer)
169