1 //
2 // gravity_token.c
3 // gravity
4 //
5 // Created by Marco Bambini on 31/08/14.
6 // Copyright (c) 2014 CreoLabs. All rights reserved.
7 //
8
9 #include "gravity_token.h"
10 #include "gravity_utils.h"
11
// Returns the text pointer stored in the token (token.value).
// When len is non-NULL it additionally receives the token size in bytes
// (token.bytes); pass NULL if the size is not needed.
const char *token_string (gtoken_s token, uint32_t *len) {
    if (len) {
        *len = token.bytes;
    }

    return token.value;
}
16
// Maps a token type to a constant, printable string.
// General token classes yield an upper-case label, keywords and operators
// yield their source spelling, and TOK_END yields an empty string.
// Any value not listed below yields "UNRECOGNIZED TOKEN".
const char *token_name (gtoken_t token) {
    switch (token) {
        // general token classes
        case TOK_EOF:                       return "EOF";
        case TOK_ERROR:                     return "ERROR";
        case TOK_COMMENT:                   return "COMMENT";
        case TOK_STRING:                    return "STRING";
        case TOK_NUMBER:                    return "NUMBER";
        case TOK_IDENTIFIER:                return "IDENTIFIER";
        case TOK_SPECIAL:                   return "SPECIAL";
        case TOK_MACRO:                     return "MACRO";

        // keywords
        case TOK_KEY_FILE:                  return "file";
        case TOK_KEY_FUNC:                  return "func";
        case TOK_KEY_SUPER:                 return "super";
        case TOK_KEY_DEFAULT:               return "default";
        case TOK_KEY_TRUE:                  return "true";
        case TOK_KEY_FALSE:                 return "false";
        case TOK_KEY_IF:                    return "if";
        case TOK_KEY_ELSE:                  return "else";
        case TOK_KEY_SWITCH:                return "switch";
        case TOK_KEY_BREAK:                 return "break";
        case TOK_KEY_CONTINUE:              return "continue";
        case TOK_KEY_RETURN:                return "return";
        case TOK_KEY_WHILE:                 return "while";
        case TOK_KEY_REPEAT:                return "repeat";
        case TOK_KEY_FOR:                   return "for";
        case TOK_KEY_IN:                    return "in";
        case TOK_KEY_ENUM:                  return "enum";
        case TOK_KEY_CLASS:                 return "class";
        case TOK_KEY_STRUCT:                return "struct";
        case TOK_KEY_PRIVATE:               return "private";
        case TOK_KEY_INTERNAL:              return "internal";
        case TOK_KEY_PUBLIC:                return "public";
        case TOK_KEY_STATIC:                return "static";
        case TOK_KEY_EXTERN:                return "extern";
        case TOK_KEY_LAZY:                  return "lazy";
        case TOK_KEY_CONST:                 return "const";
        case TOK_KEY_VAR:                   return "var";
        case TOK_KEY_MODULE:                return "module";
        case TOK_KEY_IMPORT:                return "import";
        case TOK_KEY_CASE:                  return "case";
        case TOK_KEY_EVENT:                 return "event";
        case TOK_KEY_NULL:                  return "null";
        case TOK_KEY_UNDEFINED:             return "undefined";
        case TOK_KEY_ISA:                   return "is";
        case TOK_KEY_CURRARGS:              return "_args";
        case TOK_KEY_CURRFUNC:              return "_func";

        // arithmetic, comparison, logical and bitwise operators
        case TOK_OP_ADD:                    return "+";
        case TOK_OP_SUB:                    return "-";
        case TOK_OP_DIV:                    return "/";
        case TOK_OP_MUL:                    return "*";
        case TOK_OP_REM:                    return "%";
        case TOK_OP_ASSIGN:                 return "=";
        case TOK_OP_LESS:                   return "<";
        case TOK_OP_GREATER:                return ">";
        case TOK_OP_LESS_EQUAL:             return "<=";
        case TOK_OP_GREATER_EQUAL:          return ">=";
        case TOK_OP_ADD_ASSIGN:             return "+=";
        case TOK_OP_SUB_ASSIGN:             return "-=";
        case TOK_OP_DIV_ASSIGN:             return "/=";
        case TOK_OP_MUL_ASSIGN:             return "*=";
        case TOK_OP_REM_ASSIGN:             return "%=";
        case TOK_OP_NOT:                    return "!";
        case TOK_OP_AND:                    return "&&";
        case TOK_OP_OR:                     return "||";
        case TOK_OP_ISEQUAL:                return "==";
        case TOK_OP_ISNOTEQUAL:             return "!=";
        case TOK_OP_RANGE_INCLUDED:         return "...";
        case TOK_OP_RANGE_EXCLUDED:         return "..<";
        case TOK_OP_TERNARY:                return "?";
        case TOK_OP_SHIFT_LEFT:             return "<<";
        case TOK_OP_SHIFT_RIGHT:            return ">>";
        case TOK_OP_BIT_AND:                return "&";
        case TOK_OP_BIT_OR:                 return "|";
        case TOK_OP_BIT_XOR:                return "^";
        case TOK_OP_BIT_NOT:                return "~";
        case TOK_OP_ISIDENTICAL:            return "===";
        case TOK_OP_ISNOTIDENTICAL:         return "!==";
        case TOK_OP_PATTERN_MATCH:          return "~=";
        case TOK_OP_SHIFT_LEFT_ASSIGN:      return "<<=";
        case TOK_OP_SHIFT_RIGHT_ASSIGN:     return ">>=";
        case TOK_OP_BIT_AND_ASSIGN:         return "&=";
        case TOK_OP_BIT_OR_ASSIGN:          return "|=";
        case TOK_OP_BIT_XOR_ASSIGN:         return "^=";

        // brackets and punctuation
        case TOK_OP_OPEN_PARENTHESIS:       return "(";
        case TOK_OP_CLOSED_PARENTHESIS:     return ")";
        case TOK_OP_OPEN_SQUAREBRACKET:     return "[";
        case TOK_OP_CLOSED_SQUAREBRACKET:   return "]";
        case TOK_OP_OPEN_CURLYBRACE:        return "{";
        case TOK_OP_CLOSED_CURLYBRACE:      return "}";
        case TOK_OP_SEMICOLON:              return ";";
        case TOK_OP_COLON:                  return ":";
        case TOK_OP_COMMA:                  return ",";
        case TOK_OP_DOT:                    return ".";

        case TOK_END:                       return "";
    }

    // only reached for a value that has no case label above
    return "UNRECOGNIZED TOKEN";
}
122
token_keywords_indexes(uint32_t * idx_start,uint32_t * idx_end)123 void token_keywords_indexes (uint32_t *idx_start, uint32_t *idx_end) {
124 *idx_start = (uint32_t)TOK_KEY_FUNC;
125 *idx_end = (uint32_t)TOK_KEY_CURRARGS;
126 };
127
token_special_builtin(gtoken_s * token)128 gtoken_t token_special_builtin(gtoken_s *token) {
129 const char *buffer = token->value;
130 int32_t len = token->bytes;
131
132 switch (len) {
133 case 8:
134 if (string_casencmp(buffer, "__LINE__", len) == 0) {
135 token->builtin = BUILTIN_LINE;
136 return TOK_NUMBER;
137 }
138 if (string_casencmp(buffer, "__FILE__", len) == 0) {
139 token->builtin = BUILTIN_FILE;
140 return TOK_STRING;
141 }
142 break;
143
144 case 9:
145 if (string_casencmp(buffer, "__CLASS__", len) == 0) {
146 token->builtin = BUILTIN_CLASS;
147 return TOK_STRING;
148 }
149 break;
150
151 case 10:
152 if (string_casencmp(buffer, "__COLUMN__", len) == 0) {
153 token->builtin = BUILTIN_COLUMN;
154 return TOK_NUMBER;
155 }
156 break;
157
158 case 12:
159 if (string_casencmp(buffer, "__FUNCTION__", len) == 0) {
160 token->builtin = BUILTIN_FUNC;
161 return TOK_STRING;
162 }
163 break;
164 }
165
166 return TOK_IDENTIFIER;
167 }
168
token_keyword(const char * buffer,int32_t len)169 gtoken_t token_keyword (const char *buffer, int32_t len) {
170 switch (len) {
171 case 2:
172 if (string_casencmp(buffer, "if", len) == 0) return TOK_KEY_IF;
173 if (string_casencmp(buffer, "in", len) == 0) return TOK_KEY_IN;
174 if (string_casencmp(buffer, "or", len) == 0) return TOK_OP_OR;
175 if (string_casencmp(buffer, "is", len) == 0) return TOK_KEY_ISA;
176 break;
177
178 case 3:
179 if (string_casencmp(buffer, "for", len) == 0) return TOK_KEY_FOR;
180 if (string_casencmp(buffer, "var", len) == 0) return TOK_KEY_VAR;
181 if (string_casencmp(buffer, "and", len) == 0) return TOK_OP_AND;
182 if (string_casencmp(buffer, "not", len) == 0) return TOK_OP_NOT;
183 break;
184
185 case 4:
186 if (string_casencmp(buffer, "func", len) == 0) return TOK_KEY_FUNC;
187 if (string_casencmp(buffer, "else", len) == 0) return TOK_KEY_ELSE;
188 if (string_casencmp(buffer, "true", len) == 0) return TOK_KEY_TRUE;
189 if (string_casencmp(buffer, "enum", len) == 0) return TOK_KEY_ENUM;
190 if (string_casencmp(buffer, "case", len) == 0) return TOK_KEY_CASE;
191 if (string_casencmp(buffer, "null", len) == 0) return TOK_KEY_NULL;
192 if (string_casencmp(buffer, "NULL", len) == 0) return TOK_KEY_NULL;
193 if (string_casencmp(buffer, "file", len) == 0) return TOK_KEY_FILE;
194 if (string_casencmp(buffer, "lazy", len) == 0) return TOK_KEY_LAZY;
195 break;
196
197 case 5:
198 if (string_casencmp(buffer, "super", len) == 0) return TOK_KEY_SUPER;
199 if (string_casencmp(buffer, "false", len) == 0) return TOK_KEY_FALSE;
200 if (string_casencmp(buffer, "break", len) == 0) return TOK_KEY_BREAK;
201 if (string_casencmp(buffer, "while", len) == 0) return TOK_KEY_WHILE;
202 if (string_casencmp(buffer, "class", len) == 0) return TOK_KEY_CLASS;
203 if (string_casencmp(buffer, "const", len) == 0) return TOK_KEY_CONST;
204 if (string_casencmp(buffer, "event", len) == 0) return TOK_KEY_EVENT;
205 if (string_casencmp(buffer, "_func", len) == 0) return TOK_KEY_CURRFUNC;
206 if (string_casencmp(buffer, "_args", len) == 0) return TOK_KEY_CURRARGS;
207 break;
208
209 case 6:
210 if (string_casencmp(buffer, "struct", len) == 0) return TOK_KEY_STRUCT;
211 if (string_casencmp(buffer, "repeat", len) == 0) return TOK_KEY_REPEAT;
212 if (string_casencmp(buffer, "switch", len) == 0) return TOK_KEY_SWITCH;
213 if (string_casencmp(buffer, "return", len) == 0) return TOK_KEY_RETURN;
214 if (string_casencmp(buffer, "public", len) == 0) return TOK_KEY_PUBLIC;
215 if (string_casencmp(buffer, "static", len) == 0) return TOK_KEY_STATIC;
216 if (string_casencmp(buffer, "extern", len) == 0) return TOK_KEY_EXTERN;
217 if (string_casencmp(buffer, "import", len) == 0) return TOK_KEY_IMPORT;
218 if (string_casencmp(buffer, "module", len) == 0) return TOK_KEY_MODULE;
219 break;
220
221 case 7:
222 if (string_casencmp(buffer, "default", len) == 0) return TOK_KEY_DEFAULT;
223 if (string_casencmp(buffer, "private", len) == 0) return TOK_KEY_PRIVATE;
224 break;
225
226 case 8:
227 if (string_casencmp(buffer, "continue", len) == 0) return TOK_KEY_CONTINUE;
228 if (string_casencmp(buffer, "internal", len) == 0) return TOK_KEY_INTERNAL;
229 break;
230
231 case 9:
232 if (string_casencmp(buffer, "undefined", len) == 0) return TOK_KEY_UNDEFINED;
233 break;
234 }
235
236 return TOK_IDENTIFIER;
237 }
238
// Returns a constant, printable label for a literal type.
// Values other than the five handled below yield "N/A".
const char *token_literal_name (gliteral_t value) {
    switch (value) {
        case LITERAL_STRING:                return "STRING";
        case LITERAL_FLOAT:                 return "FLOAT";
        case LITERAL_INT:                   return "INTEGER";
        case LITERAL_BOOL:                  return "BOOLEAN";
        case LITERAL_STRING_INTERPOLATED:   return "STRING INTERPOLATED";
        default:                            break;
    }

    return "N/A";
}
247
248 // MARK: -
249
// True only for the generic identifier token (TOK_IDENTIFIER).
bool token_isidentifier (gtoken_t token) {
    const bool is_identifier = (TOK_IDENTIFIER == token);
    return is_identifier;
}
253
// True for the keywords that introduce a variable declaration:
// 'const' and 'var'.
bool token_isvariable_declaration (gtoken_t token) {
    switch (token) {
        case TOK_KEY_CONST:
        case TOK_KEY_VAR:
            return true;
        default:
            return false;
    }
}
257
// Reports whether a token can begin a statement.
// TOK_EOF never does. Otherwise the token qualifies when any of the
// statement-category predicates accepts it, tested in this order:
//   label       (token_islabel_statement)
//   expression  (token_isexpression_statement)
//   flow        (token_isflow_statement)
//   loop        (token_isloop_statement)
//   jump        (token_isjump_statement)
//   compound    (token_iscompound_statement)
//   declaration (token_isdeclaration_statement)
//   empty       (token_isempty_statement)
//   import      (token_isimport_statement)
//   macro       (token_ismacro)
bool token_isstatement (gtoken_t token) {
    if (token == TOK_EOF) return false;

    if (token_islabel_statement(token)) return true;
    if (token_isexpression_statement(token)) return true;
    if (token_isflow_statement(token)) return true;
    if (token_isloop_statement(token)) return true;
    if (token_isjump_statement(token)) return true;
    if (token_iscompound_statement(token)) return true;
    if (token_isdeclaration_statement(token)) return true;
    if (token_isempty_statement(token)) return true;
    if (token_isimport_statement(token)) return true;

    return token_ismacro(token);
}
276
// True for the plain assignment operator and for every compound assignment
// operator (arithmetic, shift and bitwise variants).
bool token_isassignment (gtoken_t token) {
    switch (token) {
        case TOK_OP_ASSIGN:
        case TOK_OP_MUL_ASSIGN:
        case TOK_OP_DIV_ASSIGN:
        case TOK_OP_REM_ASSIGN:
        case TOK_OP_ADD_ASSIGN:
        case TOK_OP_SUB_ASSIGN:
        case TOK_OP_SHIFT_LEFT_ASSIGN:
        case TOK_OP_SHIFT_RIGHT_ASSIGN:
        case TOK_OP_BIT_AND_ASSIGN:
        case TOK_OP_BIT_OR_ASSIGN:
        case TOK_OP_BIT_XOR_ASSIGN:
            return true;
        default:
            return false;
    }
}
283
// True only for the plain '=' operator (TOK_OP_ASSIGN); compound assignment
// operators are not accepted here.
bool token_isvariable_assignment (gtoken_t token) {
    const bool is_plain_assign = (TOK_OP_ASSIGN == token);
    return is_plain_assign;
}
287
// True for the access specifier keywords: 'private', 'internal', 'public'.
bool token_isaccess_specifier (gtoken_t token) {
    switch (token) {
        case TOK_KEY_PRIVATE:
        case TOK_KEY_INTERNAL:
        case TOK_KEY_PUBLIC:
            return true;
        default:
            return false;
    }
}
291
// True for the storage specifier keywords: 'static', 'extern', 'lazy'.
bool token_isstorage_specifier (gtoken_t token) {
    switch (token) {
        case TOK_KEY_STATIC:
        case TOK_KEY_EXTERN:
        case TOK_KEY_LAZY:
            return true;
        default:
            return false;
    }
}
295
// Reports whether a token can begin a primary expression. Accepted tokens:
//   number and string literals
//   'true', 'false', 'null', 'undefined'
//   identifiers
//   'super', 'func', 'file'
//   '(' , '[' and '{'
bool token_isprimary_expression (gtoken_t token) {
    switch (token) {
        // literals and constants
        case TOK_NUMBER:
        case TOK_STRING:
        case TOK_KEY_TRUE:
        case TOK_KEY_FALSE:
        case TOK_KEY_NULL:
        case TOK_KEY_UNDEFINED:
        // names and keyword expressions
        case TOK_IDENTIFIER:
        case TOK_KEY_SUPER:
        case TOK_KEY_FUNC:
        case TOK_KEY_FILE:
        // opening brackets
        case TOK_OP_OPEN_PARENTHESIS:
        case TOK_OP_OPEN_SQUAREBRACKET:
        case TOK_OP_OPEN_CURLYBRACE:
            return true;
        default:
            return false;
    }
}
317
// Reports whether a token can begin an expression statement: anything
// accepted by token_isprimary_expression, the unary operators '+', '-' and
// '!', or the '_args' / '_func' keywords.
bool token_isexpression_statement (gtoken_t token) {
    if (token_isprimary_expression(token)) return true;

    switch (token) {
        case TOK_OP_ADD:
        case TOK_OP_SUB:
        case TOK_OP_NOT:
        case TOK_KEY_CURRARGS:
        case TOK_KEY_CURRFUNC:
            return true;
        default:
            return false;
    }
}
327
// True for the label keywords: 'case' and 'default'.
bool token_islabel_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_CASE:
        case TOK_KEY_DEFAULT:
            return true;
        default:
            return false;
    }
}
331
// True for the flow-control keywords: 'if' and 'switch'.
bool token_isflow_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_IF:
        case TOK_KEY_SWITCH:
            return true;
        default:
            return false;
    }
}
335
// True for the loop keywords: 'while', 'repeat' and 'for'.
bool token_isloop_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_WHILE:
        case TOK_KEY_REPEAT:
        case TOK_KEY_FOR:
            return true;
        default:
            return false;
    }
}
339
// True for the jump keywords: 'break', 'continue' and 'return'.
bool token_isjump_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_BREAK:
        case TOK_KEY_CONTINUE:
        case TOK_KEY_RETURN:
            return true;
        default:
            return false;
    }
}
343
// True only for '{' (TOK_OP_OPEN_CURLYBRACE), the token that opens a
// compound statement.
bool token_iscompound_statement (gtoken_t token) {
    const bool opens_block = (TOK_OP_OPEN_CURLYBRACE == token);
    return opens_block;
}
347
// Reports whether a token can begin a declaration. Accepted tokens:
//   access specifiers      (token_isaccess_specifier)
//   storage specifiers     (token_isstorage_specifier)
//   variable declarations  (token_isvariable_declaration)
//   'func', 'class', 'struct', 'enum', 'module', 'event'
//   ';' (empty declaration)
bool token_isdeclaration_statement (gtoken_t token) {
    if (token_isaccess_specifier(token)) return true;
    if (token_isstorage_specifier(token)) return true;
    if (token_isvariable_declaration(token)) return true;

    switch (token) {
        case TOK_KEY_FUNC:
        case TOK_KEY_CLASS:
        case TOK_KEY_STRUCT:
        case TOK_KEY_ENUM:
        case TOK_KEY_MODULE:
        case TOK_KEY_EVENT:
        case TOK_OP_SEMICOLON:
            return true;
        default:
            return false;
    }
}
361
// True only for ';' (TOK_OP_SEMICOLON), i.e. an empty statement.
bool token_isempty_statement (gtoken_t token) {
    const bool is_semicolon = (TOK_OP_SEMICOLON == token);
    return is_semicolon;
}
365
// True only for the 'import' keyword (TOK_KEY_IMPORT).
bool token_isimport_statement (gtoken_t token) {
    const bool is_import = (TOK_KEY_IMPORT == token);
    return is_import;
}
369
// True only for the TOK_SPECIAL token type.
bool token_isspecial_statement (gtoken_t token) {
    const bool is_special = (TOK_SPECIAL == token);
    return is_special;
}
373
// True when the token value lies in the inclusive range
// [TOK_OP_SHIFT_LEFT, TOK_OP_NOT].
// NOTE(review): this relies on the operator enumerators being declared
// contiguously between those two values in gtoken_t - confirm against
// gravity_token.h before reordering the enum.
bool token_isoperator (gtoken_t token) {
    if (token < TOK_OP_SHIFT_LEFT) return false;
    if (token > TOK_OP_NOT) return false;
    return true;
}
377
// True only for the TOK_MACRO token type.
bool token_ismacro (gtoken_t token) {
    const bool is_macro = (TOK_MACRO == token);
    return is_macro;
}
381
// True only for the TOK_ERROR token type.
bool token_iserror (gtoken_t token) {
    const bool is_error = (TOK_ERROR == token);
    return is_error;
}
385
// True only for the end-of-file token type (TOK_EOF).
bool token_iseof (gtoken_t token) {
    const bool at_eof = (TOK_EOF == token);
    return at_eof;
}
389