1 //
2 //  gravity_token.c
3 //  gravity
4 //
5 //  Created by Marco Bambini on 31/08/14.
6 //  Copyright (c) 2014 CreoLabs. All rights reserved.
7 //
8 
9 #include "gravity_token.h"
10 #include "gravity_utils.h"
11 
// Returns the text pointer stored in the token (token.value).
// When len is non-null, *len receives the token's byte count (token.bytes).
const char *token_string (gtoken_s token, uint32_t *len) {
    const char *text = token.value;

    if (len) {
        *len = token.bytes;
    }

    return text;
}
16 
// Maps a token type to a printable name: an upper-case label for the general
// token classes, the keyword spelling for keywords and the symbol for operators.
// Any value not handled by the switch yields "UNRECOGNIZED TOKEN".
const char *token_name (gtoken_t token) {
    // fallback used only when no case below matches (should never happen)
    const char *name = "UNRECOGNIZED TOKEN";

    // intentionally no default label, so the compiler can report unhandled enumerators
    switch (token) {
        // general
        case TOK_EOF:                    name = "EOF"; break;
        case TOK_ERROR:                  name = "ERROR"; break;
        case TOK_COMMENT:                name = "COMMENT"; break;
        case TOK_STRING:                 name = "STRING"; break;
        case TOK_NUMBER:                 name = "NUMBER"; break;
        case TOK_IDENTIFIER:             name = "IDENTIFIER"; break;
        case TOK_SPECIAL:                name = "SPECIAL"; break;
        case TOK_MACRO:                  name = "MACRO"; break;

        // keywords
        case TOK_KEY_FILE:               name = "file"; break;
        case TOK_KEY_FUNC:               name = "func"; break;
        case TOK_KEY_SUPER:              name = "super"; break;
        case TOK_KEY_DEFAULT:            name = "default"; break;
        case TOK_KEY_TRUE:               name = "true"; break;
        case TOK_KEY_FALSE:              name = "false"; break;
        case TOK_KEY_IF:                 name = "if"; break;
        case TOK_KEY_ELSE:               name = "else"; break;
        case TOK_KEY_SWITCH:             name = "switch"; break;
        case TOK_KEY_BREAK:              name = "break"; break;
        case TOK_KEY_CONTINUE:           name = "continue"; break;
        case TOK_KEY_RETURN:             name = "return"; break;
        case TOK_KEY_WHILE:              name = "while"; break;
        case TOK_KEY_REPEAT:             name = "repeat"; break;
        case TOK_KEY_FOR:                name = "for"; break;
        case TOK_KEY_IN:                 name = "in"; break;
        case TOK_KEY_ENUM:               name = "enum"; break;
        case TOK_KEY_CLASS:              name = "class"; break;
        case TOK_KEY_STRUCT:             name = "struct"; break;
        case TOK_KEY_PRIVATE:            name = "private"; break;
        case TOK_KEY_INTERNAL:           name = "internal"; break;
        case TOK_KEY_PUBLIC:             name = "public"; break;
        case TOK_KEY_STATIC:             name = "static"; break;
        case TOK_KEY_EXTERN:             name = "extern"; break;
        case TOK_KEY_LAZY:               name = "lazy"; break;
        case TOK_KEY_CONST:              name = "const"; break;
        case TOK_KEY_VAR:                name = "var"; break;
        case TOK_KEY_MODULE:             name = "module"; break;
        case TOK_KEY_IMPORT:             name = "import"; break;
        case TOK_KEY_CASE:               name = "case"; break;
        case TOK_KEY_EVENT:              name = "event"; break;
        case TOK_KEY_NULL:               name = "null"; break;
        case TOK_KEY_UNDEFINED:          name = "undefined"; break;
        case TOK_KEY_ISA:                name = "is"; break;
        case TOK_KEY_CURRARGS:           name = "_args"; break;
        case TOK_KEY_CURRFUNC:           name = "_func"; break;

        // operators
        case TOK_OP_ADD:                 name = "+"; break;
        case TOK_OP_SUB:                 name = "-"; break;
        case TOK_OP_DIV:                 name = "/"; break;
        case TOK_OP_MUL:                 name = "*"; break;
        case TOK_OP_REM:                 name = "%"; break;
        case TOK_OP_ASSIGN:              name = "="; break;
        case TOK_OP_LESS:                name = "<"; break;
        case TOK_OP_GREATER:             name = ">"; break;
        case TOK_OP_LESS_EQUAL:          name = "<="; break;
        case TOK_OP_GREATER_EQUAL:       name = ">="; break;
        case TOK_OP_ADD_ASSIGN:          name = "+="; break;
        case TOK_OP_SUB_ASSIGN:          name = "-="; break;
        case TOK_OP_DIV_ASSIGN:          name = "/="; break;
        case TOK_OP_MUL_ASSIGN:          name = "*="; break;
        case TOK_OP_REM_ASSIGN:          name = "%="; break;
        case TOK_OP_NOT:                 name = "!"; break;
        case TOK_OP_AND:                 name = "&&"; break;
        case TOK_OP_OR:                  name = "||"; break;
        case TOK_OP_ISEQUAL:             name = "=="; break;
        case TOK_OP_ISNOTEQUAL:          name = "!="; break;
        case TOK_OP_RANGE_INCLUDED:      name = "..."; break;
        case TOK_OP_RANGE_EXCLUDED:      name = "..<"; break;
        case TOK_OP_TERNARY:             name = "?"; break;
        case TOK_OP_SHIFT_LEFT:          name = "<<"; break;
        case TOK_OP_SHIFT_RIGHT:         name = ">>"; break;
        case TOK_OP_BIT_AND:             name = "&"; break;
        case TOK_OP_BIT_OR:              name = "|"; break;
        case TOK_OP_BIT_XOR:             name = "^"; break;
        case TOK_OP_BIT_NOT:             name = "~"; break;
        case TOK_OP_ISIDENTICAL:         name = "==="; break;
        case TOK_OP_ISNOTIDENTICAL:      name = "!=="; break;
        case TOK_OP_PATTERN_MATCH:       name = "~="; break;
        case TOK_OP_SHIFT_LEFT_ASSIGN:   name = "<<="; break;
        case TOK_OP_SHIFT_RIGHT_ASSIGN:  name = ">>="; break;
        case TOK_OP_BIT_AND_ASSIGN:      name = "&="; break;
        case TOK_OP_BIT_OR_ASSIGN:       name = "|="; break;
        case TOK_OP_BIT_XOR_ASSIGN:      name = "^="; break;

        // punctuation
        case TOK_OP_OPEN_PARENTHESIS:    name = "("; break;
        case TOK_OP_CLOSED_PARENTHESIS:  name = ")"; break;
        case TOK_OP_OPEN_SQUAREBRACKET:  name = "["; break;
        case TOK_OP_CLOSED_SQUAREBRACKET: name = "]"; break;
        case TOK_OP_OPEN_CURLYBRACE:     name = "{"; break;
        case TOK_OP_CLOSED_CURLYBRACE:   name = "}"; break;
        case TOK_OP_SEMICOLON:           name = ";"; break;
        case TOK_OP_COLON:               name = ":"; break;
        case TOK_OP_COMMA:               name = ","; break;
        case TOK_OP_DOT:                 name = "."; break;

        // TOK_END maps to the empty string
        case TOK_END:                    name = ""; break;
    }

    return name;
}
122 
token_keywords_indexes(uint32_t * idx_start,uint32_t * idx_end)123 void token_keywords_indexes (uint32_t *idx_start, uint32_t *idx_end) {
124     *idx_start = (uint32_t)TOK_KEY_FUNC;
125     *idx_end = (uint32_t)TOK_KEY_CURRARGS;
126 };
127 
token_special_builtin(gtoken_s * token)128 gtoken_t token_special_builtin(gtoken_s *token) {
129     const char *buffer = token->value;
130     int32_t len = token->bytes;
131 
132     switch (len) {
133         case 8:
134             if (string_casencmp(buffer, "__LINE__", len) == 0) {
135                 token->builtin = BUILTIN_LINE;
136                 return TOK_NUMBER;
137             }
138             if (string_casencmp(buffer, "__FILE__", len) == 0) {
139                 token->builtin = BUILTIN_FILE;
140                 return TOK_STRING;
141             }
142             break;
143 
144         case 9:
145             if (string_casencmp(buffer, "__CLASS__", len) == 0) {
146                 token->builtin = BUILTIN_CLASS;
147                 return TOK_STRING;
148             }
149             break;
150 
151         case 10:
152             if (string_casencmp(buffer, "__COLUMN__", len) == 0) {
153                 token->builtin = BUILTIN_COLUMN;
154                 return TOK_NUMBER;
155             }
156             break;
157 
158         case 12:
159             if (string_casencmp(buffer, "__FUNCTION__", len) == 0) {
160                 token->builtin = BUILTIN_FUNC;
161                 return TOK_STRING;
162             }
163             break;
164     }
165 
166     return TOK_IDENTIFIER;
167 }
168 
token_keyword(const char * buffer,int32_t len)169 gtoken_t token_keyword (const char *buffer, int32_t len) {
170     switch (len) {
171         case 2:
172             if (string_casencmp(buffer, "if", len) == 0) return TOK_KEY_IF;
173             if (string_casencmp(buffer, "in", len) == 0) return TOK_KEY_IN;
174             if (string_casencmp(buffer, "or", len) == 0) return TOK_OP_OR;
175             if (string_casencmp(buffer, "is", len) == 0) return TOK_KEY_ISA;
176             break;
177 
178         case 3:
179             if (string_casencmp(buffer, "for", len) == 0) return TOK_KEY_FOR;
180             if (string_casencmp(buffer, "var", len) == 0) return TOK_KEY_VAR;
181             if (string_casencmp(buffer, "and", len) == 0) return TOK_OP_AND;
182             if (string_casencmp(buffer, "not", len) == 0) return TOK_OP_NOT;
183             break;
184 
185         case 4:
186             if (string_casencmp(buffer, "func", len) == 0) return TOK_KEY_FUNC;
187             if (string_casencmp(buffer, "else", len) == 0) return TOK_KEY_ELSE;
188             if (string_casencmp(buffer, "true", len) == 0) return TOK_KEY_TRUE;
189             if (string_casencmp(buffer, "enum", len) == 0) return TOK_KEY_ENUM;
190             if (string_casencmp(buffer, "case", len) == 0) return TOK_KEY_CASE;
191             if (string_casencmp(buffer, "null", len) == 0) return TOK_KEY_NULL;
192             if (string_casencmp(buffer, "NULL", len) == 0) return TOK_KEY_NULL;
193             if (string_casencmp(buffer, "file", len) == 0) return TOK_KEY_FILE;
194             if (string_casencmp(buffer, "lazy", len) == 0) return TOK_KEY_LAZY;
195             break;
196 
197         case 5:
198             if (string_casencmp(buffer, "super", len) == 0) return TOK_KEY_SUPER;
199             if (string_casencmp(buffer, "false", len) == 0) return TOK_KEY_FALSE;
200             if (string_casencmp(buffer, "break", len) == 0) return TOK_KEY_BREAK;
201             if (string_casencmp(buffer, "while", len) == 0) return TOK_KEY_WHILE;
202             if (string_casencmp(buffer, "class", len) == 0) return TOK_KEY_CLASS;
203             if (string_casencmp(buffer, "const", len) == 0) return TOK_KEY_CONST;
204             if (string_casencmp(buffer, "event", len) == 0) return TOK_KEY_EVENT;
205             if (string_casencmp(buffer, "_func", len) == 0) return TOK_KEY_CURRFUNC;
206             if (string_casencmp(buffer, "_args", len) == 0) return TOK_KEY_CURRARGS;
207             break;
208 
209         case 6:
210             if (string_casencmp(buffer, "struct", len) == 0) return TOK_KEY_STRUCT;
211             if (string_casencmp(buffer, "repeat", len) == 0) return TOK_KEY_REPEAT;
212             if (string_casencmp(buffer, "switch", len) == 0) return TOK_KEY_SWITCH;
213             if (string_casencmp(buffer, "return", len) == 0) return TOK_KEY_RETURN;
214             if (string_casencmp(buffer, "public", len) == 0) return TOK_KEY_PUBLIC;
215             if (string_casencmp(buffer, "static", len) == 0) return TOK_KEY_STATIC;
216             if (string_casencmp(buffer, "extern", len) == 0) return TOK_KEY_EXTERN;
217             if (string_casencmp(buffer, "import", len) == 0) return TOK_KEY_IMPORT;
218             if (string_casencmp(buffer, "module", len) == 0) return TOK_KEY_MODULE;
219             break;
220 
221         case 7:
222             if (string_casencmp(buffer, "default", len) == 0) return TOK_KEY_DEFAULT;
223             if (string_casencmp(buffer, "private", len) == 0) return TOK_KEY_PRIVATE;
224             break;
225 
226         case 8:
227             if (string_casencmp(buffer, "continue", len) == 0) return TOK_KEY_CONTINUE;
228             if (string_casencmp(buffer, "internal", len) == 0) return TOK_KEY_INTERNAL;
229             break;
230 
231         case 9:
232             if (string_casencmp(buffer, "undefined", len) == 0) return TOK_KEY_UNDEFINED;
233             break;
234     }
235 
236     return TOK_IDENTIFIER;
237 }
238 
// Returns a printable label for a literal type, or "N/A" for any value
// that is not one of the literal kinds handled below.
const char *token_literal_name (gliteral_t value) {
    switch (value) {
        case LITERAL_STRING: return "STRING";
        case LITERAL_FLOAT: return "FLOAT";
        case LITERAL_INT: return "INTEGER";
        case LITERAL_BOOL: return "BOOLEAN";
        case LITERAL_STRING_INTERPOLATED: return "STRING INTERPOLATED";
        default: break;
    }

    return "N/A";
}
247 
248 // MARK: -
249 
// True only for TOK_IDENTIFIER.
bool token_isidentifier (gtoken_t token) {
    return token == TOK_IDENTIFIER;
}
253 
// True when the token introduces a variable declaration: 'const' or 'var'.
bool token_isvariable_declaration (gtoken_t token) {
    switch (token) {
        case TOK_KEY_CONST:
        case TOK_KEY_VAR:
            return true;
        default:
            return false;
    }
}
257 
// True when the token can begin a statement. TOK_EOF never does; otherwise the
// token is accepted as soon as one of the statement-category predicates accepts it.
bool token_isstatement (gtoken_t token) {
    if (token == TOK_EOF) return false;

    if (token_islabel_statement(token)) return true;        // case, default
    if (token_isexpression_statement(token)) return true;   // primary expression, '+', '-', '!', _args, _func
    if (token_isflow_statement(token)) return true;         // if, switch
    if (token_isloop_statement(token)) return true;         // while, repeat, for
    if (token_isjump_statement(token)) return true;         // break, continue, return
    if (token_iscompound_statement(token)) return true;     // {
    if (token_isdeclaration_statement(token)) return true;  // specifiers and declaration keywords
    if (token_isempty_statement(token)) return true;        // ;
    if (token_isimport_statement(token)) return true;       // import

    return token_ismacro(token);                            // macro
}
276 
// True for the plain assignment operator and every compound assignment operator
// (=, *=, /=, %=, +=, -=, <<=, >>=, &=, |=, ^=).
bool token_isassignment (gtoken_t token) {
    switch (token) {
        case TOK_OP_ASSIGN:
        case TOK_OP_MUL_ASSIGN:
        case TOK_OP_DIV_ASSIGN:
        case TOK_OP_REM_ASSIGN:
        case TOK_OP_ADD_ASSIGN:
        case TOK_OP_SUB_ASSIGN:
        case TOK_OP_SHIFT_LEFT_ASSIGN:
        case TOK_OP_SHIFT_RIGHT_ASSIGN:
        case TOK_OP_BIT_AND_ASSIGN:
        case TOK_OP_BIT_OR_ASSIGN:
        case TOK_OP_BIT_XOR_ASSIGN:
            return true;
        default:
            return false;
    }
}
283 
// True only for the plain '=' operator (compound assignments are excluded).
bool token_isvariable_assignment (gtoken_t token) {
    return token == TOK_OP_ASSIGN;
}
287 
// True for the access specifier keywords: 'private', 'internal', 'public'.
bool token_isaccess_specifier (gtoken_t token) {
    switch (token) {
        case TOK_KEY_PRIVATE:
        case TOK_KEY_INTERNAL:
        case TOK_KEY_PUBLIC:
            return true;
        default:
            return false;
    }
}
291 
// True for the storage specifier keywords: 'static', 'extern', 'lazy'.
bool token_isstorage_specifier (gtoken_t token) {
    switch (token) {
        case TOK_KEY_STATIC:
        case TOK_KEY_EXTERN:
        case TOK_KEY_LAZY:
            return true;
        default:
            return false;
    }
}
295 
// True when the token can start a primary expression.
bool token_isprimary_expression (gtoken_t token) {
    switch (token) {
        // literals
        case TOK_NUMBER:
        case TOK_STRING:
        case TOK_KEY_TRUE:
        case TOK_KEY_FALSE:
        // names and keyword values
        case TOK_IDENTIFIER:
        case TOK_KEY_NULL:
        case TOK_KEY_SUPER:
        case TOK_KEY_FUNC:
        case TOK_KEY_UNDEFINED:
        case TOK_KEY_FILE:
        // opening delimiters: '(' expression ')', and the '[' / '{' forms
        case TOK_OP_OPEN_PARENTHESIS:
        case TOK_OP_OPEN_SQUAREBRACKET:
        case TOK_OP_OPEN_CURLYBRACE:
            return true;
        default:
            return false;
    }
}
317 
// True when the token can start an expression statement: anything accepted by
// token_isprimary_expression, a unary operator token (TOK_OP_ADD, TOK_OP_SUB,
// TOK_OP_NOT), or one of the _args / _func keywords.
bool token_isexpression_statement (gtoken_t token) {
    if (token_isprimary_expression(token)) return true;

    switch (token) {
        case TOK_OP_ADD:
        case TOK_OP_SUB:
        case TOK_OP_NOT:
        case TOK_KEY_CURRARGS:
        case TOK_KEY_CURRFUNC:
            return true;
        default:
            return false;
    }
}
327 
// True for the label keywords: 'case' and 'default'.
bool token_islabel_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_CASE:
        case TOK_KEY_DEFAULT:
            return true;
        default:
            return false;
    }
}
331 
// True for the flow keywords: 'if' and 'switch'.
bool token_isflow_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_IF:
        case TOK_KEY_SWITCH:
            return true;
        default:
            return false;
    }
}
335 
// True for the loop keywords: 'while', 'repeat' and 'for'.
bool token_isloop_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_WHILE:
        case TOK_KEY_REPEAT:
        case TOK_KEY_FOR:
            return true;
        default:
            return false;
    }
}
339 
// True for the jump keywords: 'break', 'continue' and 'return'.
bool token_isjump_statement (gtoken_t token) {
    switch (token) {
        case TOK_KEY_BREAK:
        case TOK_KEY_CONTINUE:
        case TOK_KEY_RETURN:
            return true;
        default:
            return false;
    }
}
343 
// True only for the opening curly brace '{'.
bool token_iscompound_statement (gtoken_t token) {
    return token == TOK_OP_OPEN_CURLYBRACE;
}
347 
// True when the token can start a declaration:
//   - an access specifier, a storage specifier or a variable declaration keyword
//   - 'func', 'class', 'struct', 'enum', 'module', 'event'
//   - ';' (empty declaration)
bool token_isdeclaration_statement (gtoken_t token) {
    if (token_isaccess_specifier(token)) return true;
    if (token_isstorage_specifier(token)) return true;
    if (token_isvariable_declaration(token)) return true;

    switch (token) {
        case TOK_KEY_FUNC:
        case TOK_KEY_CLASS:
        case TOK_KEY_STRUCT:
        case TOK_KEY_ENUM:
        case TOK_KEY_MODULE:
        case TOK_KEY_EVENT:
        case TOK_OP_SEMICOLON:
            return true;
        default:
            return false;
    }
}
361 
// True only for ';'.
bool token_isempty_statement (gtoken_t token) {
    return token == TOK_OP_SEMICOLON;
}
365 
// True only for the 'import' keyword.
bool token_isimport_statement (gtoken_t token) {
    return token == TOK_KEY_IMPORT;
}
369 
// True only for TOK_SPECIAL.
bool token_isspecial_statement (gtoken_t token) {
    return token == TOK_SPECIAL;
}
373 
// True when the token value lies in the inclusive range
// [TOK_OP_SHIFT_LEFT, TOK_OP_NOT].
// NOTE(review): this assumes the enum declares the operator tokens contiguously
// between those two enumerators - confirm against gravity_token.h.
bool token_isoperator (gtoken_t token) {
    if (token < TOK_OP_SHIFT_LEFT) return false;
    return token <= TOK_OP_NOT;
}
377 
// True only for TOK_MACRO.
bool token_ismacro (gtoken_t token) {
    return token == TOK_MACRO;
}
381 
// True only for TOK_ERROR.
bool token_iserror (gtoken_t token) {
    return token == TOK_ERROR;
}
385 
// True only for TOK_EOF.
bool token_iseof (gtoken_t token) {
    return token == TOK_EOF;
}
389