1 /*************************************************************************/
2 /*  gdscript_tokenizer.cpp                                               */
3 /*************************************************************************/
4 /*                       This file is part of:                           */
5 /*                           GODOT ENGINE                                */
6 /*                      https://godotengine.org                          */
7 /*************************************************************************/
8 /* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur.                 */
9 /* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md).   */
10 /*                                                                       */
11 /* Permission is hereby granted, free of charge, to any person obtaining */
12 /* a copy of this software and associated documentation files (the       */
13 /* "Software"), to deal in the Software without restriction, including   */
14 /* without limitation the rights to use, copy, modify, merge, publish,   */
15 /* distribute, sublicense, and/or sell copies of the Software, and to    */
16 /* permit persons to whom the Software is furnished to do so, subject to */
17 /* the following conditions:                                             */
18 /*                                                                       */
19 /* The above copyright notice and this permission notice shall be        */
20 /* included in all copies or substantial portions of the Software.       */
21 /*                                                                       */
22 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
23 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
24 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
25 /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
26 /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
27 /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
28 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
29 /*************************************************************************/
30 
31 #include "gdscript_tokenizer.h"
32 
33 #include "core/io/marshalls.h"
34 #include "core/map.h"
35 #include "core/print_string.h"
36 #include "gdscript_functions.h"
37 
// Display names for tokens. get_token_name() indexes this table directly with
// the Token value, so it holds exactly TK_MAX slots.
// NOTE(review): the Token enum is declared in the header, not here; entries
// are presumably expected to follow the enum order one-to-one. Confirm against
// gdscript_tokenizer.h whenever a token is added, removed or reordered.
const char *GDScriptTokenizer::token_names[TK_MAX] = {
	"Empty",
	"Identifier",
	"Constant",
	"Self",
	"Built-In Type",
	"Built-In Func",
	"In",
	"'=='",
	"'!='",
	"'<'",
	"'<='",
	"'>'",
	"'>='",
	"'and'",
	"'or'",
	"'not'",
	"'+'",
	"'-'",
	"'*'",
	"'/'",
	"'%'",
	"'<<'",
	"'>>'",
	"'='",
	"'+='",
	"'-='",
	"'*='",
	"'/='",
	"'%='",
	"'<<='",
	"'>>='",
	"'&='",
	"'|='",
	"'^='",
	"'&'",
	"'|'",
	"'^'",
	"'~'",
	// Increment/decrement operators are not tokenized; slots left disabled.
	//"Plus Plus",
	//"Minus Minus",
	"if",
	"elif",
	"else",
	"for",
	"while",
	"break",
	"continue",
	"pass",
	"return",
	"match",
	"func",
	"class",
	"class_name",
	"extends",
	"is",
	"onready",
	"tool",
	"static",
	"export",
	"setget",
	"const",
	"var",
	"as",
	"void",
	"enum",
	"preload",
	"assert",
	"yield",
	"signal",
	"breakpoint",
	"rpc",
	"sync",
	"master",
	"puppet",
	"slave",
	"remotesync",
	"mastersync",
	"puppetsync",
	"'['",
	"']'",
	"'{'",
	"'}'",
	"'('",
	"')'",
	"','",
	"';'",
	"'.'",
	"'?'",
	"':'",
	"'$'",
	"'->'",
	"'\\n'",
	"PI",
	"TAU",
	"_",
	"INF",
	"NAN",
	"Error",
	"EOF",
	"Cursor"
};
140 
// One row of the built-in type table: a Variant type paired with the name it
// is written with in source code.
struct _bit {
	Variant::Type type; // Variant type produced when `text` is recognized.
	const char *text; // Source spelling; NULL marks the end of the table.
};
145 //built in types
146 
// Built-in type names recognized by the tokenizer. The table is scanned
// linearly until the entry whose `text` is NULL, so the sentinel row must stay
// last. It is searched by name in _advance() and by type in get_token_literal().
static const _bit _type_list[] = {
	//types
	{ Variant::BOOL, "bool" },
	{ Variant::INT, "int" },
	{ Variant::REAL, "float" },
	{ Variant::STRING, "String" },
	{ Variant::VECTOR2, "Vector2" },
	{ Variant::RECT2, "Rect2" },
	{ Variant::TRANSFORM2D, "Transform2D" },
	{ Variant::VECTOR3, "Vector3" },
	{ Variant::AABB, "AABB" },
	{ Variant::PLANE, "Plane" },
	{ Variant::QUAT, "Quat" },
	{ Variant::BASIS, "Basis" },
	{ Variant::TRANSFORM, "Transform" },
	{ Variant::COLOR, "Color" },
	{ Variant::_RID, "RID" },
	{ Variant::OBJECT, "Object" },
	{ Variant::NODE_PATH, "NodePath" },
	{ Variant::DICTIONARY, "Dictionary" },
	{ Variant::ARRAY, "Array" },
	{ Variant::POOL_BYTE_ARRAY, "PoolByteArray" },
	{ Variant::POOL_INT_ARRAY, "PoolIntArray" },
	{ Variant::POOL_REAL_ARRAY, "PoolRealArray" },
	{ Variant::POOL_STRING_ARRAY, "PoolStringArray" },
	{ Variant::POOL_VECTOR2_ARRAY, "PoolVector2Array" },
	{ Variant::POOL_VECTOR3_ARRAY, "PoolVector3Array" },
	{ Variant::POOL_COLOR_ARRAY, "PoolColorArray" },
	// Sentinel: NULL text terminates every scan of this table.
	{ Variant::VARIANT_MAX, NULL },
};
177 
// One row of the keyword table: a token paired with its source spelling.
struct _kws {
	GDScriptTokenizer::Token token; // Token emitted when `text` is recognized.
	const char *text; // Source spelling; NULL marks the end of the table.
};
182 
// Reserved words and named constants recognized by the tokenizer. The table is
// scanned linearly until the entry whose `text` is NULL, so the sentinel row
// must stay last. It is searched by text in _advance() and by token in
// get_token_literal().
static const _kws _keyword_list[] = {
	//ops
	{ GDScriptTokenizer::TK_OP_IN, "in" },
	{ GDScriptTokenizer::TK_OP_NOT, "not" },
	{ GDScriptTokenizer::TK_OP_OR, "or" },
	{ GDScriptTokenizer::TK_OP_AND, "and" },
	//func
	{ GDScriptTokenizer::TK_PR_FUNCTION, "func" },
	{ GDScriptTokenizer::TK_PR_CLASS, "class" },
	{ GDScriptTokenizer::TK_PR_CLASS_NAME, "class_name" },
	{ GDScriptTokenizer::TK_PR_EXTENDS, "extends" },
	{ GDScriptTokenizer::TK_PR_IS, "is" },
	{ GDScriptTokenizer::TK_PR_ONREADY, "onready" },
	{ GDScriptTokenizer::TK_PR_TOOL, "tool" },
	{ GDScriptTokenizer::TK_PR_STATIC, "static" },
	{ GDScriptTokenizer::TK_PR_EXPORT, "export" },
	{ GDScriptTokenizer::TK_PR_SETGET, "setget" },
	{ GDScriptTokenizer::TK_PR_VAR, "var" },
	{ GDScriptTokenizer::TK_PR_AS, "as" },
	{ GDScriptTokenizer::TK_PR_VOID, "void" },
	{ GDScriptTokenizer::TK_PR_PRELOAD, "preload" },
	{ GDScriptTokenizer::TK_PR_ASSERT, "assert" },
	{ GDScriptTokenizer::TK_PR_YIELD, "yield" },
	{ GDScriptTokenizer::TK_PR_SIGNAL, "signal" },
	{ GDScriptTokenizer::TK_PR_BREAKPOINT, "breakpoint" },
	{ GDScriptTokenizer::TK_PR_REMOTE, "remote" },
	{ GDScriptTokenizer::TK_PR_MASTER, "master" },
	{ GDScriptTokenizer::TK_PR_SLAVE, "slave" },
	{ GDScriptTokenizer::TK_PR_PUPPET, "puppet" },
	{ GDScriptTokenizer::TK_PR_SYNC, "sync" },
	{ GDScriptTokenizer::TK_PR_REMOTESYNC, "remotesync" },
	{ GDScriptTokenizer::TK_PR_MASTERSYNC, "mastersync" },
	{ GDScriptTokenizer::TK_PR_PUPPETSYNC, "puppetsync" },
	{ GDScriptTokenizer::TK_PR_CONST, "const" },
	{ GDScriptTokenizer::TK_PR_ENUM, "enum" },
	//controlflow
	{ GDScriptTokenizer::TK_CF_IF, "if" },
	{ GDScriptTokenizer::TK_CF_ELIF, "elif" },
	{ GDScriptTokenizer::TK_CF_ELSE, "else" },
	{ GDScriptTokenizer::TK_CF_FOR, "for" },
	{ GDScriptTokenizer::TK_CF_WHILE, "while" },
	{ GDScriptTokenizer::TK_CF_BREAK, "break" },
	{ GDScriptTokenizer::TK_CF_CONTINUE, "continue" },
	{ GDScriptTokenizer::TK_CF_RETURN, "return" },
	{ GDScriptTokenizer::TK_CF_MATCH, "match" },
	{ GDScriptTokenizer::TK_CF_PASS, "pass" },
	// self, named constants and the wildcard
	{ GDScriptTokenizer::TK_SELF, "self" },
	{ GDScriptTokenizer::TK_CONST_PI, "PI" },
	{ GDScriptTokenizer::TK_CONST_TAU, "TAU" },
	{ GDScriptTokenizer::TK_WILDCARD, "_" },
	{ GDScriptTokenizer::TK_CONST_INF, "INF" },
	{ GDScriptTokenizer::TK_CONST_NAN, "NAN" },
	// Sentinel: NULL text terminates every scan of this table.
	{ GDScriptTokenizer::TK_ERROR, NULL }
};
237 
get_token_name(Token p_token)238 const char *GDScriptTokenizer::get_token_name(Token p_token) {
239 
240 	ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");
241 	return token_names[p_token];
242 }
243 
is_token_literal(int p_offset,bool variable_safe) const244 bool GDScriptTokenizer::is_token_literal(int p_offset, bool variable_safe) const {
245 	switch (get_token(p_offset)) {
246 		// Can always be literal:
247 		case TK_IDENTIFIER:
248 
249 		case TK_PR_ONREADY:
250 		case TK_PR_TOOL:
251 		case TK_PR_STATIC:
252 		case TK_PR_EXPORT:
253 		case TK_PR_SETGET:
254 		case TK_PR_SIGNAL:
255 		case TK_PR_REMOTE:
256 		case TK_PR_MASTER:
257 		case TK_PR_PUPPET:
258 		case TK_PR_SYNC:
259 		case TK_PR_REMOTESYNC:
260 		case TK_PR_MASTERSYNC:
261 		case TK_PR_PUPPETSYNC:
262 			return true;
263 
264 		// Literal for non-variables only:
265 		case TK_BUILT_IN_TYPE:
266 		case TK_BUILT_IN_FUNC:
267 
268 		case TK_OP_IN:
269 			//case TK_OP_NOT:
270 			//case TK_OP_OR:
271 			//case TK_OP_AND:
272 
273 		case TK_PR_CLASS:
274 		case TK_PR_CONST:
275 		case TK_PR_ENUM:
276 		case TK_PR_PRELOAD:
277 		case TK_PR_FUNCTION:
278 		case TK_PR_EXTENDS:
279 		case TK_PR_ASSERT:
280 		case TK_PR_YIELD:
281 		case TK_PR_VAR:
282 
283 		case TK_CF_IF:
284 		case TK_CF_ELIF:
285 		case TK_CF_ELSE:
286 		case TK_CF_FOR:
287 		case TK_CF_WHILE:
288 		case TK_CF_BREAK:
289 		case TK_CF_CONTINUE:
290 		case TK_CF_RETURN:
291 		case TK_CF_MATCH:
292 		case TK_CF_PASS:
293 		case TK_SELF:
294 		case TK_CONST_PI:
295 		case TK_CONST_TAU:
296 		case TK_WILDCARD:
297 		case TK_CONST_INF:
298 		case TK_CONST_NAN:
299 		case TK_ERROR:
300 			return !variable_safe;
301 
302 		case TK_CONSTANT: {
303 			switch (get_token_constant(p_offset).get_type()) {
304 				case Variant::NIL:
305 				case Variant::BOOL:
306 					return true;
307 				default:
308 					return false;
309 			}
310 		}
311 		default:
312 			return false;
313 	}
314 }
315 
get_token_literal(int p_offset) const316 StringName GDScriptTokenizer::get_token_literal(int p_offset) const {
317 	Token token = get_token(p_offset);
318 	switch (token) {
319 		case TK_IDENTIFIER:
320 			return get_token_identifier(p_offset);
321 		case TK_BUILT_IN_TYPE: {
322 			Variant::Type type = get_token_type(p_offset);
323 			int idx = 0;
324 
325 			while (_type_list[idx].text) {
326 				if (type == _type_list[idx].type) {
327 					return _type_list[idx].text;
328 				}
329 				idx++;
330 			}
331 		} break; // Shouldn't get here, stuff happens
332 		case TK_BUILT_IN_FUNC:
333 			return GDScriptFunctions::get_func_name(get_token_built_in_func(p_offset));
334 		case TK_CONSTANT: {
335 			const Variant value = get_token_constant(p_offset);
336 
337 			switch (value.get_type()) {
338 				case Variant::NIL:
339 					return "null";
340 				case Variant::BOOL:
341 					return value ? "true" : "false";
342 				default: {
343 				}
344 			}
345 		}
346 		case TK_OP_AND:
347 		case TK_OP_OR:
348 			break; // Don't get into default, since they can be non-literal
349 		default: {
350 			int idx = 0;
351 
352 			while (_keyword_list[idx].text) {
353 				if (token == _keyword_list[idx].token) {
354 					return _keyword_list[idx].text;
355 				}
356 				idx++;
357 			}
358 		}
359 	}
360 	ERR_FAIL_V_MSG("", "Failed to get token literal.");
361 }
362 
_is_text_char(CharType c)363 static bool _is_text_char(CharType c) {
364 
365 	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
366 }
367 
_is_number(CharType c)368 static bool _is_number(CharType c) {
369 
370 	return (c >= '0' && c <= '9');
371 }
372 
_is_hex(CharType c)373 static bool _is_hex(CharType c) {
374 
375 	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
376 }
377 
_is_bin(CharType c)378 static bool _is_bin(CharType c) {
379 
380 	return (c == '0' || c == '1');
381 }
382 
_make_token(Token p_type)383 void GDScriptTokenizerText::_make_token(Token p_type) {
384 
385 	TokenData &tk = tk_rb[tk_rb_pos];
386 
387 	tk.type = p_type;
388 	tk.line = line;
389 	tk.col = column;
390 
391 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
392 }
_make_identifier(const StringName & p_identifier)393 void GDScriptTokenizerText::_make_identifier(const StringName &p_identifier) {
394 
395 	TokenData &tk = tk_rb[tk_rb_pos];
396 
397 	tk.type = TK_IDENTIFIER;
398 	tk.identifier = p_identifier;
399 	tk.line = line;
400 	tk.col = column;
401 
402 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
403 }
404 
_make_built_in_func(GDScriptFunctions::Function p_func)405 void GDScriptTokenizerText::_make_built_in_func(GDScriptFunctions::Function p_func) {
406 
407 	TokenData &tk = tk_rb[tk_rb_pos];
408 
409 	tk.type = TK_BUILT_IN_FUNC;
410 	tk.func = p_func;
411 	tk.line = line;
412 	tk.col = column;
413 
414 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
415 }
_make_constant(const Variant & p_constant)416 void GDScriptTokenizerText::_make_constant(const Variant &p_constant) {
417 
418 	TokenData &tk = tk_rb[tk_rb_pos];
419 
420 	tk.type = TK_CONSTANT;
421 	tk.constant = p_constant;
422 	tk.line = line;
423 	tk.col = column;
424 
425 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
426 }
427 
_make_type(const Variant::Type & p_type)428 void GDScriptTokenizerText::_make_type(const Variant::Type &p_type) {
429 
430 	TokenData &tk = tk_rb[tk_rb_pos];
431 
432 	tk.type = TK_BUILT_IN_TYPE;
433 	tk.vtype = p_type;
434 	tk.line = line;
435 	tk.col = column;
436 
437 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
438 }
439 
_make_error(const String & p_error)440 void GDScriptTokenizerText::_make_error(const String &p_error) {
441 
442 	error_flag = true;
443 	last_error = p_error;
444 
445 	TokenData &tk = tk_rb[tk_rb_pos];
446 	tk.type = TK_ERROR;
447 	tk.constant = p_error;
448 	tk.line = line;
449 	tk.col = column;
450 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
451 }
452 
_make_newline(int p_indentation,int p_tabs)453 void GDScriptTokenizerText::_make_newline(int p_indentation, int p_tabs) {
454 
455 	TokenData &tk = tk_rb[tk_rb_pos];
456 	tk.type = TK_NEWLINE;
457 	tk.constant = Vector2(p_indentation, p_tabs);
458 	tk.line = line;
459 	tk.col = column;
460 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
461 }
462 
// Scans the next token from `_code`, starting at `code_pos`, and appends it to
// the token ring buffer through one of the _make_*() helpers.
//
// Whitespace and backslash line continuations are skipped without producing a
// token. A '#' comment is consumed up to the end of the line and then handled
// like the newline that ends it. Once an error has been raised (error_flag),
// every further call re-emits the same error token; once the end of the code
// is reached, every further call emits TK_EOF.
void GDScriptTokenizerText::_advance() {

	if (error_flag) {
		//parser broke
		_make_error(last_error);
		return;
	}

	if (code_pos >= len) {
		_make_token(TK_EOF);
		return;
	}
// GETCHAR(n): character n positions ahead of code_pos, or 0 past the end of the code.
#define GETCHAR(m_ofs) ((m_ofs + code_pos) >= len ? 0 : _code[m_ofs + code_pos])
// INCPOS(n): consume n characters, advancing both the read position and the column.
#define INCPOS(m_amount)      \
	{                         \
		code_pos += m_amount; \
		column += m_amount;   \
	}
	// Each iteration either emits a token (and leaves the loop or returns) or
	// skips ignorable input and `continue`s to look at the next character.
	while (true) {

		bool is_node_path = false;
		StringMode string_mode = STRING_DOUBLE_QUOTE;

		switch (GETCHAR(0)) {
			case 0:
				_make_token(TK_EOF);
				break;
			case '\\':
				// Line continuation: a backslash must be followed by a newline
				// (optionally preceded by '\r'). No TK_NEWLINE is emitted.
				INCPOS(1);
				if (GETCHAR(0) == '\r') {
					INCPOS(1);
				}

				if (GETCHAR(0) != '\n') {
					_make_error("Expected newline after '\\'.");
					return;
				}

				INCPOS(1);
				line++;

				// Leading whitespace on the continued line is not indentation.
				while (GETCHAR(0) == ' ' || GETCHAR(0) == '\t') {
					INCPOS(1);
				}

				continue;
			case '\t':
			case '\r':
			case ' ':
				// Whitespace between tokens is skipped.
				INCPOS(1);
				continue;
			case '#': { // line comment skip
#ifdef DEBUG_ENABLED
				// Debug builds keep the comment text to look for warning directives.
				String comment;
#endif // DEBUG_ENABLED
				while (GETCHAR(0) != '\n') {
#ifdef DEBUG_ENABLED
					comment += GETCHAR(0);
#endif // DEBUG_ENABLED
					// Only code_pos moves here; column is reset to 1 by the
					// newline handling this case falls through to.
					code_pos++;
					if (GETCHAR(0) == 0) { //end of file
						// NOTE(review): returning here skips the directive
						// parsing below, so a directive on a final line without
						// a trailing newline appears to be ignored - confirm
						// whether that is intended.
						//_make_error("Unterminated Comment");
						_make_token(TK_EOF);
						return;
					}
				}
#ifdef DEBUG_ENABLED
				// Recognized directives: "warning-ignore:<code>" (recorded with
				// the current line), "warning-ignore-all:<code>" and
				// "warnings-disable".
				String comment_content = comment.trim_prefix("#").trim_prefix(" ");
				if (comment_content.begins_with("warning-ignore:")) {
					String code = comment_content.get_slice(":", 1);
					warning_skips.push_back(Pair<int, String>(line, code.strip_edges().to_lower()));
				} else if (comment_content.begins_with("warning-ignore-all:")) {
					String code = comment_content.get_slice(":", 1);
					warning_global_skips.insert(code.strip_edges().to_lower());
				} else if (comment_content.strip_edges() == "warnings-disable") {
					ignore_warnings = true;
				}
#endif // DEBUG_ENABLED
				// The comment stopped at '\n'; handle it as a regular newline.
				FALLTHROUGH;
			}
			case '\n': {
				line++;
				INCPOS(1);
				bool used_spaces = false;
				int tabs = 0;
				column = 1;
				int i = 0;
				// Measure the indentation of the next line by peeking ahead.
				// The characters are not consumed here; later calls skip them
				// through the whitespace case above.
				while (true) {
					if (GETCHAR(i) == ' ') {
						i++;
						used_spaces = true;
					} else if (GETCHAR(i) == '\t') {
						if (used_spaces) {
							_make_error("Spaces used before tabs on a line");
							return;
						}
						i++;
						tabs++;
					} else {
						break; // not indentation anymore
					}
				}

				// i counts every indentation character, tabs only the tabs.
				_make_newline(i, tabs);
				return;
			}
			// Operators and punctuation. Each case emits its token, consuming
			// any characters beyond the first with INCPOS; the first character
			// is consumed by the INCPOS(1) at the bottom of the loop.
			case '/': {

				switch (GETCHAR(1)) {
					case '=': { // diveq

						_make_token(TK_OP_ASSIGN_DIV);
						INCPOS(1);

					} break;
					default:
						_make_token(TK_OP_DIV);
				}
			} break;
			case '=': {
				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_EQUAL);
					INCPOS(1);

				} else
					_make_token(TK_OP_ASSIGN);

			} break;
			case '<': {
				if (GETCHAR(1) == '=') {

					_make_token(TK_OP_LESS_EQUAL);
					INCPOS(1);
				} else if (GETCHAR(1) == '<') {
					if (GETCHAR(2) == '=') {
						_make_token(TK_OP_ASSIGN_SHIFT_LEFT);
						INCPOS(1);
					} else {
						_make_token(TK_OP_SHIFT_LEFT);
					}
					INCPOS(1);
				} else
					_make_token(TK_OP_LESS);

			} break;
			case '>': {
				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_GREATER_EQUAL);
					INCPOS(1);
				} else if (GETCHAR(1) == '>') {
					if (GETCHAR(2) == '=') {
						_make_token(TK_OP_ASSIGN_SHIFT_RIGHT);
						INCPOS(1);

					} else {
						_make_token(TK_OP_SHIFT_RIGHT);
					}
					INCPOS(1);
				} else {
					_make_token(TK_OP_GREATER);
				}

			} break;
			case '!': {
				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_NOT_EQUAL);
					INCPOS(1);
				} else {
					_make_token(TK_OP_NOT);
				}

			} break;
			//case '"' //string - no strings in shader
			//case '\'' //string - no strings in shader
			case '{':
				_make_token(TK_CURLY_BRACKET_OPEN);
				break;
			case '}':
				_make_token(TK_CURLY_BRACKET_CLOSE);
				break;
			case '[':
				_make_token(TK_BRACKET_OPEN);
				break;
			case ']':
				_make_token(TK_BRACKET_CLOSE);
				break;
			case '(':
				_make_token(TK_PARENTHESIS_OPEN);
				break;
			case ')':
				_make_token(TK_PARENTHESIS_CLOSE);
				break;
			case ',':
				_make_token(TK_COMMA);
				break;
			case ';':
				_make_token(TK_SEMICOLON);
				break;
			case '?':
				_make_token(TK_QUESTION_MARK);
				break;
			case ':':
				_make_token(TK_COLON); //for methods maybe but now useless.
				break;
			case '$':
				_make_token(TK_DOLLAR); //for the get_node() shortener
				break;
			case '^': {
				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_ASSIGN_BIT_XOR);
					INCPOS(1);
				} else {
					_make_token(TK_OP_BIT_XOR);
				}

			} break;
			case '~':
				_make_token(TK_OP_BIT_INVERT);
				break;
			case '&': {
				// "&&" is accepted as a spelling of the `and` operator token.
				if (GETCHAR(1) == '&') {

					_make_token(TK_OP_AND);
					INCPOS(1);
				} else if (GETCHAR(1) == '=') {
					_make_token(TK_OP_ASSIGN_BIT_AND);
					INCPOS(1);
				} else {
					_make_token(TK_OP_BIT_AND);
				}
			} break;
			case '|': {
				// "||" is accepted as a spelling of the `or` operator token.
				if (GETCHAR(1) == '|') {

					_make_token(TK_OP_OR);
					INCPOS(1);
				} else if (GETCHAR(1) == '=') {
					_make_token(TK_OP_ASSIGN_BIT_OR);
					INCPOS(1);
				} else {
					_make_token(TK_OP_BIT_OR);
				}
			} break;
			case '*': {

				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_ASSIGN_MUL);
					INCPOS(1);
				} else {
					_make_token(TK_OP_MUL);
				}
			} break;
			case '+': {

				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_ASSIGN_ADD);
					INCPOS(1);
					/*
				}  else if (GETCHAR(1)=='+') {
					_make_token(TK_OP_PLUS_PLUS);
					INCPOS(1);
				*/
				} else {
					_make_token(TK_OP_ADD);
				}

			} break;
			case '-': {

				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_ASSIGN_SUB);
					INCPOS(1);
				} else if (GETCHAR(1) == '>') {
					_make_token(TK_FORWARD_ARROW);
					INCPOS(1);
				} else {
					_make_token(TK_OP_SUB);
				}
			} break;
			case '%': {

				if (GETCHAR(1) == '=') {
					_make_token(TK_OP_ASSIGN_MOD);
					INCPOS(1);
				} else {
					_make_token(TK_OP_MOD);
				}
			} break;
			case '@':
				// '@' is only valid as the prefix of a quoted string, which is
				// then emitted as a NodePath constant instead of a String.
				if (CharType(GETCHAR(1)) != '"' && CharType(GETCHAR(1)) != '\'') {
					_make_error("Unexpected '@'");
					return;
				}
				INCPOS(1);
				is_node_path = true;
				FALLTHROUGH;
			case '\'':
			case '"': {

				if (GETCHAR(0) == '\'')
					string_mode = STRING_SINGLE_QUOTE;

				// i indexes from the opening quote (offset 0); content starts at 1.
				int i = 1;
				// Three double quotes in a row open a multi-line string.
				if (string_mode == STRING_DOUBLE_QUOTE && GETCHAR(i) == '"' && GETCHAR(i + 1) == '"') {
					i += 2;
					string_mode = STRING_MULTILINE;
				}

				String str;
				while (true) {
					if (CharType(GETCHAR(i)) == 0) {

						_make_error("Unterminated String");
						return;
					} else if (string_mode == STRING_DOUBLE_QUOTE && CharType(GETCHAR(i)) == '"') {
						break;
					} else if (string_mode == STRING_SINGLE_QUOTE && CharType(GETCHAR(i)) == '\'') {
						break;
					} else if (string_mode == STRING_MULTILINE && CharType(GETCHAR(i)) == '\"' && CharType(GETCHAR(i + 1)) == '\"' && CharType(GETCHAR(i + 2)) == '\"') {
						// Leave i on the last of the three closing quotes.
						i += 2;
						break;
					} else if (string_mode != STRING_MULTILINE && CharType(GETCHAR(i)) == '\n') {
						_make_error("Unexpected EOL at String.");
						return;
					} else if (CharType(GETCHAR(i)) == 0xFFFF) {
						// 0xFFFF is the cursor marker (see the TK_CURSOR case
						// below): end the string just before it so that the
						// next call emits TK_CURSOR.
						//string ends here, next will be TK
						i--;
						break;
					} else if (CharType(GETCHAR(i)) == '\\') {
						//escaped characters...
						i++;
						CharType next = GETCHAR(i);
						if (next == 0) {
							_make_error("Unterminated String");
							return;
						}
						CharType res = 0;

						switch (next) {

							case 'a': res = 7; break;
							case 'b': res = 8; break;
							case 't': res = 9; break;
							case 'n': res = 10; break;
							case 'v': res = 11; break;
							case 'f': res = 12; break;
							case 'r': res = 13; break;
							case '\'': res = '\''; break;
							case '\"': res = '\"'; break;
							case '\\': res = '\\'; break;
							case '/':
								res = '/';
								break; // "\/" is accepted and yields a plain '/'

							case 'u': {
								// "\uXXXX": exactly four hexadecimal digits
								// giving the character code.
								i += 1;
								for (int j = 0; j < 4; j++) {
									CharType c = GETCHAR(i + j);
									if (c == 0) {
										_make_error("Unterminated String");
										return;
									}

									CharType v = 0;
									if (c >= '0' && c <= '9') {
										v = c - '0';
									} else if (c >= 'a' && c <= 'f') {
										v = c - 'a';
										v += 10;
									} else if (c >= 'A' && c <= 'F') {
										v = c - 'A';
										v += 10;
									} else {
										_make_error("Malformed hex constant in string");
										return;
									}

									res <<= 4;
									res |= v;
								}
								// Leave i on the last hex digit; the shared i++
								// at the bottom of the loop steps past it.
								i += 3;

							} break;
							default: {

								_make_error("Invalid escape sequence");
								return;
							} break;
						}

						str += res;

					} else {
						// Only reachable for '\n' in multi-line strings;
						// single-line strings reported it as an error above.
						if (CharType(GETCHAR(i)) == '\n') {
							line++;
							// NOTE(review): column is reset here, yet the
							// INCPOS(i) after the loop adds the full consumed
							// length to it - verify the resulting column is
							// what callers expect.
							column = 1;
						}

						str += CharType(GETCHAR(i));
					}
					i++;
				}
				// Consume everything up to (not including) the character i
				// stopped on; the INCPOS(1) at the bottom of the loop consumes
				// that one.
				INCPOS(i);

				if (is_node_path) {
					_make_constant(NodePath(str));
				} else {
					_make_constant(str);
				}

			} break;
			case 0xFFFF: {
				// Cursor marker character.
				_make_token(TK_CURSOR);
			} break;
			default: {

				if (_is_number(GETCHAR(0)) || (GETCHAR(0) == '.' && _is_number(GETCHAR(1)))) {
					// parse number
					// The flags record which literal features have been seen so
					// far, both to reject invalid combinations and to choose
					// the conversion at the end.
					bool period_found = false;
					bool exponent_found = false;
					bool hexa_found = false;
					bool bin_found = false;
					bool sign_found = false;

					String str;
					int i = 0;

					while (true) {
						if (GETCHAR(i) == '.') {
							if (period_found || exponent_found) {
								_make_error("Invalid numeric constant at '.'");
								return;
							} else if (bin_found) {
								_make_error("Invalid binary constant at '.'");
								return;
							} else if (hexa_found) {
								_make_error("Invalid hexadecimal constant at '.'");
								return;
							}
							period_found = true;
						} else if (GETCHAR(i) == 'x') {
							// A hex prefix is only accepted when exactly one
							// character has been collected so far.
							if (hexa_found || bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
								_make_error("Invalid numeric constant at 'x'");
								return;
							}
							hexa_found = true;
						} else if (hexa_found && _is_hex(GETCHAR(i))) {
							// Hex digit; checked before 'b' and 'e' so those
							// letters count as digits inside a hex literal.

						} else if (!hexa_found && GETCHAR(i) == 'b') {
							// Binary prefix; same placement rule as for 'x'.
							if (bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
								_make_error("Invalid numeric constant at 'b'");
								return;
							}
							bin_found = true;
						} else if (!hexa_found && GETCHAR(i) == 'e') {
							if (exponent_found || bin_found) {
								_make_error("Invalid numeric constant at 'e'");
								return;
							}
							exponent_found = true;
						} else if (_is_number(GETCHAR(i))) {
							//all ok

						} else if (bin_found && _is_bin(GETCHAR(i))) {
							// NOTE(review): this branch cannot be reached,
							// because '0' and '1' already match _is_number()
							// above. As a consequence digits 2-9 are not
							// rejected here inside a binary literal - confirm
							// how bin_to_int64() treats them.

						} else if ((GETCHAR(i) == '-' || GETCHAR(i) == '+') && exponent_found) {
							// Accepts one '+' or '-' once an exponent marker
							// has been seen.
							if (sign_found) {
								_make_error("Invalid numeric constant at '-'");
								return;
							}
							sign_found = true;
						} else if (GETCHAR(i) == '_') {
							i++;
							continue; // Included for readability, shouldn't be a part of the string
						} else
							break;

						str += CharType(GETCHAR(i));
						i++;
					}

					// The literal must end in a digit (or a hex digit for hex
					// literals); this rejects inputs such as "0x", "0b", "1e"
					// and "1.".
					if (!(_is_number(str[str.length() - 1]) || (hexa_found && _is_hex(str[str.length() - 1])))) {
						_make_error("Invalid numeric constant: " + str);
						return;
					}

					// NOTE(review): the position is advanced before the token
					// is created, so this token is stamped with the column just
					// past the literal, whereas identifiers and operators are
					// stamped at their first character - confirm whether
					// callers depend on this.
					INCPOS(i);
					if (hexa_found) {
						int64_t val = str.hex_to_int64();
						_make_constant(val);
					} else if (bin_found) {
						int64_t val = str.bin_to_int64();
						_make_constant(val);
					} else if (period_found || exponent_found) {
						double val = str.to_double();
						_make_constant(val);
					} else {
						int64_t val = str.to_int64();
						_make_constant(val);
					}

					return;
				}

				if (GETCHAR(0) == '.') {
					//parse period
					_make_token(TK_PERIOD);
					break;
				}

				if (_is_text_char(GETCHAR(0))) {
					// parse identifier
					String str;
					str += CharType(GETCHAR(0));

					int i = 1;
					while (_is_text_char(GETCHAR(i))) {
						str += CharType(GETCHAR(i));
						i++;
					}

					// Classify the word, in order: null/true/false constants,
					// built-in type names, built-in function names, keywords,
					// and finally a plain identifier.
					bool identifier = false;

					if (str == "null") {
						_make_constant(Variant());

					} else if (str == "true") {
						_make_constant(true);

					} else if (str == "false") {
						_make_constant(false);
					} else {

						bool found = false;

						{
							// built in type?

							int idx = 0;

							while (_type_list[idx].text) {

								if (str == _type_list[idx].text) {
									_make_type(_type_list[idx].type);
									found = true;
									break;
								}
								idx++;
							}
						}

						if (!found) {

							//built in func?

							for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {

								if (str == GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j))) {

									_make_built_in_func(GDScriptFunctions::Function(j));
									found = true;
									break;
								}
							}
						}

						if (!found) {
							//keyword

							int idx = 0;
							found = false;

							while (_keyword_list[idx].text) {

								if (str == _keyword_list[idx].text) {
									_make_token(_keyword_list[idx].token);
									found = true;
									break;
								}
								idx++;
							}
						}

						if (!found)
							identifier = true;
					}

					if (identifier) {
						_make_identifier(str);
					}
					// Consume the word after the token has been created, so the
					// token is stamped with the column of its first character.
					INCPOS(str.length());
					return;
				}

				_make_error("Unknown character");
				return;

			} break;
		}

		// Reached by every case that left the switch with `break`: consume the
		// current character and finish.
		INCPOS(1);
		break;
	}
}
1067 
set_code(const String & p_code)1068 void GDScriptTokenizerText::set_code(const String &p_code) {
1069 
1070 	code = p_code;
1071 	len = p_code.length();
1072 	if (len) {
1073 		_code = &code[0];
1074 	} else {
1075 		_code = NULL;
1076 	}
1077 	code_pos = 0;
1078 	line = 1; //it is stand-ar-ized that lines begin in 1 in code..
1079 	column = 1; //the same holds for columns
1080 	tk_rb_pos = 0;
1081 	error_flag = false;
1082 #ifdef DEBUG_ENABLED
1083 	ignore_warnings = false;
1084 #endif // DEBUG_ENABLED
1085 	last_error = "";
1086 	for (int i = 0; i < MAX_LOOKAHEAD + 1; i++)
1087 		_advance();
1088 }
1089 
get_token(int p_offset) const1090 GDScriptTokenizerText::Token GDScriptTokenizerText::get_token(int p_offset) const {
1091 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, TK_ERROR);
1092 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, TK_ERROR);
1093 
1094 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1095 	return tk_rb[ofs].type;
1096 }
1097 
get_token_line(int p_offset) const1098 int GDScriptTokenizerText::get_token_line(int p_offset) const {
1099 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
1100 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
1101 
1102 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1103 	return tk_rb[ofs].line;
1104 }
1105 
get_token_column(int p_offset) const1106 int GDScriptTokenizerText::get_token_column(int p_offset) const {
1107 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
1108 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
1109 
1110 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1111 	return tk_rb[ofs].col;
1112 }
1113 
get_token_constant(int p_offset) const1114 const Variant &GDScriptTokenizerText::get_token_constant(int p_offset) const {
1115 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, tk_rb[0].constant);
1116 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, tk_rb[0].constant);
1117 
1118 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1119 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_CONSTANT, tk_rb[0].constant);
1120 	return tk_rb[ofs].constant;
1121 }
1122 
get_token_identifier(int p_offset) const1123 StringName GDScriptTokenizerText::get_token_identifier(int p_offset) const {
1124 
1125 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, StringName());
1126 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, StringName());
1127 
1128 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1129 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_IDENTIFIER, StringName());
1130 	return tk_rb[ofs].identifier;
1131 }
1132 
get_token_built_in_func(int p_offset) const1133 GDScriptFunctions::Function GDScriptTokenizerText::get_token_built_in_func(int p_offset) const {
1134 
1135 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, GDScriptFunctions::FUNC_MAX);
1136 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, GDScriptFunctions::FUNC_MAX);
1137 
1138 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1139 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_FUNC, GDScriptFunctions::FUNC_MAX);
1140 	return tk_rb[ofs].func;
1141 }
1142 
get_token_type(int p_offset) const1143 Variant::Type GDScriptTokenizerText::get_token_type(int p_offset) const {
1144 
1145 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, Variant::NIL);
1146 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, Variant::NIL);
1147 
1148 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1149 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_TYPE, Variant::NIL);
1150 	return tk_rb[ofs].vtype;
1151 }
1152 
get_token_line_indent(int p_offset) const1153 int GDScriptTokenizerText::get_token_line_indent(int p_offset) const {
1154 
1155 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
1156 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
1157 
1158 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1159 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
1160 	return tk_rb[ofs].constant.operator Vector2().x;
1161 }
1162 
get_token_line_tab_indent(int p_offset) const1163 int GDScriptTokenizerText::get_token_line_tab_indent(int p_offset) const {
1164 
1165 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
1166 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
1167 
1168 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1169 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
1170 	return tk_rb[ofs].constant.operator Vector2().y;
1171 }
1172 
get_token_error(int p_offset) const1173 String GDScriptTokenizerText::get_token_error(int p_offset) const {
1174 
1175 	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, String());
1176 	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, String());
1177 
1178 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
1179 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_ERROR, String());
1180 	return tk_rb[ofs].constant;
1181 }
1182 
advance(int p_amount)1183 void GDScriptTokenizerText::advance(int p_amount) {
1184 
1185 	ERR_FAIL_COND(p_amount <= 0);
1186 	for (int i = 0; i < p_amount; i++)
1187 		_advance();
1188 }
1189 
1190 //////////////////////////////////////////////////////////////////////////////////////////////////////
1191 
// Version number written into the buffer header by parse_code_string();
// set_code_buffer() rejects buffers whose stored version is greater than this.
#define BYTECODE_VERSION 13
1193 
set_code_buffer(const Vector<uint8_t> & p_buffer)1194 Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
1195 
1196 	const uint8_t *buf = p_buffer.ptr();
1197 	int total_len = p_buffer.size();
1198 	ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
1199 
1200 	int version = decode_uint32(&buf[4]);
1201 	ERR_FAIL_COND_V_MSG(version > BYTECODE_VERSION, ERR_INVALID_DATA, "Bytecode is too recent! Please use a newer engine version.");
1202 
1203 	int identifier_count = decode_uint32(&buf[8]);
1204 	int constant_count = decode_uint32(&buf[12]);
1205 	int line_count = decode_uint32(&buf[16]);
1206 	int token_count = decode_uint32(&buf[20]);
1207 
1208 	const uint8_t *b = &buf[24];
1209 	total_len -= 24;
1210 
1211 	identifiers.resize(identifier_count);
1212 	for (int i = 0; i < identifier_count; i++) {
1213 
1214 		int len = decode_uint32(b);
1215 		ERR_FAIL_COND_V(len > total_len, ERR_INVALID_DATA);
1216 		b += 4;
1217 		Vector<uint8_t> cs;
1218 		cs.resize(len);
1219 		for (int j = 0; j < len; j++) {
1220 			cs.write[j] = b[j] ^ 0xb6;
1221 		}
1222 
1223 		cs.write[cs.size() - 1] = 0;
1224 		String s;
1225 		s.parse_utf8((const char *)cs.ptr());
1226 		b += len;
1227 		total_len -= len + 4;
1228 		identifiers.write[i] = s;
1229 	}
1230 
1231 	constants.resize(constant_count);
1232 	for (int i = 0; i < constant_count; i++) {
1233 
1234 		Variant v;
1235 		int len;
1236 		// An object cannot be constant, never decode objects
1237 		Error err = decode_variant(v, b, total_len, &len, false);
1238 		if (err)
1239 			return err;
1240 		b += len;
1241 		total_len -= len;
1242 		constants.write[i] = v;
1243 	}
1244 
1245 	ERR_FAIL_COND_V(line_count * 8 > total_len, ERR_INVALID_DATA);
1246 
1247 	for (int i = 0; i < line_count; i++) {
1248 
1249 		uint32_t token = decode_uint32(b);
1250 		b += 4;
1251 		uint32_t linecol = decode_uint32(b);
1252 		b += 4;
1253 
1254 		lines.insert(token, linecol);
1255 		total_len -= 8;
1256 	}
1257 
1258 	tokens.resize(token_count);
1259 
1260 	for (int i = 0; i < token_count; i++) {
1261 
1262 		ERR_FAIL_COND_V(total_len < 1, ERR_INVALID_DATA);
1263 
1264 		if ((*b) & TOKEN_BYTE_MASK) { //little endian always
1265 			ERR_FAIL_COND_V(total_len < 4, ERR_INVALID_DATA);
1266 
1267 			tokens.write[i] = decode_uint32(b) & ~TOKEN_BYTE_MASK;
1268 			b += 4;
1269 		} else {
1270 			tokens.write[i] = *b;
1271 			b += 1;
1272 			total_len--;
1273 		}
1274 	}
1275 
1276 	token = 0;
1277 
1278 	return OK;
1279 }
1280 
parse_code_string(const String & p_code)1281 Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
1282 
1283 	Vector<uint8_t> buf;
1284 
1285 	Map<StringName, int> identifier_map;
1286 	HashMap<Variant, int, VariantHasher, VariantComparator> constant_map;
1287 	Map<uint32_t, int> line_map;
1288 	Vector<uint32_t> token_array;
1289 
1290 	GDScriptTokenizerText tt;
1291 	tt.set_code(p_code);
1292 	int line = -1;
1293 
1294 	while (true) {
1295 
1296 		if (tt.get_token_line() != line) {
1297 
1298 			line = tt.get_token_line();
1299 			line_map[line] = token_array.size();
1300 		}
1301 
1302 		uint32_t token = tt.get_token();
1303 		switch (tt.get_token()) {
1304 
1305 			case TK_IDENTIFIER: {
1306 				StringName id = tt.get_token_identifier();
1307 				if (!identifier_map.has(id)) {
1308 					int idx = identifier_map.size();
1309 					identifier_map[id] = idx;
1310 				}
1311 				token |= identifier_map[id] << TOKEN_BITS;
1312 			} break;
1313 			case TK_CONSTANT: {
1314 
1315 				const Variant &c = tt.get_token_constant();
1316 				if (!constant_map.has(c)) {
1317 					int idx = constant_map.size();
1318 					constant_map[c] = idx;
1319 				}
1320 				token |= constant_map[c] << TOKEN_BITS;
1321 			} break;
1322 			case TK_BUILT_IN_TYPE: {
1323 
1324 				token |= tt.get_token_type() << TOKEN_BITS;
1325 			} break;
1326 			case TK_BUILT_IN_FUNC: {
1327 
1328 				token |= tt.get_token_built_in_func() << TOKEN_BITS;
1329 
1330 			} break;
1331 			case TK_NEWLINE: {
1332 
1333 				token |= tt.get_token_line_indent() << TOKEN_BITS;
1334 			} break;
1335 			case TK_ERROR: {
1336 
1337 				ERR_FAIL_V(Vector<uint8_t>());
1338 			} break;
1339 			default: {
1340 			}
1341 		};
1342 
1343 		token_array.push_back(token);
1344 
1345 		if (tt.get_token() == TK_EOF)
1346 			break;
1347 		tt.advance();
1348 	}
1349 
1350 	//reverse maps
1351 
1352 	Map<int, StringName> rev_identifier_map;
1353 	for (Map<StringName, int>::Element *E = identifier_map.front(); E; E = E->next()) {
1354 		rev_identifier_map[E->get()] = E->key();
1355 	}
1356 
1357 	Map<int, Variant> rev_constant_map;
1358 	const Variant *K = NULL;
1359 	while ((K = constant_map.next(K))) {
1360 		rev_constant_map[constant_map[*K]] = *K;
1361 	}
1362 
1363 	Map<int, uint32_t> rev_line_map;
1364 	for (Map<uint32_t, int>::Element *E = line_map.front(); E; E = E->next()) {
1365 		rev_line_map[E->get()] = E->key();
1366 	}
1367 
1368 	//save header
1369 	buf.resize(24);
1370 	buf.write[0] = 'G';
1371 	buf.write[1] = 'D';
1372 	buf.write[2] = 'S';
1373 	buf.write[3] = 'C';
1374 	encode_uint32(BYTECODE_VERSION, &buf.write[4]);
1375 	encode_uint32(identifier_map.size(), &buf.write[8]);
1376 	encode_uint32(constant_map.size(), &buf.write[12]);
1377 	encode_uint32(line_map.size(), &buf.write[16]);
1378 	encode_uint32(token_array.size(), &buf.write[20]);
1379 
1380 	//save identifiers
1381 
1382 	for (Map<int, StringName>::Element *E = rev_identifier_map.front(); E; E = E->next()) {
1383 
1384 		CharString cs = String(E->get()).utf8();
1385 		int len = cs.length() + 1;
1386 		int extra = 4 - (len % 4);
1387 		if (extra == 4)
1388 			extra = 0;
1389 
1390 		uint8_t ibuf[4];
1391 		encode_uint32(len + extra, ibuf);
1392 		for (int i = 0; i < 4; i++) {
1393 			buf.push_back(ibuf[i]);
1394 		}
1395 		for (int i = 0; i < len; i++) {
1396 			buf.push_back(cs[i] ^ 0xb6);
1397 		}
1398 		for (int i = 0; i < extra; i++) {
1399 			buf.push_back(0 ^ 0xb6);
1400 		}
1401 	}
1402 
1403 	for (Map<int, Variant>::Element *E = rev_constant_map.front(); E; E = E->next()) {
1404 
1405 		int len;
1406 		// Objects cannot be constant, never encode objects
1407 		Error err = encode_variant(E->get(), NULL, len, false);
1408 		ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
1409 		int pos = buf.size();
1410 		buf.resize(pos + len);
1411 		encode_variant(E->get(), &buf.write[pos], len, false);
1412 	}
1413 
1414 	for (Map<int, uint32_t>::Element *E = rev_line_map.front(); E; E = E->next()) {
1415 
1416 		uint8_t ibuf[8];
1417 		encode_uint32(E->key(), &ibuf[0]);
1418 		encode_uint32(E->get(), &ibuf[4]);
1419 		for (int i = 0; i < 8; i++)
1420 			buf.push_back(ibuf[i]);
1421 	}
1422 
1423 	for (int i = 0; i < token_array.size(); i++) {
1424 
1425 		uint32_t token = token_array[i];
1426 
1427 		if (token & ~TOKEN_MASK) {
1428 			uint8_t buf4[4];
1429 			encode_uint32(token_array[i] | TOKEN_BYTE_MASK, &buf4[0]);
1430 			for (int j = 0; j < 4; j++) {
1431 				buf.push_back(buf4[j]);
1432 			}
1433 		} else {
1434 			buf.push_back(token);
1435 		}
1436 	}
1437 
1438 	return buf;
1439 }
1440 
get_token(int p_offset) const1441 GDScriptTokenizerBuffer::Token GDScriptTokenizerBuffer::get_token(int p_offset) const {
1442 
1443 	int offset = token + p_offset;
1444 
1445 	if (offset < 0 || offset >= tokens.size())
1446 		return TK_EOF;
1447 
1448 	return GDScriptTokenizerBuffer::Token(tokens[offset] & TOKEN_MASK);
1449 }
1450 
get_token_identifier(int p_offset) const1451 StringName GDScriptTokenizerBuffer::get_token_identifier(int p_offset) const {
1452 
1453 	int offset = token + p_offset;
1454 
1455 	ERR_FAIL_INDEX_V(offset, tokens.size(), StringName());
1456 	uint32_t identifier = tokens[offset] >> TOKEN_BITS;
1457 	ERR_FAIL_UNSIGNED_INDEX_V(identifier, (uint32_t)identifiers.size(), StringName());
1458 
1459 	return identifiers[identifier];
1460 }
1461 
get_token_built_in_func(int p_offset) const1462 GDScriptFunctions::Function GDScriptTokenizerBuffer::get_token_built_in_func(int p_offset) const {
1463 
1464 	int offset = token + p_offset;
1465 	ERR_FAIL_INDEX_V(offset, tokens.size(), GDScriptFunctions::FUNC_MAX);
1466 	return GDScriptFunctions::Function(tokens[offset] >> TOKEN_BITS);
1467 }
1468 
get_token_type(int p_offset) const1469 Variant::Type GDScriptTokenizerBuffer::get_token_type(int p_offset) const {
1470 
1471 	int offset = token + p_offset;
1472 	ERR_FAIL_INDEX_V(offset, tokens.size(), Variant::NIL);
1473 
1474 	return Variant::Type(tokens[offset] >> TOKEN_BITS);
1475 }
1476 
get_token_line(int p_offset) const1477 int GDScriptTokenizerBuffer::get_token_line(int p_offset) const {
1478 
1479 	int offset = token + p_offset;
1480 	int pos = lines.find_nearest(offset);
1481 
1482 	if (pos < 0)
1483 		return -1;
1484 	if (pos >= lines.size())
1485 		pos = lines.size() - 1;
1486 
1487 	uint32_t l = lines.getv(pos);
1488 	return l & TOKEN_LINE_MASK;
1489 }
get_token_column(int p_offset) const1490 int GDScriptTokenizerBuffer::get_token_column(int p_offset) const {
1491 
1492 	int offset = token + p_offset;
1493 	int pos = lines.find_nearest(offset);
1494 	if (pos < 0)
1495 		return -1;
1496 	if (pos >= lines.size())
1497 		pos = lines.size() - 1;
1498 
1499 	uint32_t l = lines.getv(pos);
1500 	return l >> TOKEN_LINE_BITS;
1501 }
get_token_line_indent(int p_offset) const1502 int GDScriptTokenizerBuffer::get_token_line_indent(int p_offset) const {
1503 
1504 	int offset = token + p_offset;
1505 	ERR_FAIL_INDEX_V(offset, tokens.size(), 0);
1506 	return tokens[offset] >> TOKEN_BITS;
1507 }
get_token_constant(int p_offset) const1508 const Variant &GDScriptTokenizerBuffer::get_token_constant(int p_offset) const {
1509 
1510 	int offset = token + p_offset;
1511 	ERR_FAIL_INDEX_V(offset, tokens.size(), nil);
1512 	uint32_t constant = tokens[offset] >> TOKEN_BITS;
1513 	ERR_FAIL_UNSIGNED_INDEX_V(constant, (uint32_t)constants.size(), nil);
1514 	return constants[constant];
1515 }
// Always fails: reports an error through ERR_FAIL_V and returns an empty
// String, regardless of p_offset.
String GDScriptTokenizerBuffer::get_token_error(int p_offset) const {

	ERR_FAIL_V(String());
}
1520 
advance(int p_amount)1521 void GDScriptTokenizerBuffer::advance(int p_amount) {
1522 
1523 	ERR_FAIL_INDEX(p_amount + token, tokens.size());
1524 	token += p_amount;
1525 }
// Constructs the tokenizer with the current token index set to 0.
GDScriptTokenizerBuffer::GDScriptTokenizerBuffer() {

	token = 0;
}
1530