1 /*
2 * OpenClonk, http://www.openclonk.org
3 *
4 * Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de/
5 * Copyright (c) 2009-2016, The OpenClonk Team and contributors
6 *
7 * Distributed under the terms of the ISC license; see accompanying file
8 * "COPYING" for details.
9 *
10 * "Clonk" is a registered trademark of Matthes Bender, used with permission.
11 * See accompanying file "TRADEMARK" for details.
12 *
13 * To redistribute this file separately, substitute the full license texts
14 * for the above references.
15 */
16 // parses scripts
17
18 #include "C4Include.h"
19 #include "script/C4AulParse.h"
20
21 #include "object/C4Def.h"
22 #include "script/C4AulDebug.h"
23 #include "script/C4AulExec.h"
24
25 #ifndef DEBUG_BYTECODE_DUMP
26 #define DEBUG_BYTECODE_DUMP 0
27 #endif
28 #include <iomanip>
29
// ---------------------------------------------------------------------------
// Textual keywords of the script language.
// These are deliberately preprocessor string literals: other code in this
// file concatenates them with adjacent literals and applies sizeof() to them.
// ---------------------------------------------------------------------------

// directives
#define C4AUL_Include       "#include"
#define C4AUL_Append        "#appendto"
#define C4AUL_Warning       "#warning"

// arguments of the #warning directive
#define C4Aul_Warning_enable  "enable"
#define C4Aul_Warning_disable "disable"

// function declaration keyword
#define C4AUL_Func          "func"

// access modifiers and declaration qualifiers
#define C4AUL_Private       "private"
#define C4AUL_Protected     "protected"
#define C4AUL_Public        "public"
#define C4AUL_Global        "global"
#define C4AUL_Const         "const"

// statements and built-in identifiers
#define C4AUL_If            "if"
#define C4AUL_Else          "else"
#define C4AUL_Do            "do"
#define C4AUL_While         "while"
#define C4AUL_For           "for"
#define C4AUL_In            "in"
#define C4AUL_Return        "return"
#define C4AUL_Var           "Var"
#define C4AUL_Par           "Par"
#define C4AUL_Break         "break"
#define C4AUL_Continue      "continue"
#define C4AUL_this          "this"

// variable declaration keywords
#define C4AUL_GlobalNamed   "static"
#define C4AUL_LocalNamed    "local"
#define C4AUL_VarNamed      "var"

// type names
#define C4AUL_TypeInt       "int"
#define C4AUL_TypeBool      "bool"
#define C4AUL_TypeC4ID      "id"
#define C4AUL_TypeDef       "def"
#define C4AUL_TypeEffect    "effect"
#define C4AUL_TypeC4Object  "object"
#define C4AUL_TypePropList  "proplist"
#define C4AUL_TypeString    "string"
#define C4AUL_TypeArray     "array"
#define C4AUL_TypeFunction  "func"

// literal and object-creation keywords
#define C4AUL_True          "true"
#define C4AUL_False         "false"
#define C4AUL_Nil           "nil"
#define C4AUL_New           "new"
77
// Kinds of tokens the scanner (C4AulParse::GetNextToken) can produce.
enum C4AulTokenType : int
{
	ATT_INVALID,   // invalid token
	ATT_DIR,       // directive
	ATT_IDTF,      // identifier
	ATT_INT,       // integer constant
	ATT_STRING,    // string constant
	ATT_DOT,       // "."
	ATT_COMMA,     // ","
	ATT_COLON,     // ":"
	ATT_SCOLON,    // ";"
	ATT_BOPEN,     // "("
	ATT_BCLOSE,    // ")"
	ATT_BOPEN2,    // "["
	ATT_BCLOSE2,   // "]"
	ATT_BLOPEN,    // "{"
	ATT_BLCLOSE,   // "}"
	ATT_CALL,      // "->"
	ATT_CALLFS,    // "->~"
	ATT_LDOTS,     // "..."
	ATT_SET,       // "="
	ATT_OPERATOR,  // operator
	ATT_EOF        // end of file
};
103
C4AulParse(C4ScriptHost * a)104 C4AulParse::C4AulParse(C4ScriptHost *a) :
105 Fn(nullptr), Host(a), pOrgScript(a), Engine(a->Engine),
106 SPos(a->Script.getData()), TokenSPos(SPos),
107 TokenType(ATT_INVALID),
108 ContextToExecIn(nullptr)
109 { }
110
C4AulParse(C4AulScriptFunc * Fn,C4AulScriptContext * context,C4AulScriptEngine * Engine)111 C4AulParse::C4AulParse(C4AulScriptFunc * Fn, C4AulScriptContext* context, C4AulScriptEngine *Engine) :
112 Fn(Fn), Host(nullptr), pOrgScript(nullptr), Engine(Engine),
113 SPos(Fn->Script), TokenSPos(SPos),
114 TokenType(ATT_INVALID),
115 ContextToExecIn(context)
116 { }
117
~C4AulParse()118 C4AulParse::~C4AulParse()
119 {
120 ClearToken();
121 }
122
Warn(const char * pMsg,...)123 void C4ScriptHost::Warn(const char *pMsg, ...)
124 {
125 va_list args; va_start(args, pMsg);
126 StdStrBuf Buf = FormatStringV(pMsg, args);
127 Buf.AppendFormat(" (%s)", ScriptName.getData());
128 Engine->GetErrorHandler()->OnWarning(Buf.getData());
129 va_end(args);
130 }
131
Warn(C4AulWarningId warning,...)132 void C4AulParse::Warn(C4AulWarningId warning, ...)
133 {
134 if (!IsWarningEnabled(TokenSPos, warning))
135 return;
136 va_list args; va_start(args, warning);
137 StdStrBuf Buf = FormatStringV(C4AulWarningMessages[static_cast<size_t>(warning)], args);
138 AppendPosition(Buf);
139 Buf.AppendFormat(" [%s]", C4AulWarningIDs[static_cast<size_t>(warning)]);
140 Engine->GetErrorHandler()->OnWarning(Buf.getData());
141 va_end(args);
142 }
143
IsWarningEnabled(const char * pos,C4AulWarningId warning) const144 bool C4AulParse::IsWarningEnabled(const char *pos, C4AulWarningId warning) const
145 {
146 if (pOrgScript) return pOrgScript->IsWarningEnabled(pos, warning);
147 // In DirectExec, the default warnings are always active.
148 switch (warning)
149 {
150 #define DIAG(id, text, enabled) case C4AulWarningId::id: return enabled;
151 #include "C4AulWarnings.h"
152 #undef DIAG
153 default: return false;
154 }
155 }
156
Error(const char * pMsg,...)157 void C4AulParse::Error(const char *pMsg, ...)
158 {
159 va_list args; va_start(args, pMsg);
160 StdStrBuf Buf;
161 Buf.FormatV(pMsg, args);
162
163 throw C4AulParseError(this, Buf.getData());
164 }
165
AppendPosition(StdStrBuf & Buf)166 void C4AulParse::AppendPosition(StdStrBuf & Buf)
167 {
168 if (Fn && Fn->GetName())
169 {
170 // Show function name
171 Buf.AppendFormat(" (in %s", Fn->GetName());
172
173 // Exact position
174 if (Fn->pOrgScript && TokenSPos)
175 Buf.AppendFormat(", %s:%d:%d)",
176 Fn->pOrgScript->ScriptName.getData(),
177 SGetLine(Fn->pOrgScript->GetScript(), TokenSPos),
178 SLineGetCharacters(Fn->pOrgScript->GetScript(), TokenSPos));
179 else
180 Buf.AppendChar(')');
181 }
182 else if (pOrgScript)
183 {
184 // Script name
185 Buf.AppendFormat(" (%s:%d:%d)",
186 pOrgScript->ScriptName.getData(),
187 SGetLine(pOrgScript->GetScript(), TokenSPos),
188 SLineGetCharacters(pOrgScript->GetScript(), TokenSPos));
189 }
190 // show a warning if the error is in a remote script
191 if (pOrgScript != Host && Host)
192 Buf.AppendFormat(" (as #appendto/#include to %s)", Host->ScriptName.getData());
193 }
194
C4AulParseError(C4AulParse * state,const char * pMsg)195 C4AulParseError::C4AulParseError(C4AulParse * state, const char *pMsg)
196 {
197 // compose error string
198 sMessage.Copy(pMsg);
199 state->AppendPosition(sMessage);
200 }
201
C4AulParseError(C4ScriptHost * pScript,const char * pMsg)202 C4AulParseError::C4AulParseError(C4ScriptHost *pScript, const char *pMsg)
203 {
204 // compose error string
205 sMessage.Copy(pMsg);
206 if (pScript)
207 {
208 // Script name
209 sMessage.AppendFormat(" (%s)",
210 pScript->ScriptName.getData());
211 }
212 }
213
C4AulParseError(C4AulScriptFunc * Fn,const char * SPos,const char * pMsg)214 C4AulParseError::C4AulParseError(C4AulScriptFunc * Fn, const char *SPos, const char *pMsg)
215 {
216 // compose error string
217 sMessage.Copy(pMsg);
218 if (!Fn) return;
219 sMessage.Append(" (");
220 // Show function name
221 if (Fn->GetName())
222 sMessage.AppendFormat("in %s", Fn->GetName());
223 if (Fn->GetName() && Fn->pOrgScript && SPos)
224 sMessage.Append(", ");
225 // Exact position
226 if (Fn->pOrgScript && SPos)
227 sMessage.AppendFormat("%s:%d:%d)",
228 Fn->pOrgScript->ScriptName.getData(),
229 SGetLine(Fn->pOrgScript->GetScript(), SPos),
230 SLineGetCharacters(Fn->pOrgScript->GetScript(), SPos));
231 else
232 sMessage.AppendChar(')');
233 }
234
AdvanceSpaces()235 bool C4AulParse::AdvanceSpaces()
236 {
237 if (!SPos)
238 return false;
239 while(*SPos)
240 {
241 if (*SPos == '/')
242 {
243 // // comment
244 if (SPos[1] == '/')
245 {
246 SPos += 2;
247 while (*SPos && *SPos != 13 && *SPos != 10)
248 ++SPos;
249 }
250 // /* comment */
251 else if (SPos[1] == '*')
252 {
253 SPos += 2;
254 while (*SPos && (*SPos != '*' || SPos[1] != '/'))
255 ++SPos;
256 SPos += 2;
257 }
258 else
259 return true;
260 }
261 // Skip any "zero width no-break spaces" (also known as Byte Order Marks)
262 else if (*SPos == '\xEF' && SPos[1] == '\xBB' && SPos[2] == '\xBF')
263 SPos += 3;
264 else if ((unsigned)*SPos > 32)
265 return true;
266 else
267 ++SPos;
268 }
269 // end of script reached
270 return false;
271 }
272
273 //=========================== C4Script Operator Map ===================================
274 const C4ScriptOpDef C4ScriptOpMap[] =
275 {
276 // priority postfix
277 // | identifier | changer
278 // | | Bytecode | | no second id
279 // | | | | | | RetType ParType1 ParType2
280 // prefix
281 { 15, "++", AB_Inc, false, true, false, C4V_Int, C4V_Int, C4V_Any},
282 { 15, "--", AB_Dec, false, true, false, C4V_Int, C4V_Int, C4V_Any},
283 { 15, "~", AB_BitNot, false, false, false, C4V_Int, C4V_Int, C4V_Any},
284 { 15, "!", AB_Not, false, false, false, C4V_Bool, C4V_Bool, C4V_Any},
285 { 15, "+", AB_ERR, false, false, false, C4V_Int, C4V_Int, C4V_Any},
286 { 15, "-", AB_Neg, false, false, false, C4V_Int, C4V_Int, C4V_Any},
287
288 // postfix (whithout second statement)
289 { 16, "++", AB_Inc, true, true, true, C4V_Int, C4V_Int, C4V_Any},
290 { 16, "--", AB_Dec, true, true, true, C4V_Int, C4V_Int, C4V_Any},
291
292 // postfix
293 { 14, "**", AB_Pow, true, false, false, C4V_Int, C4V_Int, C4V_Int},
294 { 13, "/", AB_Div, true, false, false, C4V_Int, C4V_Int, C4V_Int},
295 { 13, "*", AB_Mul, true, false, false, C4V_Int, C4V_Int, C4V_Int},
296 { 13, "%", AB_Mod, true, false, false, C4V_Int, C4V_Int, C4V_Int},
297 { 12, "-", AB_Sub, true, false, false, C4V_Int, C4V_Int, C4V_Int},
298 { 12, "+", AB_Sum, true, false, false, C4V_Int, C4V_Int, C4V_Int},
299 { 11, "<<", AB_LeftShift, true, false, false, C4V_Int, C4V_Int, C4V_Int},
300 { 11, ">>", AB_RightShift, true, false, false, C4V_Int, C4V_Int, C4V_Int},
301 { 10, "<", AB_LessThan, true, false, false, C4V_Bool, C4V_Int, C4V_Int},
302 { 10, "<=", AB_LessThanEqual, true, false, false, C4V_Bool, C4V_Int, C4V_Int},
303 { 10, ">", AB_GreaterThan, true, false, false, C4V_Bool, C4V_Int, C4V_Int},
304 { 10, ">=", AB_GreaterThanEqual, true, false, false, C4V_Bool, C4V_Int, C4V_Int},
305 { 9, "==", AB_Equal, true, false, false, C4V_Bool, C4V_Any, C4V_Any},
306 { 9, "!=", AB_NotEqual, true, false, false, C4V_Bool, C4V_Any, C4V_Any},
307 { 8, "&", AB_BitAnd, true, false, false, C4V_Int, C4V_Int, C4V_Int},
308 { 6, "^", AB_BitXOr, true, false, false, C4V_Int, C4V_Int, C4V_Int},
309 { 6, "|", AB_BitOr, true, false, false, C4V_Int, C4V_Int, C4V_Int},
310 { 5, "&&", AB_JUMPAND, true, false, false, C4V_Bool, C4V_Bool, C4V_Bool},
311 { 4, "||", AB_JUMPOR, true, false, false, C4V_Bool, C4V_Bool, C4V_Bool},
312 { 3, "??", AB_JUMPNNIL, true, false, false, C4V_Any, C4V_Any, C4V_Any},
313
314 // changers
315 { 2, "*=", AB_Mul, true, true, false, C4V_Int, C4V_Int, C4V_Int},
316 { 2, "/=", AB_Div, true, true, false, C4V_Int, C4V_Int, C4V_Int},
317 { 2, "%=", AB_Mod, true, true, false, C4V_Int, C4V_Int, C4V_Int},
318 { 2, "+=", AB_Sum, true, true, false, C4V_Int, C4V_Int, C4V_Int},
319 { 2, "-=", AB_Sub, true, true, false, C4V_Int, C4V_Int, C4V_Int},
320 { 2, "&=", AB_BitAnd, true, true, false, C4V_Int, C4V_Int, C4V_Int},
321 { 2, "|=", AB_BitOr, true, true, false, C4V_Int, C4V_Int, C4V_Int},
322 { 2, "^=", AB_BitXOr, true, true, false, C4V_Int, C4V_Int, C4V_Int},
323
324 { 0, nullptr, AB_ERR, false, false, false, C4V_Nil, C4V_Nil, C4V_Nil}
325 };
326
GetOperator(const char * pScript)327 int C4AulParse::GetOperator(const char* pScript)
328 {
329 // return value:
330 // >= 0: operator found. could be found in C4ScriptOfDef
331 // -1: isn't an operator
332
333 unsigned int i;
334
335 if (!*pScript) return 0;
336 // operators are not alphabetical
337 if ((*pScript >= 'a' && *pScript <= 'z') ||
338 (*pScript >= 'A' && *pScript <= 'Z'))
339 {
340 return -1;
341 }
342
343 // find the longest operator
344 int len = 0; int maxfound = -1;
345 for (i=0; C4ScriptOpMap[i].Identifier; i++)
346 {
347 if (SEqual2(pScript, C4ScriptOpMap[i].Identifier))
348 {
349 int oplen = SLen(C4ScriptOpMap[i].Identifier);
350 if (oplen > len)
351 {
352 len = oplen;
353 maxfound = i;
354 }
355 }
356 }
357 return maxfound;
358 }
359
ClearToken()360 void C4AulParse::ClearToken()
361 {
362 // if last token was a string, make sure its ref is deleted
363 if (TokenType == ATT_STRING && cStr)
364 {
365 cStr->DecRef();
366 TokenType = ATT_INVALID;
367 }
368 }
369
GetNextToken()370 C4AulTokenType C4AulParse::GetNextToken()
371 {
372 // clear mem of prev token
373 ClearToken();
374 // move to start of token
375 if (!AdvanceSpaces()) return ATT_EOF;
376 // store offset
377 TokenSPos = SPos;
378
379 // get char
380 char C = *(SPos++);
381 // Mostly sorted by frequency, except that tokens that have
382 // other tokens as prefixes need to be checked for first.
383 if (Inside(C, 'a', 'z') || Inside(C, 'A', 'Z') || C == '_' || C == '#')
384 {
385 // identifier or directive
386 bool dir = C == '#';
387 int Len = 1;
388 C = *SPos;
389 while (Inside(C, '0', '9') || Inside(C, 'a', 'z') || Inside(C, 'A', 'Z') || C == '_')
390 {
391 ++Len;
392 C = *(++SPos);
393 }
394
395 // Special case for #warning because we don't want to give it to the parser
396 if (dir && SEqual2(TokenSPos, C4AUL_Warning))
397 {
398 // Look for end of line or end of file
399 while (*SPos != '\n' && *SPos != '\0') ++SPos;
400 Parse_WarningPragma();
401 // And actually return the next token.
402 return GetNextToken();
403 }
404
405 Len = std::min(Len, C4AUL_MAX_Identifier);
406 SCopy(TokenSPos, Idtf, Len);
407 return dir ? ATT_DIR : ATT_IDTF;
408 }
409 else if (C == '(') return ATT_BOPEN; // "("
410 else if (C == ')') return ATT_BCLOSE; // ")"
411 else if (C == ',') return ATT_COMMA; // ","
412 else if (C == ';') return ATT_SCOLON; // ";"
413 else if (Inside(C, '0', '9'))
414 {
415 // integer
416 if (C == '0' && *SPos == 'x')
417 {
418 // hexadecimal
419 cInt = StrToI32(SPos + 1, 16, &SPos);
420 return ATT_INT;
421 }
422 else
423 {
424 // decimal
425 cInt = StrToI32(TokenSPos, 10, &SPos);
426 return ATT_INT;
427 }
428 }
429 else if (C == '-' && *SPos == '>' && *(SPos + 1) == '~')
430 { SPos+=2; return ATT_CALLFS;}// "->~"
431 else if (C == '-' && *SPos == '>')
432 { ++SPos; return ATT_CALL; } // "->"
433 else if ((cInt = GetOperator(SPos - 1)) != -1)
434 {
435 SPos += SLen(C4ScriptOpMap[cInt].Identifier) - 1;
436 return ATT_OPERATOR;
437 }
438 else if (C == '=') return ATT_SET; // "="
439 else if (C == '{') return ATT_BLOPEN; // "{"
440 else if (C == '}') return ATT_BLCLOSE;// "}"
441 else if (C == '"')
442 {
443 // string
444 std::string strbuf;
445 strbuf.reserve(512); // assume most strings to be smaller than this
446 // string end
447 while (*SPos != '"')
448 {
449 C = *SPos;
450 ++SPos;
451 if (C == '\\') // escape
452 switch (*SPos)
453 {
454 case '"': ++SPos; strbuf.push_back('"'); break;
455 case '\\': ++SPos; strbuf.push_back('\\'); break;
456 case 'n': ++SPos; strbuf.push_back('\n'); break;
457 case 't': ++SPos; strbuf.push_back('\t'); break;
458 case 'x':
459 {
460 ++SPos;
461 // hexadecimal escape: \xAD.
462 // First char must be a hexdigit
463 if (!std::isxdigit(*SPos))
464 {
465 Warn(C4AulWarningId::invalid_hex_escape);
466 strbuf.push_back('\\'); strbuf.push_back('x');
467 }
468 else
469 {
470 char ch = 0;
471 while (std::isxdigit(*SPos))
472 {
473 ch *= 16;
474 if (*SPos >= '0' && *SPos <= '9')
475 ch += *SPos - '0';
476 else if (*SPos >= 'a' && *SPos <= 'f')
477 ch += *SPos - 'a' + 10;
478 else if (*SPos >= 'A' && *SPos <= 'F')
479 ch += *SPos - 'A' + 10;
480 ++SPos;
481 };
482 strbuf.push_back(ch);
483 }
484 break;
485 }
486 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
487 {
488 // Octal escape: \142
489 char ch = 0;
490 while (SPos[0] >= '0' && SPos[0] <= '7')
491 {
492 ch *= 8;
493 ch += *SPos++ -'0';
494 }
495 strbuf.push_back(ch);
496 break;
497 }
498 default:
499 {
500 // just insert "\"
501 strbuf.push_back('\\');
502 // show warning
503 Warn(C4AulWarningId::invalid_escape_sequence, *(SPos + 1));
504 }
505 }
506 else if (C == 0 || C == 10 || C == 13) // line break / feed
507 throw C4AulParseError(this, "string not closed");
508 else
509 // copy character
510 strbuf.push_back(C);
511 }
512 ++SPos;
513 cStr = Strings.RegString(StdStrBuf(strbuf.data(),strbuf.size()));
514 // hold onto string, ClearToken will deref it
515 cStr->IncRef();
516 return ATT_STRING;
517 }
518 else if (C == '[') return ATT_BOPEN2; // "["
519 else if (C == ']') return ATT_BCLOSE2;// "]"
520 else if (C == '.' && *SPos == '.' && *(SPos + 1) == '.')
521 { SPos+=2; return ATT_LDOTS; } // "..."
522 else if (C == '.') return ATT_DOT; // "."
523 else if (C == ':') return ATT_COLON; // ":"
524 else
525 {
526 // show appropriate error message
527 if (C >= '!' && C <= '~')
528 throw C4AulParseError(this, FormatString("unexpected character '%c' found", C).getData());
529 else
530 throw C4AulParseError(this, FormatString(R"(unexpected character \x%x found)", (int)(unsigned char) C).getData());
531 }
532 }
533
GetTTName(C4AulBCCType e)534 static const char * GetTTName(C4AulBCCType e)
535 {
536 switch (e)
537 {
538 case AB_ARRAYA: return "ARRAYA"; // array access
539 case AB_ARRAYA_SET: return "ARRAYA_SET"; // setter
540 case AB_PROP: return "PROP";
541 case AB_PROP_SET: return "PROP_SET";
542 case AB_ARRAY_SLICE: return "ARRAY_SLICE";
543 case AB_ARRAY_SLICE_SET: return "ARRAY_SLICE_SET";
544 case AB_STACK_SET: return "STACK_SET";
545 case AB_LOCALN: return "LOCALN"; // a named local
546 case AB_LOCALN_SET: return "LOCALN_SET";
547 case AB_GLOBALN: return "GLOBALN"; // a named global
548 case AB_GLOBALN_SET: return "GLOBALN_SET";
549 case AB_PAR: return "PAR"; // Par statement
550 case AB_THIS: return "THIS";
551 case AB_FUNC: return "FUNC"; // function
552
553 // prefix
554 case AB_Inc: return "Inc"; // ++
555 case AB_Dec: return "Dec"; // --
556 case AB_BitNot: return "BitNot"; // ~
557 case AB_Not: return "Not"; // !
558 case AB_Neg: return "Neg"; // -
559
560 // postfix
561 case AB_Pow: return "Pow"; // **
562 case AB_Div: return "Div"; // /
563 case AB_Mul: return "Mul"; // *
564 case AB_Mod: return "Mod"; // %
565 case AB_Sub: return "Sub"; // -
566 case AB_Sum: return "Sum"; // +
567 case AB_LeftShift: return "LeftShift"; // <<
568 case AB_RightShift: return "RightShift"; // >>
569 case AB_LessThan: return "LessThan"; // <
570 case AB_LessThanEqual: return "LessThanEqual"; // <=
571 case AB_GreaterThan: return "GreaterThan"; // >
572 case AB_GreaterThanEqual: return "GreaterThanEqual"; // >=
573 case AB_Equal: return "Equal"; // ==
574 case AB_NotEqual: return "NotEqual"; // !=
575 case AB_BitAnd: return "BitAnd"; // &
576 case AB_BitXOr: return "BitXOr"; // ^
577 case AB_BitOr: return "BitOr"; // |
578
579 case AB_CALL: return "CALL"; // direct object call
580 case AB_CALLFS: return "CALLFS"; // failsafe direct call
581 case AB_STACK: return "STACK"; // push nulls / pop
582 case AB_INT: return "INT"; // constant: int
583 case AB_BOOL: return "BOOL"; // constant: bool
584 case AB_STRING: return "STRING"; // constant: string
585 case AB_CPROPLIST: return "CPROPLIST"; // constant: proplist
586 case AB_CARRAY: return "CARRAY"; // constant: array
587 case AB_CFUNCTION: return "CFUNCTION"; // constant: function
588 case AB_NIL: return "NIL"; // constant: nil
589 case AB_NEW_ARRAY: return "NEW_ARRAY"; // semi-constant: array
590 case AB_DUP: return "DUP"; // duplicate value from stack
591 case AB_DUP_CONTEXT: return "AB_DUP_CONTEXT"; // duplicate value from stack of parent function
592 case AB_NEW_PROPLIST: return "NEW_PROPLIST"; // create a new proplist
593 case AB_POP_TO: return "POP_TO"; // initialization of named var
594 case AB_JUMP: return "JUMP"; // jump
595 case AB_JUMPAND: return "JUMPAND";
596 case AB_JUMPOR: return "JUMPOR";
597 case AB_JUMPNNIL: return "JUMPNNIL"; // nil-coalescing operator ("??")
598 case AB_CONDN: return "CONDN"; // conditional jump (negated, pops stack)
599 case AB_COND: return "COND"; // conditional jump (pops stack)
600 case AB_FOREACH_NEXT: return "FOREACH_NEXT"; // foreach: next element
601 case AB_RETURN: return "RETURN"; // return statement
602 case AB_ERR: return "ERR"; // parse error at this position
603 case AB_DEBUG: return "DEBUG"; // debug break
604 case AB_EOFN: return "EOFN"; // end of function
605 }
606 assert(false); return "UNKNOWN";
607 }
608
DumpByteCode()609 void C4AulScriptFunc::DumpByteCode()
610 {
611 if (DEBUG_BYTECODE_DUMP)
612 {
613 fprintf(stderr, "%s:\n", GetName());
614 std::map<C4AulBCC *, int> labels;
615 int labeln = 0;
616 for (auto & bcc: Code)
617 {
618 switch (bcc.bccType)
619 {
620 case AB_JUMP: case AB_JUMPAND: case AB_JUMPOR: case AB_JUMPNNIL: case AB_CONDN: case AB_COND:
621 labels[&bcc + bcc.Par.i] = ++labeln; break;
622 default: break;
623 }
624 }
625 for (auto & bcc: Code)
626 {
627 C4AulBCCType eType = bcc.bccType;
628 if (labels.find(&bcc) != labels.end())
629 fprintf(stderr, "%d:\n", labels[&bcc]);
630 fprintf(stderr, "\t%d\t%-20s", GetLineOfCode(&bcc), GetTTName(eType));
631 switch (eType)
632 {
633 case AB_FUNC:
634 fprintf(stderr, "\t%s\n", bcc.Par.f->GetFullName().getData()); break;
635 case AB_ERR:
636 if (bcc.Par.s)
637 case AB_CALL: case AB_CALLFS: case AB_LOCALN: case AB_LOCALN_SET: case AB_PROP: case AB_PROP_SET:
638 fprintf(stderr, "\t%s\n", bcc.Par.s->GetCStr()); break;
639 case AB_STRING:
640 {
641 const StdStrBuf &s = bcc.Par.s->GetData();
642 std::string es;
643 std::for_each(s.getData(), s.getData() + s.getLength(), [&es](char c) {
644 if (std::isgraph((unsigned char)c))
645 {
646 es += c;
647 }
648 else
649 {
650 switch (c)
651 {
652 case '\'': es.append(R"(\')"); break;
653 case '\"': es.append(R"(\")"); break;
654 case '\\': es.append(R"(\\)"); break;
655 case '\a': es.append(R"(\a)"); break;
656 case '\b': es.append(R"(\b)"); break;
657 case '\f': es.append(R"(\f)"); break;
658 case '\n': es.append(R"(\n)"); break;
659 case '\r': es.append(R"(\r)"); break;
660 case '\t': es.append(R"(\t)"); break;
661 case '\v': es.append(R"(\v)"); break;
662 default:
663 {
664 std::stringstream hex;
665 hex << R"(\x)" << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>((unsigned char)c);
666 es.append(hex.str());
667 break;
668 }
669 }
670 }
671 });
672 fprintf(stderr, "\t\"%s\"\n", es.c_str()); break;
673 }
674 case AB_DEBUG: case AB_NIL: case AB_RETURN:
675 case AB_PAR: case AB_THIS:
676 case AB_ARRAYA: case AB_ARRAYA_SET: case AB_ARRAY_SLICE: case AB_ARRAY_SLICE_SET:
677 case AB_EOFN:
678 assert(!bcc.Par.X); fprintf(stderr, "\n"); break;
679 case AB_CARRAY:
680 fprintf(stderr, "\t%s\n", C4VArray(bcc.Par.a).GetDataString().getData()); break;
681 case AB_CPROPLIST:
682 fprintf(stderr, "\t%s\n", C4VPropList(bcc.Par.p).GetDataString().getData()); break;
683 case AB_JUMP: case AB_JUMPAND: case AB_JUMPOR: case AB_JUMPNNIL: case AB_CONDN: case AB_COND:
684 fprintf(stderr, "\t% -d\n", labels[&bcc + bcc.Par.i]); break;
685 default:
686 fprintf(stderr, "\t% -d\n", bcc.Par.i); break;
687 }
688 }
689 }
690 }
691
Preparse()692 bool C4ScriptHost::Preparse()
693 {
694 // handle easiest case first
695 if (State < ASS_NONE) return false;
696
697 // clear stuff
698 Includes.clear(); Appends.clear();
699
700 GetPropList()->C4PropList::Clear();
701 GetPropList()->SetProperty(P_Prototype, C4VPropList(Engine->GetPropList()));
702 LocalValues.Clear();
703
704 // Add any engine functions specific to this script
705 AddEngineFunctions();
706
707 // Insert default warnings
708 assert(enabledWarnings.empty());
709 auto &warnings = enabledWarnings[Script.getData()];
710 #define DIAG(id, text, enabled) warnings.set(static_cast<size_t>(C4AulWarningId::id), enabled);
711 #include "C4AulWarnings.h"
712 #undef DIAG
713
714 C4AulParse parser(this);
715 ast = parser.Parse_Script(this);
716
717 C4AulCompiler::Preparse(this, this, ast.get());
718
719 // #include will have to be resolved now...
720 IncludesResolved = false;
721
722 // Parse will write the properties back after the ones from included scripts
723 GetPropList()->Properties.Swap(&LocalValues);
724
725 // return success
726 this->State = ASS_PREPARSED;
727 return true;
728 }
729
GetTokenName(C4AulTokenType TokenType)730 static const char * GetTokenName(C4AulTokenType TokenType)
731 {
732 switch (TokenType)
733 {
734 case ATT_INVALID: return "invalid token";
735 case ATT_DIR: return "directive";
736 case ATT_IDTF: return "identifier";
737 case ATT_INT: return "integer constant";
738 case ATT_STRING: return "string constant";
739 case ATT_DOT: return "'.'";
740 case ATT_COMMA: return "','";
741 case ATT_COLON: return "':'";
742 case ATT_SCOLON: return "';'";
743 case ATT_BOPEN: return "'('";
744 case ATT_BCLOSE: return "')'";
745 case ATT_BOPEN2: return "'['";
746 case ATT_BCLOSE2: return "']'";
747 case ATT_BLOPEN: return "'{'";
748 case ATT_BLCLOSE: return "'}'";
749 case ATT_CALL: return "'->'";
750 case ATT_CALLFS: return "'->~'";
751 case ATT_LDOTS: return "'...'";
752 case ATT_SET: return "'='";
753 case ATT_OPERATOR: return "operator";
754 case ATT_EOF: return "end of file";
755 default: return "unrecognized token";
756 }
757 }
758
Shift()759 void C4AulParse::Shift()
760 {
761 TokenType = GetNextToken();
762 }
Check(C4AulTokenType RefTokenType,const char * Expected)763 void C4AulParse::Check(C4AulTokenType RefTokenType, const char * Expected)
764 {
765 if (TokenType != RefTokenType)
766 UnexpectedToken(Expected ? Expected : GetTokenName(RefTokenType));
767 }
Match(C4AulTokenType RefTokenType,const char * Expected)768 void C4AulParse::Match(C4AulTokenType RefTokenType, const char * Expected)
769 {
770 Check(RefTokenType, Expected);
771 Shift();
772 }
UnexpectedToken(const char * Expected)773 void C4AulParse::UnexpectedToken(const char * Expected)
774 {
775 throw C4AulParseError(this, FormatString("%s expected, but found %s", Expected, GetTokenName(TokenType)).getData());
776 }
777
Parse_WarningPragma()778 void C4AulParse::Parse_WarningPragma()
779 {
780 assert(SEqual2(TokenSPos, C4AUL_Warning));
781 assert(std::isspace(TokenSPos[sizeof(C4AUL_Warning) - 1]));
782
783
784 // Read parameters in to string buffer. The sizeof() includes the terminating \0, but
785 // that's okay because we need to skip (at least) one whitespace character anyway.
786 std::string line(TokenSPos + sizeof(C4AUL_Warning), SPos);
787 auto end = line.end();
788 auto cursor = std::find_if_not(begin(line), end, IsWhiteSpace);
789
790 if (cursor == end)
791 throw C4AulParseError(this, "'" C4Aul_Warning_enable "' or '" C4Aul_Warning_disable "' expected, but found end of line");
792
793 // Split directive on whitespace
794 auto start = cursor;
795 cursor = std::find_if(start, end, IsWhiteSpace);
796 bool enable_warning = false;
797 if (std::equal(start, cursor, C4Aul_Warning_enable))
798 {
799 enable_warning = true;
800 }
801 else if (std::equal(start, cursor, C4Aul_Warning_disable))
802 {
803 enable_warning = false;
804 }
805 else
806 {
807 throw C4AulParseError(this, FormatString("'" C4Aul_Warning_enable "' or '" C4Aul_Warning_disable "' expected, but found '%s'", std::string(start, cursor).c_str()).getData());
808 }
809
810 cursor = std::find_if_not(cursor, end, IsWhiteSpace);
811 if (cursor == end)
812 {
813 // enable or disable all warnings
814 #define DIAG(id, text, enabled) pOrgScript->EnableWarning(TokenSPos, C4AulWarningId::id, enable_warning);
815 #include "C4AulWarnings.h"
816 #undef DIAG
817 return;
818 }
819
820 // enable or disable specific warnings
821 static const std::map<std::string, C4AulWarningId> warnings{
822 #define DIAG(id, text, enabled) std::make_pair(#id, C4AulWarningId::id),
823 #include "C4AulWarnings.h"
824 #undef DIAG
825 };
826 while (cursor != end)
827 {
828 start = std::find_if_not(cursor, end, IsWhiteSpace);
829 cursor = std::find_if(start, end, IsWhiteSpace);
830 auto entry = warnings.find(std::string(start, cursor));
831 if (entry != warnings.end())
832 {
833 pOrgScript->EnableWarning(TokenSPos, entry->second, enable_warning);
834 }
835 }
836 }
837
ParseDirectExecFunc(C4AulScriptEngine * Engine,C4AulScriptContext * context)838 void C4AulScriptFunc::ParseDirectExecFunc(C4AulScriptEngine *Engine, C4AulScriptContext* context)
839 {
840 ClearCode();
841 // parse
842 C4AulParse state(this, context, Engine);
843 auto func = state.Parse_DirectExec(Script, true);
844 C4AulCompiler::Compile(this, func.get());
845 }
846
ParseDirectExecStatement(C4AulScriptEngine * Engine,C4AulScriptContext * context)847 void C4AulScriptFunc::ParseDirectExecStatement(C4AulScriptEngine *Engine, C4AulScriptContext* context)
848 {
849 ClearCode();
850 // parse
851 C4AulParse state(this, context, Engine);
852 auto func = state.Parse_DirectExec(Script, false);
853 C4AulCompiler::Compile(this, func.get());
854 }
855
Parse_DirectExec(const char * code,bool whole_function)856 std::unique_ptr<::aul::ast::FunctionDecl> C4AulParse::Parse_DirectExec(const char *code, bool whole_function)
857 {
858 // get first token
859 Shift();
860 // Synthesize a wrapping function which we can call
861 std::unique_ptr<::aul::ast::FunctionDecl> func;
862 if (whole_function)
863 {
864 func = Parse_ToplevelFunctionDecl();
865 }
866 else
867 {
868 auto expr = Parse_Expression();
869 func = std::make_unique<::aul::ast::FunctionDecl>("$internal$eval");
870 func->body = std::make_unique<::aul::ast::Block>();
871 func->body->children.push_back(std::make_unique<::aul::ast::Return>(std::move(expr)));
872 }
873 Match(ATT_EOF);
874 return func;
875 }
876
Parse_Script(C4ScriptHost * scripthost)877 std::unique_ptr<::aul::ast::Script> C4AulParse::Parse_Script(C4ScriptHost * scripthost)
878 {
879 pOrgScript = scripthost;
880 SPos = pOrgScript->Script.getData();
881 const char * SPos0 = SPos;
882 bool first_error = true;
883 auto script = ::aul::ast::Script::New(SPos0);
884 while (true) try
885 {
886 // Go to the next token if the current token could not be processed or no token has yet been parsed
887 if (SPos == SPos0)
888 {
889 Shift();
890 }
891 SPos0 = SPos;
892 switch (TokenType)
893 {
894 case ATT_DIR:
895 // check for include statement
896 if (SEqual(Idtf, C4AUL_Include))
897 {
898 Shift();
899 // get id of script to include
900 Check(ATT_IDTF, "script name");
901 script->declarations.push_back(::aul::ast::IncludePragma::New(TokenSPos, Idtf));
902 Shift();
903 }
904 else if (SEqual(Idtf, C4AUL_Append))
905 {
906 if (pOrgScript->GetPropList()->GetDef())
907 throw C4AulParseError(this, "#appendto in a Definition");
908 Shift();
909 // get id of script to include/append
910 switch (TokenType)
911 {
912 case ATT_IDTF:
913 script->declarations.push_back(::aul::ast::AppendtoPragma::New(TokenSPos, Idtf));
914 break;
915 case ATT_OPERATOR:
916 if (SEqual(C4ScriptOpMap[cInt].Identifier, "*"))
917 {
918 script->declarations.push_back(::aul::ast::AppendtoPragma::New(TokenSPos));
919 break;
920 }
921 //fallthrough
922 default:
923 // -> ID expected
924 UnexpectedToken("identifier or '*'");
925 }
926 Shift();
927 }
928 else
929 // -> unknown directive
930 Error("unknown directive: %s", Idtf);
931 break;
932 case ATT_IDTF:
933 // need a keyword here to avoid parsing random function contents
934 // after a syntax error in a function
935 // check for object-local variable definition (local)
936 if (SEqual(Idtf, C4AUL_LocalNamed) || SEqual(Idtf, C4AUL_GlobalNamed))
937 {
938 script->declarations.push_back(Parse_Var());
939 Match(ATT_SCOLON);
940 }
941 // check for variable definition (static)
942 else
943 script->declarations.push_back(Parse_ToplevelFunctionDecl());
944 break;
945 case ATT_EOF:
946 return script;
947 default:
948 UnexpectedToken("declaration");
949 }
950 first_error = true;
951 }
952 catch (C4AulError &err)
953 {
954 if (first_error)
955 {
956 ++Engine->errCnt;
957 ::ScriptEngine.ErrorHandler->OnError(err.what());
958 }
959 first_error = false;
960 }
961 }
962
Parse_ToplevelFunctionDecl()963 std::unique_ptr<::aul::ast::FunctionDecl> C4AulParse::Parse_ToplevelFunctionDecl()
964 {
965 const char *NodeStart = TokenSPos;
966 bool is_global = SEqual(Idtf, C4AUL_Global);
967 // skip access modifier
968 if (SEqual(Idtf, C4AUL_Private) ||
969 SEqual(Idtf, C4AUL_Protected) ||
970 SEqual(Idtf, C4AUL_Public) ||
971 SEqual(Idtf, C4AUL_Global))
972 {
973 Shift();
974 }
975
976 // check for func declaration
977 if (!SEqual(Idtf, C4AUL_Func))
978 Error("Declaration expected, but found identifier: %s", Idtf);
979 Shift();
980 // get next token, must be func name
981 Check(ATT_IDTF, "function name");
982
983 auto func = ::aul::ast::FunctionDecl::New(NodeStart, Idtf);
984 func->is_global = is_global;
985 Shift();
986 Parse_Function(func.get());
987 return func;
988 }
989
Parse_Function(::aul::ast::Function * func)990 void C4AulParse::Parse_Function(::aul::ast::Function *func)
991 {
992 Match(ATT_BOPEN);
993 // get pars
994 while (TokenType != ATT_BCLOSE)
995 {
996 // too many parameters?
997 if (func->params.size() >= C4AUL_MAX_Par)
998 throw C4AulParseError(this, "'func' parameter list: too many parameters (max 10)");
999 if (TokenType == ATT_LDOTS)
1000 {
1001 func->has_unnamed_params = true;
1002 Shift();
1003 // don't allow any more parameters after ellipsis
1004 break;
1005 }
1006 // must be a name or type now
1007 Check(ATT_IDTF, "parameter, '...', or ')'");
1008 // type identifier?
1009 C4V_Type t = C4V_Any;
1010 if (SEqual(Idtf, C4AUL_TypeInt)) { t = C4V_Int; Shift(); }
1011 else if (SEqual(Idtf, C4AUL_TypeBool)) { t = C4V_Bool; Shift(); }
1012 else if (SEqual(Idtf, C4AUL_TypeC4ID)) { t = C4V_Def; Shift(); }
1013 else if (SEqual(Idtf, C4AUL_TypeDef)) { t = C4V_Def; Shift(); }
1014 else if (SEqual(Idtf, C4AUL_TypeEffect)) { t = C4V_Effect; Shift(); }
1015 else if (SEqual(Idtf, C4AUL_TypeC4Object)) { t = C4V_Object; Shift(); }
1016 else if (SEqual(Idtf, C4AUL_TypePropList)) { t = C4V_PropList; Shift(); }
1017 else if (SEqual(Idtf, C4AUL_TypeString)) { t = C4V_String; Shift(); }
1018 else if (SEqual(Idtf, C4AUL_TypeArray)) { t = C4V_Array; Shift(); }
1019 else if (SEqual(Idtf, C4AUL_TypeFunction)) { t = C4V_Function; Shift(); }
1020 // a parameter name which matched a type name?
1021 std::string par_name;
1022 if (TokenType == ATT_BCLOSE || TokenType == ATT_COMMA)
1023 {
1024 par_name = Idtf;
1025 Warn(C4AulWarningId::type_name_used_as_par_name, Idtf);
1026 }
1027 else
1028 {
1029 Check(ATT_IDTF, "parameter name");
1030 par_name = Idtf;
1031 Shift();
1032 }
1033 func->params.emplace_back(par_name, t);
1034 // end of params?
1035 if (TokenType == ATT_BCLOSE)
1036 {
1037 break;
1038 }
1039 // must be a comma now
1040 Match(ATT_COMMA, "',' or ')'");
1041 }
1042 Match(ATT_BCLOSE);
1043 func->body = Parse_Block();
1044 }
1045
Parse_Block()1046 std::unique_ptr<::aul::ast::Block> C4AulParse::Parse_Block()
1047 {
1048 auto block = ::aul::ast::Block::New(TokenSPos);
1049 Match(ATT_BLOPEN);
1050 while (TokenType != ATT_BLCLOSE)
1051 {
1052 block->children.push_back(Parse_Statement());
1053 }
1054 Shift();
1055 return block;
1056 }
1057
Parse_Statement()1058 std::unique_ptr<::aul::ast::Stmt> C4AulParse::Parse_Statement()
1059 {
1060 const char *NodeStart = TokenSPos;
1061 std::unique_ptr<::aul::ast::Stmt> stmt;
1062 switch (TokenType)
1063 {
1064 // do we have a block start?
1065 case ATT_BLOPEN:
1066 return Parse_Block();
1067 case ATT_BOPEN:
1068 case ATT_BOPEN2:
1069 case ATT_SET:
1070 case ATT_OPERATOR:
1071 case ATT_INT:
1072 case ATT_STRING:
1073 {
1074 stmt = Parse_Expression();
1075 Match(ATT_SCOLON);
1076 return stmt;
1077 }
1078 // additional function separator
1079 case ATT_SCOLON:
1080 Shift();
1081 return ::aul::ast::Noop::New(NodeStart);
1082 case ATT_IDTF:
1083 // check for variable definition
1084 if (SEqual(Idtf, C4AUL_VarNamed) || SEqual(Idtf, C4AUL_LocalNamed) || SEqual(Idtf, C4AUL_GlobalNamed))
1085 stmt = Parse_Var();
1086 // check new-form func begin
1087 else if (SEqual(Idtf, C4AUL_Func) ||
1088 SEqual(Idtf, C4AUL_Private) ||
1089 SEqual(Idtf, C4AUL_Protected) ||
1090 SEqual(Idtf, C4AUL_Public) ||
1091 SEqual(Idtf, C4AUL_Global))
1092 {
1093 throw C4AulParseError(this, "unexpected end of function");
1094 }
1095 // get function by identifier: first check special functions
1096 else if (SEqual(Idtf, C4AUL_If)) // if
1097 {
1098 return Parse_If();
1099 }
1100 else if (SEqual(Idtf, C4AUL_Else)) // else
1101 {
1102 throw C4AulParseError(this, "misplaced 'else'");
1103 }
1104 else if (SEqual(Idtf, C4AUL_Do)) // while
1105 {
1106 stmt = Parse_DoWhile();
1107 }
1108 else if (SEqual(Idtf, C4AUL_While)) // while
1109 {
1110 return Parse_While();
1111 }
1112 else if (SEqual(Idtf, C4AUL_For)) // for
1113 {
1114 PushParsePos();
1115 Shift();
1116 // Look if it's the for([var] foo in array)-form
1117 // must be followed by a bracket
1118 Match(ATT_BOPEN);
1119 // optional var
1120 if (TokenType == ATT_IDTF && SEqual(Idtf, C4AUL_VarNamed))
1121 Shift();
1122 // variable and "in"
1123 if (TokenType == ATT_IDTF
1124 && GetNextToken() == ATT_IDTF
1125 && SEqual(Idtf, C4AUL_In))
1126 {
1127 // reparse the stuff in the brackets like normal statements
1128 PopParsePos();
1129 return Parse_ForEach();
1130 }
1131 else
1132 {
1133 // reparse the stuff in the brackets like normal statements
1134 PopParsePos();
1135 return Parse_For();
1136 }
1137 }
1138 else if (SEqual(Idtf, C4AUL_Return)) // return
1139 {
1140 Shift();
1141 if (TokenType == ATT_SCOLON)
1142 {
1143 // allow return; without return value (implies nil)
1144 stmt = ::aul::ast::Return::New(NodeStart, ::aul::ast::NilLit::New(NodeStart));
1145 }
1146 else
1147 {
1148 // return retval;
1149 stmt = ::aul::ast::Return::New(NodeStart, Parse_Expression());
1150 }
1151 }
1152 else if (SEqual(Idtf, C4AUL_Break)) // break
1153 {
1154 Shift();
1155 stmt = ::aul::ast::Break::New(NodeStart);
1156 }
1157 else if (SEqual(Idtf, C4AUL_Continue)) // continue
1158 {
1159 Shift();
1160 stmt = ::aul::ast::Continue::New(NodeStart);
1161 }
1162 else
1163 {
1164 stmt = Parse_Expression();
1165 }
1166 Match(ATT_SCOLON);
1167 assert(stmt);
1168 return stmt;
1169 default:
1170 UnexpectedToken("statement");
1171 }
1172 }
1173
Parse_CallParams(::aul::ast::CallExpr * call)1174 void C4AulParse::Parse_CallParams(::aul::ast::CallExpr *call)
1175 {
1176 assert(call != nullptr);
1177 assert(call->args.empty());
1178
1179 // so it's a regular function; force "("
1180 Match(ATT_BOPEN);
1181 while(TokenType != ATT_BCLOSE) switch(TokenType)
1182 {
1183 case ATT_COMMA:
1184 // got no parameter before a ","
1185 Warn(C4AulWarningId::empty_parameter_in_call, (unsigned)call->args.size(), call->callee.c_str());
1186 call->args.push_back(::aul::ast::NilLit::New(TokenSPos));
1187 Shift();
1188 break;
1189 case ATT_LDOTS:
1190 // functions using ... always take as many parameters as possible
1191 Shift();
1192 call->append_unnamed_pars = true;
1193 // Do not allow more parameters even if there is space left
1194 Check(ATT_BCLOSE);
1195 break;
1196 default:
1197 // get a parameter
1198 call->args.push_back(Parse_Expression());
1199 // end of parameter list?
1200 if (TokenType != ATT_BCLOSE)
1201 Match(ATT_COMMA, "',' or ')'");
1202 break;
1203 }
1204 Match(ATT_BCLOSE);
1205 }
1206
Parse_Array()1207 std::unique_ptr<::aul::ast::ArrayLit> C4AulParse::Parse_Array()
1208 {
1209 auto arr = ::aul::ast::ArrayLit::New(TokenSPos);
1210 // force "["
1211 Match(ATT_BOPEN2);
1212 // Create an array
1213 while (TokenType != ATT_BCLOSE2)
1214 {
1215 // got no parameter before a ","? then push nil
1216 if (TokenType == ATT_COMMA)
1217 {
1218 Warn(C4AulWarningId::empty_parameter_in_array, (unsigned)arr->values.size());
1219 arr->values.emplace_back(::aul::ast::NilLit::New(TokenSPos));
1220 }
1221 else
1222 arr->values.emplace_back(Parse_Expression());
1223 if (TokenType == ATT_BCLOSE2)
1224 break;
1225 Match(ATT_COMMA, "',' or ']'");
1226 // [] -> size 0, [*,] -> size 2, [*,*,] -> size 3
1227 if (TokenType == ATT_BCLOSE2)
1228 {
1229 Warn(C4AulWarningId::empty_parameter_in_array, (unsigned)arr->values.size());
1230 arr->values.emplace_back(::aul::ast::NilLit::New(TokenSPos));
1231 }
1232 }
1233 Shift();
1234 return arr;
1235 }
1236
Parse_PropList()1237 std::unique_ptr<::aul::ast::ProplistLit> C4AulParse::Parse_PropList()
1238 {
1239 auto proplist = ::aul::ast::ProplistLit::New(TokenSPos);
1240 if (TokenType == ATT_IDTF && SEqual(Idtf, C4AUL_New))
1241 {
1242 Shift();
1243 proplist->values.emplace_back(Strings.P[P_Prototype].GetCStr(), Parse_Expression());
1244 }
1245 Match(ATT_BLOPEN);
1246 while (TokenType != ATT_BLCLOSE)
1247 {
1248 std::string key;
1249 if (TokenType == ATT_IDTF)
1250 {
1251 key = Idtf;
1252 Shift();
1253 }
1254 else if (TokenType == ATT_STRING)
1255 {
1256 key = cStr->GetCStr();
1257 Shift();
1258 }
1259 else UnexpectedToken("string or identifier");
1260 if (TokenType != ATT_COLON && TokenType != ATT_SET)
1261 UnexpectedToken("':' or '='");
1262 Shift();
1263 proplist->values.emplace_back(key, Parse_Expression());
1264 if (TokenType == ATT_COMMA)
1265 Shift();
1266 else if (TokenType != ATT_BLCLOSE)
1267 UnexpectedToken("'}' or ','");
1268 }
1269 Shift();
1270 return proplist;
1271 }
1272
Parse_DoWhile()1273 std::unique_ptr<::aul::ast::DoLoop> C4AulParse::Parse_DoWhile()
1274 {
1275 auto loop = ::aul::ast::DoLoop::New(TokenSPos);
1276 Shift();
1277 loop->body = Parse_Statement();
1278 // Execute condition
1279 if (TokenType != ATT_IDTF || !SEqual(Idtf, C4AUL_While))
1280 UnexpectedToken("'while'");
1281 Shift();
1282 Match(ATT_BOPEN);
1283 loop->cond = Parse_Expression();
1284 Match(ATT_BCLOSE);
1285 return loop;
1286 }
1287
Parse_While()1288 std::unique_ptr<::aul::ast::WhileLoop> C4AulParse::Parse_While()
1289 {
1290 auto loop = ::aul::ast::WhileLoop::New(TokenSPos);
1291 Shift();
1292 // Execute condition
1293 Match(ATT_BOPEN);
1294 loop->cond = Parse_Expression();
1295 Match(ATT_BCLOSE);
1296 // Execute body
1297 loop->body = Parse_Statement();
1298 return loop;
1299 }
1300
Parse_If()1301 std::unique_ptr<::aul::ast::If> C4AulParse::Parse_If()
1302 {
1303 auto stmt = ::aul::ast::If::New(TokenSPos);
1304 Shift();
1305 Match(ATT_BOPEN);
1306 stmt->cond = Parse_Expression();
1307 Match(ATT_BCLOSE);
1308 // parse controlled statement
1309 stmt->iftrue = Parse_Statement();
1310 if (TokenType == ATT_IDTF && SEqual(Idtf, C4AUL_Else))
1311 {
1312 Shift();
1313 // expect a command now
1314 stmt->iffalse = Parse_Statement();
1315 }
1316 return stmt;
1317 }
1318
Parse_For()1319 std::unique_ptr<::aul::ast::ForLoop> C4AulParse::Parse_For()
1320 {
1321 auto loop = ::aul::ast::ForLoop::New(TokenSPos);
1322 Match(ATT_IDTF); Match(ATT_BOPEN);
1323 // Initialization
1324 if (TokenType == ATT_IDTF && SEqual(Idtf, C4AUL_VarNamed))
1325 {
1326 loop->init = Parse_Var();
1327 }
1328 else if (TokenType != ATT_SCOLON)
1329 {
1330 loop->init = Parse_Expression();
1331 }
1332 // Consume first semicolon
1333 Match(ATT_SCOLON);
1334 // Condition
1335 if (TokenType != ATT_SCOLON)
1336 {
1337 loop->cond = Parse_Expression();
1338 }
1339 // Consume second semicolon
1340 Match(ATT_SCOLON);
1341 // Incrementor
1342 if (TokenType != ATT_BCLOSE)
1343 {
1344 loop->incr = Parse_Expression();
1345 }
1346 // Consume closing bracket
1347 Match(ATT_BCLOSE);
1348 loop->body = Parse_Statement();
1349 return loop;
1350 }
1351
Parse_ForEach()1352 std::unique_ptr<::aul::ast::RangeLoop> C4AulParse::Parse_ForEach()
1353 {
1354 auto loop = ::aul::ast::RangeLoop::New(TokenSPos);
1355 Match(ATT_IDTF); Match(ATT_BOPEN);
1356 if (TokenType == ATT_IDTF && SEqual(Idtf, C4AUL_VarNamed))
1357 {
1358 loop->scoped_var = true;
1359 Shift();
1360 }
1361 // get variable name
1362 Check(ATT_IDTF, "variable name");
1363 loop->var = Idtf;
1364 Shift();
1365 if (TokenType != ATT_IDTF || !SEqual(Idtf, C4AUL_In))
1366 UnexpectedToken("'in'");
1367 Shift();
1368 // get expression for array
1369 loop->cond = Parse_Expression();
1370 Match(ATT_BCLOSE);
1371 // loop body
1372 loop->body = Parse_Statement();
1373 return loop;
1374 }
1375
GetPropertyByS(const C4PropList * p,const char * s,C4Value & v)1376 static bool GetPropertyByS(const C4PropList * p, const char * s, C4Value & v)
1377 {
1378 C4String * k = Strings.FindString(s);
1379 if (!k) return false;
1380 return p->GetPropertyByS(k, &v);
1381 }
1382
Parse_Expression(int iParentPrio)1383 std::unique_ptr<::aul::ast::Expr> C4AulParse::Parse_Expression(int iParentPrio)
1384 {
1385 const char *NodeStart = TokenSPos;
1386 std::unique_ptr<::aul::ast::Expr> expr;
1387 const C4ScriptOpDef * op;
1388 C4AulFunc *FoundFn = nullptr;
1389 C4Value val;
1390 switch (TokenType)
1391 {
1392 case ATT_IDTF:
1393 // XXX: Resolving literals here means that you can't create a variable
1394 // with the names "true", "false", "nil" or "this" anymore. I don't
1395 // consider this too much of a problem, because there is never a reason
1396 // to do this and it makes my job a lot easier
1397 if (SEqual(Idtf, C4AUL_True))
1398 {
1399 Shift();
1400 expr = ::aul::ast::BoolLit::New(NodeStart, true);
1401 }
1402 else if (SEqual(Idtf, C4AUL_False))
1403 {
1404 Shift();
1405 expr = ::aul::ast::BoolLit::New(NodeStart, false);
1406 }
1407 else if (SEqual(Idtf, C4AUL_Nil))
1408 {
1409 Shift();
1410 expr = ::aul::ast::NilLit::New(NodeStart);
1411 }
1412 else if (SEqual(Idtf, C4AUL_this))
1413 {
1414 Shift();
1415 if (TokenType == ATT_BOPEN)
1416 {
1417 Shift();
1418 Match(ATT_BCLOSE);
1419 // TODO: maybe warn about "this" with parentheses?
1420 }
1421 expr = ::aul::ast::ThisLit::New(NodeStart);
1422 }
1423 // XXX: Other things that people aren't allowed to do anymore: name their variables or functions any of:
1424 // "if", "else", "for", "while", "do", "return", or "Par".
1425 // We could allow variables with these names and disambiguate based on the syntax, but no.
1426 else if (SEqual(Idtf, C4AUL_If) || SEqual(Idtf, C4AUL_Else) || SEqual(Idtf, C4AUL_For) || SEqual(Idtf, C4AUL_While) || SEqual(Idtf, C4AUL_Do) || SEqual(Idtf, C4AUL_Return))
1427 {
1428 Error("reserved identifier not allowed in expressions: %s", Idtf);
1429 }
1430 else if (SEqual(Idtf, C4AUL_Par))
1431 {
1432 Shift();
1433 // "Par" is special in that it isn't a function and thus doesn't accept an arbitrary number of parameters
1434 Match(ATT_BOPEN);
1435 expr = ::aul::ast::ParExpr::New(NodeStart, Parse_Expression());
1436 Match(ATT_BCLOSE);
1437 }
1438 else if (SEqual(Idtf, C4AUL_New))
1439 {
1440 // Because people might call a variables or functions "new", we need to look ahead and guess whether it's a proplist constructor.
1441 PushParsePos();
1442 Shift();
1443 if (TokenType == ATT_IDTF)
1444 {
1445 // this must be a proplist because two identifiers can't immediately follow each other
1446 PopParsePos();
1447 expr = Parse_PropList();
1448 }
1449 else
1450 {
1451 // Some non-identifier means this is either a variable, a function, or a syntax error. Which one exactly is something we'll figure out later.
1452 PopParsePos();
1453 }
1454 }
1455 else if (SEqual(Idtf, C4AUL_Func))
1456 {
1457 PushParsePos();
1458 Shift();
1459 if (TokenType == ATT_BOPEN)
1460 {
1461 auto func = ::aul::ast::FunctionExpr::New(NodeStart);
1462 Parse_Function(func.get());
1463 expr = std::move(func);
1464 DiscardParsePos();
1465 }
1466 else
1467 {
1468 PopParsePos();
1469 }
1470 }
1471 if (!expr)
1472 {
1473 // If we end up here, it must be a proper identifier (or a reserved word that's used as an identifier).
1474 // Time to look ahead and see whether it's a function call.
1475 std::string identifier = Idtf;
1476 Shift();
1477 if (TokenType == ATT_BOPEN)
1478 {
1479 // Well, it looks like one, at least
1480 auto func = ::aul::ast::CallExpr::New(NodeStart);
1481 func->callee = identifier;
1482 Parse_CallParams(func.get());
1483 expr = std::move(func);
1484 }
1485 else
1486 {
1487 // It's most certainly not a function call.
1488 expr = ::aul::ast::VarExpr::New(NodeStart, identifier);
1489 }
1490 }
1491 break;
1492 case ATT_INT: // constant in cInt
1493 expr = ::aul::ast::IntLit::New(NodeStart, cInt);
1494 Shift();
1495 break;
1496 case ATT_STRING: // reference in cStr
1497 expr = ::aul::ast::StringLit::New(NodeStart, cStr->GetCStr());
1498 Shift();
1499 break;
1500 case ATT_OPERATOR:
1501 // -> must be a prefix operator
1502 op = &C4ScriptOpMap[cInt];
1503 // postfix?
1504 if (op->Postfix)
1505 // oops. that's wrong
1506 throw C4AulParseError(this, "postfix operator without first expression");
1507 Shift();
1508 // generate code for the following expression
1509 expr = Parse_Expression(op->Priority);
1510 if (SEqual(op->Identifier, "+"))
1511 {
1512 // This is a no-op.
1513 }
1514 else
1515 {
1516 expr = ::aul::ast::UnOpExpr::New(NodeStart, op - C4ScriptOpMap, std::move(expr));
1517 }
1518 break;
1519 case ATT_BOPEN:
1520 Shift();
1521 expr = Parse_Expression();
1522 Match(ATT_BCLOSE);
1523 break;
1524 case ATT_BOPEN2:
1525 expr = Parse_Array();
1526 break;
1527 case ATT_BLOPEN:
1528 expr = Parse_PropList();
1529 break;
1530 default:
1531 UnexpectedToken("expression");
1532 }
1533
1534 assert(expr);
1535
1536 while (true)
1537 {
1538 NodeStart = TokenSPos;
1539 switch (TokenType)
1540 {
1541 case ATT_SET:
1542 {
1543 // back out of any kind of parent operator
1544 // (except other setters, as those are right-associative)
1545 if (iParentPrio > 1)
1546 return expr;
1547 Shift();
1548 expr = ::aul::ast::AssignmentExpr::New(NodeStart, std::move(expr), Parse_Expression(1));
1549 break;
1550 }
1551 case ATT_OPERATOR:
1552 {
1553 // expect postfix operator
1554 const C4ScriptOpDef * op = &C4ScriptOpMap[cInt];
1555 if (!op->Postfix)
1556 {
1557 // does an operator with the same name exist?
1558 // when it's a postfix-operator, it can be used instead.
1559 const C4ScriptOpDef * postfixop;
1560 for (postfixop = op + 1; postfixop->Identifier; ++postfixop)
1561 if (SEqual(op->Identifier, postfixop->Identifier))
1562 if (postfixop->Postfix)
1563 break;
1564 // not found?
1565 if (!postfixop->Identifier)
1566 {
1567 Error("unexpected prefix operator: %s", op->Identifier);
1568 }
1569 // otherwise use the new-found correct postfix operator
1570 op = postfixop;
1571 }
1572
1573 if (iParentPrio + !op->Changer > op->Priority)
1574 return expr;
1575
1576 Shift();
1577 if (op->NoSecondStatement)
1578 {
1579 // Postfix unary op
1580 expr = ::aul::ast::UnOpExpr::New(NodeStart, op - C4ScriptOpMap, std::move(expr));
1581 }
1582 else
1583 {
1584 expr = ::aul::ast::BinOpExpr::New(NodeStart, op - C4ScriptOpMap, std::move(expr), Parse_Expression(op->Priority));
1585 }
1586 break;
1587 }
1588 case ATT_BOPEN2:
1589 {
1590 // parse either [index], or [start:end] in which case either index is optional
1591 Shift();
1592 ::aul::ast::ExprPtr start;
1593 if (TokenType == ATT_COLON)
1594 start = ::aul::ast::IntLit::New(TokenSPos, 0); // slice with first index missing -> implicit start index zero
1595 else
1596 start = Parse_Expression();
1597
1598 if (TokenType == ATT_BCLOSE2)
1599 {
1600 expr = ::aul::ast::SubscriptExpr::New(NodeStart, std::move(expr), std::move(start));
1601 }
1602 else if (TokenType == ATT_COLON)
1603 {
1604 Shift();
1605 ::aul::ast::ExprPtr end;
1606 if (TokenType == ATT_BCLOSE2)
1607 {
1608 end = ::aul::ast::IntLit::New(TokenSPos, std::numeric_limits<int32_t>::max());
1609 }
1610 else
1611 {
1612 end = Parse_Expression();
1613 }
1614 expr = ::aul::ast::SliceExpr::New(NodeStart, std::move(expr), std::move(start), std::move(end));
1615 }
1616 else
1617 {
1618 UnexpectedToken("']' or ':'");
1619 }
1620 Match(ATT_BCLOSE2);
1621 break;
1622 }
1623 case ATT_DOT:
1624 Shift();
1625 Check(ATT_IDTF, "property name");
1626 expr = ::aul::ast::SubscriptExpr::New(NodeStart, std::move(expr), ::aul::ast::StringLit::New(TokenSPos, Idtf));
1627 Shift();
1628 break;
1629 case ATT_CALL: case ATT_CALLFS:
1630 {
1631 auto call = ::aul::ast::CallExpr::New(NodeStart);
1632 call->context = std::move(expr);
1633 call->safe_call = TokenType == ATT_CALLFS;
1634 Shift();
1635 Check(ATT_IDTF, "function name after '->'");
1636 call->callee = Idtf;
1637 Shift();
1638 Parse_CallParams(call.get());
1639 expr = std::move(call);
1640 break;
1641 }
1642 default:
1643 return expr;
1644 }
1645 }
1646 }
1647
Parse_Var()1648 std::unique_ptr<::aul::ast::VarDecl> C4AulParse::Parse_Var()
1649 {
1650 auto decl = ::aul::ast::VarDecl::New(TokenSPos);
1651 if (SEqual(Idtf, C4AUL_VarNamed))
1652 {
1653 decl->scope = ::aul::ast::VarDecl::Scope::Func;
1654 }
1655 else if (SEqual(Idtf, C4AUL_LocalNamed))
1656 {
1657 decl->scope = ::aul::ast::VarDecl::Scope::Object;
1658 }
1659 else if (SEqual(Idtf, C4AUL_GlobalNamed))
1660 {
1661 decl->scope = ::aul::ast::VarDecl::Scope::Global;
1662 }
1663 else
1664 {
1665 assert(0 && "C4AulParse::Parse_Var called with invalid parse state (current token should be scope of variable)");
1666 // Uh this shouldn't happen, ever
1667 Error("internal error: C4AulParse::Parse_Var called with invalid parse state (current token should be scope of variable, but is '%s')", Idtf);
1668 }
1669 Shift();
1670 if (TokenType == ATT_IDTF && SEqual(Idtf, C4AUL_Const))
1671 {
1672 decl->constant = true;
1673 Shift();
1674 }
1675 while (true)
1676 {
1677 // get desired variable name
1678 Check(ATT_IDTF, "variable name");
1679 std::string identifier = Idtf;
1680 Shift();
1681 ::aul::ast::ExprPtr init;
1682 if (TokenType == ATT_SET)
1683 {
1684 Shift();
1685 init = Parse_Expression();
1686 }
1687 decl->decls.push_back({ identifier, std::move(init) });
1688 if (TokenType == ATT_SCOLON)
1689 return decl;
1690 Match(ATT_COMMA, "',' or ';'");
1691 }
1692 }
1693
CopyPropList(C4Set<C4Property> & from,C4PropListStatic * to)1694 void C4ScriptHost::CopyPropList(C4Set<C4Property> & from, C4PropListStatic * to)
1695 {
1696 // append all funcs and local variable initializations
1697 const C4Property * prop = from.First();
1698 while (prop)
1699 {
1700 switch(prop->Value.GetType())
1701 {
1702 case C4V_Function:
1703 {
1704 C4AulScriptFunc * sf = prop->Value.getFunction()->SFunc();
1705 if (sf)
1706 {
1707 C4AulScriptFunc *sfc;
1708 if (sf->pOrgScript != this)
1709 sfc = new C4AulScriptFunc(to, *sf);
1710 else
1711 sfc = sf;
1712 sfc->SetOverloaded(to->GetFunc(sf->Name));
1713 to->SetPropertyByS(prop->Key, C4VFunction(sfc));
1714 }
1715 else
1716 {
1717 // engine function
1718 to->SetPropertyByS(prop->Key, prop->Value);
1719 }
1720 }
1721 break;
1722 case C4V_PropList:
1723 {
1724 C4PropListStatic * p = prop->Value._getPropList()->IsStatic();
1725 assert(p);
1726 if (prop->Key != &::Strings.P[P_Prototype])
1727 if (!p || p->GetParent() != to)
1728 {
1729 p = C4PropList::NewStatic(nullptr, to, prop->Key);
1730 CopyPropList(prop->Value._getPropList()->Properties, p);
1731 }
1732 to->SetPropertyByS(prop->Key, C4VPropList(p));
1733 }
1734 break;
1735 case C4V_Array: // FIXME: copy the array if necessary
1736 default:
1737 to->SetPropertyByS(prop->Key, prop->Value);
1738 }
1739 prop = from.Next(prop);
1740 }
1741 }
1742
Parse()1743 bool C4ScriptHost::Parse()
1744 {
1745 // check state
1746 if (State != ASS_LINKED) return false;
1747
1748 if (!Appends.empty())
1749 {
1750 // #appendto scripts are not allowed to contain global functions or belong to definitions
1751 // so their contents are not reachable
1752 return true;
1753 }
1754
1755 C4PropListStatic * p = GetPropList();
1756
1757 for (auto & SourceScript : SourceScripts)
1758 {
1759 CopyPropList(SourceScript->LocalValues, p);
1760 if (SourceScript == this)
1761 continue;
1762 // definition appends
1763 if (GetPropList() && GetPropList()->GetDef() && SourceScript->GetPropList() && SourceScript->GetPropList()->GetDef())
1764 GetPropList()->GetDef()->IncludeDefinition(SourceScript->GetPropList()->GetDef());
1765 }
1766
1767 // generate bytecode
1768 for (auto &s : SourceScripts)
1769 C4AulCompiler::Compile(this, s, s->ast.get());
1770
1771 // save line count
1772 Engine->lineCnt += SGetLine(Script.getData(), Script.getPtr(Script.getLength()));
1773
1774 // finished
1775 State = ASS_PARSED;
1776
1777 return true;
1778 }
1779
EnableWarning(const char * pos,C4AulWarningId warning,bool enable)1780 void C4ScriptHost::EnableWarning(const char *pos, C4AulWarningId warning, bool enable)
1781 {
1782 auto entry = enabledWarnings.emplace(pos, decltype(enabledWarnings)::mapped_type{});
1783 if (entry.second)
1784 {
1785 // If there was no earlier entry for this position, copy the previous
1786 // warning state
1787 assert(entry.first != enabledWarnings.begin());
1788 auto previous = entry.first;
1789 --previous;
1790 entry.first->second = previous->second;
1791 }
1792 entry.first->second.set(static_cast<size_t>(warning), enable);
1793 }
1794
IsWarningEnabled(const char * pos,C4AulWarningId warning) const1795 bool C4ScriptHost::IsWarningEnabled(const char *pos, C4AulWarningId warning) const
1796 {
1797 assert(!enabledWarnings.empty());
1798 if (enabledWarnings.empty())
1799 return false;
1800
1801 // find nearest set of warnings at or before the current position
1802 auto entry = enabledWarnings.upper_bound(pos);
1803 assert(entry != enabledWarnings.begin());
1804 if (entry != enabledWarnings.begin())
1805 {
1806 --entry;
1807 }
1808
1809 return entry->second.test(static_cast<size_t>(warning));
1810 }
1811
PushParsePos()1812 void C4AulParse::PushParsePos()
1813 {
1814 parse_pos_stack.push(TokenSPos);
1815 }
1816
PopParsePos()1817 void C4AulParse::PopParsePos()
1818 {
1819 assert(!parse_pos_stack.empty());
1820 SPos = parse_pos_stack.top();
1821 DiscardParsePos();
1822 Shift();
1823 }
1824
DiscardParsePos()1825 void C4AulParse::DiscardParsePos()
1826 {
1827 assert(!parse_pos_stack.empty());
1828 parse_pos_stack.pop();
1829 }
1830