1 // This is an open source non-commercial project. Dear PVS-Studio, please check
2 // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
3
4 /// VimL expression parser
5
6 // Planned incompatibilities (to be included into vim_diff.txt when this parser
7 // will be an actual part of VimL evaluation process):
8 //
9 // 1. Expressions are first fully parsed and only then executed. This means
10 // that while ":echo [system('touch abc')" will create file "abc" in Vim and
11 // only then raise syntax error regarding missing comma in list in Neovim
12 // trying to execute that will immediately raise syntax error regarding
13 // missing list end without actually executing anything.
14 // 2. Expressions are first fully parsed, without considering any runtime
15 // information. This means things like that "d.a" does not change its
16 // meaning depending on type of "d" (or whether Vim is currently executing or
17 // skipping). For compatibility reasons the dot thus may either be “concat
18 // or subscript” operator or just “concat” operator.
19 // 3. Expressions parser is aware whether it is called for :echo or <C-r>=.
20 // This means that while "<C-r>=1 | 2<CR>" is equivalent to "<C-r>=1<CR>"
21 // because "| 2" part is left to be treated as a command separator and then
22 // ignored in Neovim it is an error.
23 // 4. Expressions parser has generally better error reporting. But for
24 // compatibility reasons most errors have error code E15 while error messages
25 // are significantly different from Vim’s E15. Also some error codes were
26 // retired because of being harder to emulate or because of them being
27 // a result of differences in parsing process: e.g. with ":echo {a, b}" Vim
28 // will attempt to parse expression as lambda, fail, check whether it is
29 // a curly-braces-name, fail again, and evaluate that as a dictionary, giving
30 // error regarding undefined variable "a" (or about missing colon). Neovim
31 // will not try to evaluate anything here: comma right after an argument name
32 // means that expression may not be anything, but lambda, so the resulting
33 // error message will never be about missing variable or colon: it will be
34 // about missing arrow (or a continuation of argument list).
35 // 5. Failing to parse expression always gives exactly one error message: no
36 // more stack of error messages like >
37 //
38 // :echo [1,
39 // E697: Missing end of List ']':
40 // E15: Invalid expression: [1,
41 //
42 // < , just exactly one E697 message.
43 // 6. Some expressions involving calling parenthesis which are treated
44 // separately by Vim even when not separated by spaces are treated as one
45 // expression by Neovim: e.g. ":echo (1)(1)" will yield runtime error after
46 // failing to call "1", while Vim will echo "1 1". Reasoning is the same:
47 // type of what is in the first expression is generally not known when
48 // parsing, so to have separate expressions like this separate them with
49 // spaces.
50 // 7. 'isident' no longer applies to environment variables, they always include
51 // ASCII alphanumeric characters and underscore and nothing except this.
52
53 #include <assert.h>
54 #include <stdbool.h>
55 #include <stddef.h>
56 #include <string.h>
57
58 #include "nvim/ascii.h"
59 #include "nvim/assert.h"
60 #include "nvim/charset.h"
61 #include "nvim/eval/typval.h"
62 #include "nvim/lib/kvec.h"
63 #include "nvim/memory.h"
64 #include "nvim/types.h"
65 #include "nvim/vim.h"
66 #include "nvim/viml/parser/expressions.h"
67 #include "nvim/viml/parser/parser.h"
68
69 #define vim_str2nr(s, ...) vim_str2nr((const char_u *)(s), __VA_ARGS__)
70
71 typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack;
72
73 /// Which nodes may be wanted
74 typedef enum {
75 /// Operators: function call, subscripts, binary operators, …
76 ///
77 /// For unrestricted expressions.
78 kENodeOperator,
79 /// Values: literals, variables, nested expressions, unary operators.
80 ///
81 /// For unrestricted expressions as well, implies that top item in AST stack
82 /// points to NULL.
83 kENodeValue,
84 } ExprASTWantedNode;
85
86 /// Parse type: what is being parsed currently
87 typedef enum {
88 /// Parsing regular VimL expression
89 kEPTExpr = 0,
90 /// Parsing lambda arguments
91 ///
92 /// Just like parsing function arguments, but it is valid to be ended with an
93 /// arrow only.
94 kEPTLambdaArguments,
95 /// Assignment: parsing for :let
96 kEPTAssignment,
97 /// Single assignment: used when lists are not allowed (i.e. when nesting)
98 kEPTSingleAssignment,
99 } ExprASTParseType;
100
101 typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack;
102
103 /// Operator priority level
104 typedef enum {
105 kEOpLvlInvalid = 0,
106 kEOpLvlComplexIdentifier,
107 kEOpLvlParens,
108 kEOpLvlAssignment,
109 kEOpLvlArrow,
110 kEOpLvlComma,
111 kEOpLvlColon,
112 kEOpLvlTernaryValue,
113 kEOpLvlTernary,
114 kEOpLvlOr,
115 kEOpLvlAnd,
116 kEOpLvlComparison,
117 kEOpLvlAddition, ///< Addition, subtraction and concatenation.
118 kEOpLvlMultiplication, ///< Multiplication, division and modulo.
119 kEOpLvlUnary, ///< Unary operations: not, minus, plus.
120 kEOpLvlSubscript, ///< Subscripts.
121 kEOpLvlValue, ///< Values: literals, variables, nested expressions, …
122 } ExprOpLvl;
123
124 /// Operator associativity
125 typedef enum {
126 kEOpAssNo= 'n', ///< Not associative / not applicable.
127 kEOpAssLeft = 'l', ///< Left associativity.
128 kEOpAssRight = 'r', ///< Right associativity.
129 } ExprOpAssociativity;
130
131 #ifdef INCLUDE_GENERATED_DECLARATIONS
132 # include "viml/parser/expressions.c.generated.h"
133 #endif
134
135 /// Scale number by a given factor
136 ///
137 /// Used to apply exponent to a number. Idea taken from uClibc.
138 ///
139 /// @param[in] num Number to scale. Does not bother doing anything if it is
140 /// zero.
141 /// @param[in] base Base, should be 10 since non-decimal floating-point
142 /// numbers are not supported.
143 /// @param[in] exponent Exponent to scale by.
144 /// @param[in] exponent_negative True if exponent is negative.
scale_number(const float_T num,const uint8_t base,const uvarnumber_T exponent,const bool exponent_negative)145 static inline float_T scale_number(const float_T num, const uint8_t base,
146 const uvarnumber_T exponent, const bool exponent_negative)
147 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST
148 {
149 if (num == 0 || exponent == 0) {
150 return num;
151 }
152 assert(base);
153 uvarnumber_T exp = exponent;
154 float_T p_base = (float_T)base;
155 float_T ret = num;
156 while (exp) {
157 if (exp & 1) {
158 if (exponent_negative) {
159 ret /= p_base;
160 } else {
161 ret *= p_base;
162 }
163 }
164 exp >>= 1;
165 p_base *= p_base;
166 }
167 return ret;
168 }
169
170 /// Get next token for the VimL expression input
171 ///
172 /// @param pstate Parser state.
173 /// @param[in] flags Flags, @see LexExprFlags.
174 ///
175 /// @return Next token.
viml_pexpr_next_token(ParserState * const pstate,const int flags)176 LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
177 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
178 {
179 LexExprToken ret = {
180 .type = kExprLexInvalid,
181 .start = pstate->pos,
182 };
183 ParserLine pline;
184 if (!viml_parser_get_remaining_line(pstate, &pline)) {
185 ret.type = kExprLexEOC;
186 return ret;
187 }
188 if (pline.size <= 0) {
189 ret.len = 0;
190 ret.type = kExprLexEOC;
191 goto viml_pexpr_next_token_adv_return;
192 }
193 ret.len = 1;
194 const uint8_t schar = (uint8_t)pline.data[0];
195 #define GET_CCS(ret, pline) \
196 do { \
197 if (ret.len < pline.size \
198 && strchr("?#", pline.data[ret.len]) != NULL) { \
199 ret.data.cmp.ccs = \
200 (ExprCaseCompareStrategy)pline.data[ret.len]; \
201 ret.len++; \
202 } else { \
203 ret.data.cmp.ccs = kCCStrategyUseOption; \
204 } \
205 } while (0)
206 switch (schar) {
207 // Paired brackets.
208 #define BRACKET(typ, opning, clsing) \
209 case opning: \
210 case clsing: { \
211 ret.type = typ; \
212 ret.data.brc.closing = (schar == clsing); \
213 break; \
214 }
215 BRACKET(kExprLexParenthesis, '(', ')')
216 BRACKET(kExprLexBracket, '[', ']')
217 BRACKET(kExprLexFigureBrace, '{', '}')
218 #undef BRACKET
219
220 // Single character tokens without data.
221 #define CHAR(typ, ch) \
222 case ch: { \
223 ret.type = typ; \
224 break; \
225 }
226 CHAR(kExprLexQuestion, '?')
227 CHAR(kExprLexColon, ':')
228 CHAR(kExprLexComma, ',')
229 #undef CHAR
230
231 // Multiplication/division/modulo.
232 #define MUL(mul_type, ch) \
233 case ch: { \
234 ret.type = kExprLexMultiplication; \
235 ret.data.mul.type = mul_type; \
236 break; \
237 }
238 MUL(kExprLexMulMul, '*')
239 MUL(kExprLexMulDiv, '/')
240 MUL(kExprLexMulMod, '%')
241 #undef MUL
242
243 #define CHARREG(typ, cond) \
244 do { \
245 ret.type = typ; \
246 for (; (ret.len < pline.size \
247 && cond(pline.data[ret.len])) \
248 ; ret.len++) { \
249 } \
250 } while (0)
251
252 // Whitespace.
253 case ' ':
254 case TAB:
255 CHARREG(kExprLexSpacing, ascii_iswhite);
256 break;
257
258 // Control character, except for NUL, NL and TAB.
259 case Ctrl_A:
260 case Ctrl_B:
261 case Ctrl_C:
262 case Ctrl_D:
263 case Ctrl_E:
264 case Ctrl_F:
265 case Ctrl_G:
266 case Ctrl_H:
267
268 case Ctrl_K:
269 case Ctrl_L:
270 case Ctrl_M:
271 case Ctrl_N:
272 case Ctrl_O:
273 case Ctrl_P:
274 case Ctrl_Q:
275 case Ctrl_R:
276 case Ctrl_S:
277 case Ctrl_T:
278 case Ctrl_U:
279 case Ctrl_V:
280 case Ctrl_W:
281 case Ctrl_X:
282 case Ctrl_Y:
283 case Ctrl_Z:
284 #define ISCTRL(schar) (schar < ' ')
285 CHARREG(kExprLexInvalid, ISCTRL);
286 ret.data.err.type = kExprLexSpacing;
287 ret.data.err.msg =
288 _("E15: Invalid control character present in input: %.*s");
289 break;
290 #undef ISCTRL
291
292 // Number.
293 case '0':
294 case '1':
295 case '2':
296 case '3':
297 case '4':
298 case '5':
299 case '6':
300 case '7':
301 case '8':
302 case '9': {
303 ret.data.num.is_float = false;
304 ret.data.num.base = 10;
305 size_t frac_start = 0;
306 size_t exp_start = 0;
307 size_t frac_end = 0;
308 bool exp_negative = false;
309 CHARREG(kExprLexNumber, ascii_isdigit);
310 if (flags & kELFlagAllowFloat) {
311 const LexExprToken non_float_ret = ret;
312 if (pline.size > ret.len + 1
313 && pline.data[ret.len] == '.'
314 && ascii_isdigit(pline.data[ret.len + 1])) {
315 ret.len++;
316 frac_start = ret.len;
317 frac_end = ret.len;
318 ret.data.num.is_float = true;
319 for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len])
320 ; ret.len++) {
321 // A small optimization: trailing zeroes in fractional part do not
322 // add anything to significand, so it is useless to include them in
323 // frac_end.
324 if (pline.data[ret.len] != '0') {
325 frac_end = ret.len + 1;
326 }
327 }
328 if (pline.size > ret.len + 1
329 && (pline.data[ret.len] == 'e'
330 || pline.data[ret.len] == 'E')
331 && ((pline.size > ret.len + 2
332 && (pline.data[ret.len + 1] == '+'
333 || pline.data[ret.len + 1] == '-')
334 && ascii_isdigit(pline.data[ret.len + 2]))
335 || ascii_isdigit(pline.data[ret.len + 1]))) {
336 ret.len++;
337 if (pline.data[ret.len] == '+'
338 || (exp_negative = (pline.data[ret.len] == '-'))) {
339 ret.len++;
340 }
341 exp_start = ret.len;
342 CHARREG(kExprLexNumber, ascii_isdigit);
343 }
344 }
345 if (pline.size > ret.len
346 && (pline.data[ret.len] == '.'
347 || ASCII_ISALPHA(pline.data[ret.len]))) {
348 ret = non_float_ret;
349 }
350 }
351 // TODO(ZyX-I): detect overflows
352 if (ret.data.num.is_float) {
353 // Vim used to use string2float here which in turn uses strtod(). There
354 // are two problems with this approach:
355 // 1. strtod() is locale-dependent. Not sure how it is worked around so
356 // that I do not see relevant bugs, but it still does not look like
357 // a good idea.
358 // 2. strtod() does not accept length argument.
359 //
360 // The below variant of parsing floats was recognized as acceptable
361 // because it is basically how uClibc does the thing: it generates
362 // a number ignoring decimal point (but recording its position), then
363 // uses recorded position to scale number down when processing exponent.
364 float_T significand_part = 0;
365 uvarnumber_T exp_part = 0;
366 const size_t frac_size = (size_t)(frac_end - frac_start);
367 for (size_t i = 0; i < frac_end; i++) {
368 if (i == frac_start - 1) {
369 continue;
370 }
371 significand_part = significand_part * 10 + (pline.data[i] - '0');
372 }
373 if (exp_start) {
374 vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part,
375 (int)(ret.len - exp_start), false);
376 }
377 if (exp_negative) {
378 exp_part += frac_size;
379 } else {
380 if (exp_part < frac_size) {
381 exp_negative = true;
382 exp_part = frac_size - exp_part;
383 } else {
384 exp_part -= frac_size;
385 }
386 }
387 ret.data.num.val.floating = scale_number(significand_part, 10, exp_part,
388 exp_negative);
389 } else {
390 int len;
391 int prep;
392 vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL,
393 &ret.data.num.val.integer, (int)pline.size, false);
394 ret.len = (size_t)len;
395 const uint8_t bases[] = {
396 [0] = 10,
397 ['0'] = 8,
398 ['x'] = 16, ['X'] = 16,
399 ['b'] = 2, ['B'] = 2,
400 };
401 ret.data.num.base = bases[prep];
402 }
403 break;
404 }
405
406 #define ISWORD_OR_AUTOLOAD(x) \
407 (ascii_isident(x) || (x) == AUTOLOAD_CHAR)
408
409 // Environment variable.
410 case '$':
411 CHARREG(kExprLexEnv, ascii_isident);
412 break;
413
414 // Normal variable/function name.
415 case 'a':
416 case 'b':
417 case 'c':
418 case 'd':
419 case 'e':
420 case 'f':
421 case 'g':
422 case 'h':
423 case 'i':
424 case 'j':
425 case 'k':
426 case 'l':
427 case 'm':
428 case 'n':
429 case 'o':
430 case 'p':
431 case 'q':
432 case 'r':
433 case 's':
434 case 't':
435 case 'u':
436 case 'v':
437 case 'w':
438 case 'x':
439 case 'y':
440 case 'z':
441 case 'A':
442 case 'B':
443 case 'C':
444 case 'D':
445 case 'E':
446 case 'F':
447 case 'G':
448 case 'H':
449 case 'I':
450 case 'J':
451 case 'K':
452 case 'L':
453 case 'M':
454 case 'N':
455 case 'O':
456 case 'P':
457 case 'Q':
458 case 'R':
459 case 'S':
460 case 'T':
461 case 'U':
462 case 'V':
463 case 'W':
464 case 'X':
465 case 'Y':
466 case 'Z':
467 case '_':
468 ret.data.var.scope = 0;
469 ret.data.var.autoload = false;
470 CHARREG(kExprLexPlainIdentifier, ascii_isident);
471 // "is" and "isnot" operators.
472 if (!(flags & kELFlagIsNotCmp)
473 && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0)
474 || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) {
475 ret.type = kExprLexComparison;
476 ret.data.cmp.type = kExprCmpIdentical;
477 ret.data.cmp.inv = (ret.len == 5);
478 GET_CCS(ret, pline);
479 // Scope: `s:`, etc.
480 } else if (ret.len == 1
481 && pline.size > 1
482 && memchr(EXPR_VAR_SCOPE_LIST, schar,
483 sizeof(EXPR_VAR_SCOPE_LIST)) != NULL
484 && pline.data[ret.len] == ':'
485 && !(flags & kELFlagForbidScope)) {
486 ret.len++;
487 ret.data.var.scope = (ExprVarScope)schar;
488 CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
489 ret.data.var.autoload = (
490 memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2)
491 != NULL);
492 // Previous CHARREG stopped at autoload character in order to make it
493 // possible to detect `is#`. Continue now with autoload characters
494 // included.
495 //
496 // Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of
497 // function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. This
498 // needs to be resolved on the higher level where context is available.
499 } else if (pline.size > ret.len
500 && pline.data[ret.len] == AUTOLOAD_CHAR) {
501 ret.data.var.autoload = true;
502 CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
503 }
504 break;
505
506 #undef ISWORD_OR_AUTOLOAD
507 #undef CHARREG
508
509 // Option.
510 case '&': {
511 #define OPTNAMEMISS(ret) \
512 do { \
513 ret.type = kExprLexInvalid; \
514 ret.data.err.type = kExprLexOption; \
515 ret.data.err.msg = _("E112: Option name missing: %.*s"); \
516 } while (0)
517 if (pline.size > 1 && pline.data[1] == '&') {
518 ret.type = kExprLexAnd;
519 ret.len++;
520 break;
521 }
522 if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) {
523 OPTNAMEMISS(ret);
524 break;
525 }
526 ret.type = kExprLexOption;
527 if (pline.size > 2
528 && pline.data[2] == ':'
529 && memchr(EXPR_OPT_SCOPE_LIST, pline.data[1],
530 sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) {
531 ret.len += 2;
532 ret.data.opt.scope = (ExprOptScope)pline.data[1];
533 ret.data.opt.name = pline.data + 3;
534 } else {
535 ret.data.opt.scope = kExprOptScopeUnspecified;
536 ret.data.opt.name = pline.data + 1;
537 }
538 const char *p = ret.data.opt.name;
539 const char *const e = pline.data + pline.size;
540 if (e - p >= 4 && p[0] == 't' && p[1] == '_') {
541 ret.data.opt.len = 4;
542 ret.len += 4;
543 } else {
544 for (; p < e && ASCII_ISALPHA(*p); p++) {
545 }
546 ret.data.opt.len = (size_t)(p - ret.data.opt.name);
547 if (ret.data.opt.len == 0) {
548 OPTNAMEMISS(ret);
549 } else {
550 ret.len += ret.data.opt.len;
551 }
552 }
553 break;
554 #undef OPTNAMEMISS
555 }
556
557 // Register.
558 case '@':
559 ret.type = kExprLexRegister;
560 if (pline.size > 1) {
561 ret.len++;
562 ret.data.reg.name = (uint8_t)pline.data[1];
563 } else {
564 ret.data.reg.name = -1;
565 }
566 break;
567
568 // Single quoted string.
569 case '\'':
570 ret.type = kExprLexSingleQuotedString;
571 ret.data.str.closed = false;
572 for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
573 if (pline.data[ret.len] == '\'') {
574 if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') {
575 ret.len++;
576 } else {
577 ret.data.str.closed = true;
578 }
579 }
580 }
581 break;
582
583 // Double quoted string.
584 case '"':
585 ret.type = kExprLexDoubleQuotedString;
586 ret.data.str.closed = false;
587 for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
588 if (pline.data[ret.len] == '\\') {
589 if (ret.len + 1 < pline.size) {
590 ret.len++;
591 }
592 } else if (pline.data[ret.len] == '"') {
593 ret.data.str.closed = true;
594 }
595 }
596 break;
597
598 // Unary not, (un)equality and regex (not) match comparison operators.
599 case '!':
600 case '=':
601 if (pline.size == 1) {
602 ret.type = (schar == '!' ? kExprLexNot : kExprLexAssignment);
603 ret.data.ass.type = kExprAsgnPlain;
604 break;
605 }
606 ret.type = kExprLexComparison;
607 ret.data.cmp.inv = (schar == '!');
608 if (pline.data[1] == '=') {
609 ret.data.cmp.type = kExprCmpEqual;
610 ret.len++;
611 } else if (pline.data[1] == '~') {
612 ret.data.cmp.type = kExprCmpMatches;
613 ret.len++;
614 } else if (schar == '!') {
615 ret.type = kExprLexNot;
616 } else {
617 ret.type = kExprLexAssignment;
618 ret.data.ass.type = kExprAsgnPlain;
619 }
620 GET_CCS(ret, pline);
621 break;
622
623 // Less/greater [or equal to] comparison operators.
624 case '>':
625 case '<': {
626 ret.type = kExprLexComparison;
627 const bool haseqsign = (pline.size > 1 && pline.data[1] == '=');
628 if (haseqsign) {
629 ret.len++;
630 }
631 GET_CCS(ret, pline);
632 ret.data.cmp.inv = (schar == '<');
633 ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign)
634 ? kExprCmpGreaterOrEqual
635 : kExprCmpGreater);
636 break;
637 }
638
639 // Minus sign, arrow from lambdas or augmented assignment.
640 case '-': {
641 if (pline.size > 1 && pline.data[1] == '>') {
642 ret.len++;
643 ret.type = kExprLexArrow;
644 } else if (pline.size > 1 && pline.data[1] == '=') {
645 ret.len++;
646 ret.type = kExprLexAssignment;
647 ret.data.ass.type = kExprAsgnSubtract;
648 } else {
649 ret.type = kExprLexMinus;
650 }
651 break;
652 }
653
654 // Sign or augmented assignment.
655 #define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \
656 case ch: { \
657 if (pline.size > 1 && pline.data[1] == '=') { \
658 ret.len++; \
659 ret.type = kExprLexAssignment; \
660 ret.data.ass.type = ass_type; \
661 } else { \
662 ret.type = ch_type; \
663 } \
664 break; \
665 }
666 CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd)
667 CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat)
668 #undef CHAR_OR_ASSIGN
669
670 // Expression end because Ex command ended.
671 case NUL:
672 case NL:
673 if (flags & kELFlagForbidEOC) {
674 ret.type = kExprLexInvalid;
675 ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
676 ret.data.err.type = kExprLexSpacing;
677 } else {
678 ret.type = kExprLexEOC;
679 }
680 break;
681
682 case '|':
683 if (pline.size >= 2 && pline.data[ret.len] == '|') {
684 // "||" is or.
685 ret.len++;
686 ret.type = kExprLexOr;
687 } else if (flags & kELFlagForbidEOC) {
688 // Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any
689 // errors. This will be changed here.
690 ret.type = kExprLexInvalid;
691 ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
692 ret.data.err.type = kExprLexOr;
693 } else {
694 ret.type = kExprLexEOC;
695 }
696 break;
697
698 // Everything else is not valid.
699 default:
700 ret.len = (size_t)utfc_ptr2len_len((const char_u *)pline.data,
701 (int)pline.size);
702 ret.type = kExprLexInvalid;
703 ret.data.err.type = kExprLexPlainIdentifier;
704 ret.data.err.msg = _("E15: Unidentified character: %.*s");
705 break;
706 }
707 #undef GET_CCS
708 viml_pexpr_next_token_adv_return:
709 if (!(flags & kELFlagPeek)) {
710 viml_parser_advance(pstate, ret.len);
711 }
712 return ret;
713 }
714
715 static const char *const eltkn_type_tab[] = {
716 [kExprLexInvalid] = "Invalid",
717 [kExprLexMissing] = "Missing",
718 [kExprLexSpacing] = "Spacing",
719 [kExprLexEOC] = "EOC",
720
721 [kExprLexQuestion] = "Question",
722 [kExprLexColon] = "Colon",
723 [kExprLexOr] = "Or",
724 [kExprLexAnd] = "And",
725 [kExprLexComparison] = "Comparison",
726 [kExprLexPlus] = "Plus",
727 [kExprLexMinus] = "Minus",
728 [kExprLexDot] = "Dot",
729 [kExprLexMultiplication] = "Multiplication",
730
731 [kExprLexNot] = "Not",
732
733 [kExprLexNumber] = "Number",
734 [kExprLexSingleQuotedString] = "SingleQuotedString",
735 [kExprLexDoubleQuotedString] = "DoubleQuotedString",
736 [kExprLexOption] = "Option",
737 [kExprLexRegister] = "Register",
738 [kExprLexEnv] = "Env",
739 [kExprLexPlainIdentifier] = "PlainIdentifier",
740
741 [kExprLexBracket] = "Bracket",
742 [kExprLexFigureBrace] = "FigureBrace",
743 [kExprLexParenthesis] = "Parenthesis",
744 [kExprLexComma] = "Comma",
745 [kExprLexArrow] = "Arrow",
746 [kExprLexAssignment] = "Assignment",
747 };
748
749 const char *const eltkn_cmp_type_tab[] = {
750 [kExprCmpEqual] = "Equal",
751 [kExprCmpMatches] = "Matches",
752 [kExprCmpGreater] = "Greater",
753 [kExprCmpGreaterOrEqual] = "GreaterOrEqual",
754 [kExprCmpIdentical] = "Identical",
755 };
756
757 const char *const expr_asgn_type_tab[] = {
758 [kExprAsgnPlain] = "Plain",
759 [kExprAsgnAdd] = "Add",
760 [kExprAsgnSubtract] = "Subtract",
761 [kExprAsgnConcat] = "Concat",
762 };
763
764 const char *const ccs_tab[] = {
765 [kCCStrategyUseOption] = "UseOption",
766 [kCCStrategyMatchCase] = "MatchCase",
767 [kCCStrategyIgnoreCase] = "IgnoreCase",
768 };
769
770 static const char *const eltkn_mul_type_tab[] = {
771 [kExprLexMulMul] = "Mul",
772 [kExprLexMulDiv] = "Div",
773 [kExprLexMulMod] = "Mod",
774 };
775
776 static const char *const eltkn_opt_scope_tab[] = {
777 [kExprOptScopeUnspecified] = "Unspecified",
778 [kExprOptScopeGlobal] = "Global",
779 [kExprOptScopeLocal] = "Local",
780 };
781
782 /// Represent token as a string
783 ///
784 /// Intended for testing and debugging purposes.
785 ///
786 /// @param[in] pstate Parser state, needed to get token string from it. May be
787 /// NULL, in which case in place of obtaining part of the
788 /// string represented by token only token length is
789 /// returned.
790 /// @param[in] token Token to represent.
791 /// @param[out] ret_size Return string size, for cases like NULs inside
792 /// a string. May be NULL.
793 ///
794 /// @return Token represented in a string form, in a static buffer (overwritten
795 /// on each call).
viml_pexpr_repr_token(const ParserState * const pstate,const LexExprToken token,size_t * const ret_size)796 const char *viml_pexpr_repr_token(const ParserState *const pstate, const LexExprToken token,
797 size_t *const ret_size)
798 FUNC_ATTR_WARN_UNUSED_RESULT
799 {
800 static char ret[1024];
801 char *p = ret;
802 const char *const e = &ret[1024] - 1;
803 #define ADDSTR(...) \
804 do { \
805 p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \
806 if (p >= e) { \
807 goto viml_pexpr_repr_token_end; \
808 } \
809 } while (0)
810 ADDSTR("%zu:%zu:%s", token.start.line, token.start.col,
811 eltkn_type_tab[token.type]);
812 switch (token.type) {
813 #define TKNARGS(tkn_type, ...) \
814 case tkn_type: { \
815 ADDSTR(__VA_ARGS__); \
816 break; \
817 }
818 TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)",
819 eltkn_cmp_type_tab[token.data.cmp.type],
820 ccs_tab[token.data.cmp.ccs],
821 (int)token.data.cmp.inv)
822 TKNARGS(kExprLexMultiplication, "(type=%s)",
823 eltkn_mul_type_tab[token.data.mul.type])
824 TKNARGS(kExprLexAssignment, "(type=%s)",
825 expr_asgn_type_tab[token.data.ass.type])
826 TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name))
827 case kExprLexDoubleQuotedString:
828 TKNARGS(kExprLexSingleQuotedString, "(closed=%i)",
829 (int)token.data.str.closed)
830 TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)",
831 eltkn_opt_scope_tab[token.data.opt.scope],
832 (int)token.data.opt.len, token.data.opt.name)
833 TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)",
834 intchar2str((int)token.data.var.scope),
835 (int)token.data.var.autoload)
836 TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)",
837 (int)token.data.num.is_float,
838 (int)token.data.num.base,
839 (double)(token.data.num.is_float
840 ? (double)token.data.num.val.floating
841 : (double)token.data.num.val.integer))
842 TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg)
843 default:
844 // No additional arguments.
845 break;
846 #undef TKNARGS
847 }
848 if (pstate == NULL) {
849 ADDSTR("::%zu", token.len);
850 } else {
851 *p++ = ':';
852 memmove(p, &pstate->reader.lines.items[token.start.line].data[token.start.col],
853 token.len);
854 p += token.len;
855 *p = NUL;
856 }
857 #undef ADDSTR
858 viml_pexpr_repr_token_end:
859 if (ret_size != NULL) {
860 *ret_size = (size_t)(p - ret);
861 }
862 return ret;
863 }
864
865 const char *const east_node_type_tab[] = {
866 [kExprNodeMissing] = "Missing",
867 [kExprNodeOpMissing] = "OpMissing",
868 [kExprNodeTernary] = "Ternary",
869 [kExprNodeTernaryValue] = "TernaryValue",
870 [kExprNodeRegister] = "Register",
871 [kExprNodeSubscript] = "Subscript",
872 [kExprNodeListLiteral] = "ListLiteral",
873 [kExprNodeUnaryPlus] = "UnaryPlus",
874 [kExprNodeBinaryPlus] = "BinaryPlus",
875 [kExprNodeNested] = "Nested",
876 [kExprNodeCall] = "Call",
877 [kExprNodePlainIdentifier] = "PlainIdentifier",
878 [kExprNodePlainKey] = "PlainKey",
879 [kExprNodeComplexIdentifier] = "ComplexIdentifier",
880 [kExprNodeUnknownFigure] = "UnknownFigure",
881 [kExprNodeLambda] = "Lambda",
882 [kExprNodeDictLiteral] = "DictLiteral",
883 [kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier",
884 [kExprNodeComma] = "Comma",
885 [kExprNodeColon] = "Colon",
886 [kExprNodeArrow] = "Arrow",
887 [kExprNodeComparison] = "Comparison",
888 [kExprNodeConcat] = "Concat",
889 [kExprNodeConcatOrSubscript] = "ConcatOrSubscript",
890 [kExprNodeInteger] = "Integer",
891 [kExprNodeFloat] = "Float",
892 [kExprNodeSingleQuotedString] = "SingleQuotedString",
893 [kExprNodeDoubleQuotedString] = "DoubleQuotedString",
894 [kExprNodeOr] = "Or",
895 [kExprNodeAnd] = "And",
896 [kExprNodeUnaryMinus] = "UnaryMinus",
897 [kExprNodeBinaryMinus] = "BinaryMinus",
898 [kExprNodeNot] = "Not",
899 [kExprNodeMultiplication] = "Multiplication",
900 [kExprNodeDivision] = "Division",
901 [kExprNodeMod] = "Mod",
902 [kExprNodeOption] = "Option",
903 [kExprNodeEnvironment] = "Environment",
904 [kExprNodeAssignment] = "Assignment",
905 };
906
907 /// Represent `int` character as a string
908 ///
909 /// Converts
910 /// - ASCII digits into '{digit}'
911 /// - ASCII printable characters into a single-character strings
912 /// - everything else to numbers.
913 ///
914 /// @param[in] ch Character to convert.
915 ///
916 /// @return Converted string, stored in a static buffer (overridden after each
917 /// call).
intchar2str(const int ch)918 static const char *intchar2str(const int ch)
919 FUNC_ATTR_WARN_UNUSED_RESULT
920 {
921 static char buf[sizeof(int) * 3 + 1];
922 if (' ' <= ch && ch < 0x7f) {
923 if (ascii_isdigit(ch)) {
924 buf[0] = '\'';
925 buf[1] = (char)ch;
926 buf[2] = '\'';
927 buf[3] = NUL;
928 } else {
929 buf[0] = (char)ch;
930 buf[1] = NUL;
931 }
932 } else {
933 snprintf(buf, sizeof(buf), "%i", ch);
934 }
935 return buf;
936 }
937
938 #ifdef UNIT_TESTING
939 # include <stdio.h>
940
941 REAL_FATTR_UNUSED
viml_pexpr_debug_print_ast_node(const ExprASTNode * const * const eastnode_p,const char * const prefix)942 static inline void viml_pexpr_debug_print_ast_node(const ExprASTNode *const *const eastnode_p,
943 const char *const prefix)
944 {
945 if (*eastnode_p == NULL) {
946 fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p);
947 } else {
948 fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n",
949 prefix, (void *)eastnode_p, (void *)(*eastnode_p),
950 east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line,
951 (*eastnode_p)->start.col, (*eastnode_p)->len);
952 }
953 }
954
955 REAL_FATTR_UNUSED
viml_pexpr_debug_print_ast_stack(const ExprASTStack * const ast_stack,const char * const msg)956 static inline void viml_pexpr_debug_print_ast_stack(const ExprASTStack *const ast_stack,
957 const char *const msg)
958 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
959 {
960 fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack));
961 for (size_t i = 0; i < kv_size(*ast_stack); i++) {
962 viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)kv_A(*ast_stack, i),
963 "-");
964 }
965 }
966
967 REAL_FATTR_UNUSED
viml_pexpr_debug_print_token(const ParserState * const pstate,const LexExprToken token)968 static inline void viml_pexpr_debug_print_token(const ParserState *const pstate,
969 const LexExprToken token)
970 FUNC_ATTR_ALWAYS_INLINE
971 {
972 fprintf(stderr, "\ntkn: %s\n", viml_pexpr_repr_token(pstate, token, NULL));
973 }
974 # define PSTACK(msg) \
975 viml_pexpr_debug_print_ast_stack(&ast_stack, #msg)
976 # define PSTACK_P(msg) \
977 viml_pexpr_debug_print_ast_stack(ast_stack, #msg)
978 # define PNODE_P(eastnode_p, msg) \
979 viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \
980 (#msg))
981 # define PTOKEN(tkn) \
982 viml_pexpr_debug_print_token(pstate, tkn)
983 #endif
984
985 const uint8_t node_maxchildren[] = {
986 [kExprNodeMissing] = 0,
987 [kExprNodeOpMissing] = 2,
988 [kExprNodeTernary] = 2,
989 [kExprNodeTernaryValue] = 2,
990 [kExprNodeRegister] = 0,
991 [kExprNodeSubscript] = 2,
992 [kExprNodeListLiteral] = 1,
993 [kExprNodeUnaryPlus] = 1,
994 [kExprNodeBinaryPlus] = 2,
995 [kExprNodeNested] = 1,
996 [kExprNodeCall] = 2,
997 [kExprNodePlainIdentifier] = 0,
998 [kExprNodePlainKey] = 0,
999 [kExprNodeComplexIdentifier] = 2,
1000 [kExprNodeUnknownFigure] = 1,
1001 [kExprNodeLambda] = 2,
1002 [kExprNodeDictLiteral] = 1,
1003 [kExprNodeCurlyBracesIdentifier] = 1,
1004 [kExprNodeComma] = 2,
1005 [kExprNodeColon] = 2,
1006 [kExprNodeArrow] = 2,
1007 [kExprNodeComparison] = 2,
1008 [kExprNodeConcat] = 2,
1009 [kExprNodeConcatOrSubscript] = 2,
1010 [kExprNodeInteger] = 0,
1011 [kExprNodeFloat] = 0,
1012 [kExprNodeSingleQuotedString] = 0,
1013 [kExprNodeDoubleQuotedString] = 0,
1014 [kExprNodeOr] = 2,
1015 [kExprNodeAnd] = 2,
1016 [kExprNodeUnaryMinus] = 1,
1017 [kExprNodeBinaryMinus] = 2,
1018 [kExprNodeNot] = 1,
1019 [kExprNodeMultiplication] = 2,
1020 [kExprNodeDivision] = 2,
1021 [kExprNodeMod] = 2,
1022 [kExprNodeOption] = 0,
1023 [kExprNodeEnvironment] = 0,
1024 [kExprNodeAssignment] = 2,
1025 };
1026
1027 /// Free memory occupied by AST
1028 ///
1029 /// @param ast AST stack to free.
viml_pexpr_free_ast(ExprAST ast)1030 void viml_pexpr_free_ast(ExprAST ast)
1031 {
1032 ExprASTStack ast_stack;
1033 kvi_init(ast_stack);
1034 kvi_push(ast_stack, &ast.root);
1035 while (kv_size(ast_stack)) {
1036 ExprASTNode **const cur_node = kv_last(ast_stack);
1037 #ifndef NDEBUG
1038 // Explicitly check for AST recursiveness.
1039 for (size_t i = 0; i < kv_size(ast_stack) - 1; i++) {
1040 assert(*kv_A(ast_stack, i) != *cur_node);
1041 }
1042 #endif
1043 if (*cur_node == NULL) {
1044 assert(kv_size(ast_stack) == 1);
1045 kv_drop(ast_stack, 1);
1046 } else if ((*cur_node)->children != NULL) {
1047 #ifndef NDEBUG
1048 const uint8_t maxchildren = node_maxchildren[(*cur_node)->type];
1049 assert(maxchildren > 0);
1050 assert(maxchildren <= 2);
1051 assert(maxchildren == 1
1052 ? (*cur_node)->children->next == NULL
1053 : ((*cur_node)->children->next == NULL
1054 || (*cur_node)->children->next->next == NULL));
1055 #endif
1056 kvi_push(ast_stack, &(*cur_node)->children);
1057 } else if ((*cur_node)->next != NULL) {
1058 kvi_push(ast_stack, &(*cur_node)->next);
1059 } else if (*cur_node != NULL) {
1060 kv_drop(ast_stack, 1);
1061 switch ((*cur_node)->type) {
1062 case kExprNodeDoubleQuotedString:
1063 case kExprNodeSingleQuotedString:
1064 xfree((*cur_node)->data.str.value);
1065 break;
1066 case kExprNodeMissing:
1067 case kExprNodeOpMissing:
1068 case kExprNodeTernary:
1069 case kExprNodeTernaryValue:
1070 case kExprNodeRegister:
1071 case kExprNodeSubscript:
1072 case kExprNodeListLiteral:
1073 case kExprNodeUnaryPlus:
1074 case kExprNodeBinaryPlus:
1075 case kExprNodeNested:
1076 case kExprNodeCall:
1077 case kExprNodePlainIdentifier:
1078 case kExprNodePlainKey:
1079 case kExprNodeComplexIdentifier:
1080 case kExprNodeUnknownFigure:
1081 case kExprNodeLambda:
1082 case kExprNodeDictLiteral:
1083 case kExprNodeCurlyBracesIdentifier:
1084 case kExprNodeAssignment:
1085 case kExprNodeComma:
1086 case kExprNodeColon:
1087 case kExprNodeArrow:
1088 case kExprNodeComparison:
1089 case kExprNodeConcat:
1090 case kExprNodeConcatOrSubscript:
1091 case kExprNodeInteger:
1092 case kExprNodeFloat:
1093 case kExprNodeOr:
1094 case kExprNodeAnd:
1095 case kExprNodeUnaryMinus:
1096 case kExprNodeBinaryMinus:
1097 case kExprNodeNot:
1098 case kExprNodeMultiplication:
1099 case kExprNodeDivision:
1100 case kExprNodeMod:
1101 case kExprNodeOption:
1102 case kExprNodeEnvironment:
1103 break;
1104 }
1105 xfree(*cur_node);
1106 *cur_node = NULL;
1107 }
1108 }
1109 kvi_destroy(ast_stack);
1110 }
1111
1112 // Binary operator precedence and associativity:
1113 //
1114 // Operator | Precedence | Associativity
1115 // ---------+------------+-----------------
1116 // || | 2 | left
1117 // && | 3 | left
1118 // cmp* | 4 | not associative
1119 // + - . | 5 | left
1120 // * / % | 6 | left
1121 //
1122 // * comparison operators:
1123 //
1124 // == ==# ==? != !=# !=?
1125 // =~ =~# =~? !~ !~# !~?
1126 // > ># >? <= <=# <=?
1127 // < <# <? >= >=# >=?
1128 // is is# is? isnot isnot# isnot?
1129
1130 /// Allocate a new node and set some of the values
1131 ///
1132 /// @param[in] type Node type to allocate.
1133 /// @param[in] level Node level to allocate
viml_pexpr_new_node(const ExprASTNodeType type)1134 static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type)
1135 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC
1136 {
1137 ExprASTNode *ret = xmalloc(sizeof(*ret));
1138 ret->type = type;
1139 ret->children = NULL;
1140 ret->next = NULL;
1141 return ret;
1142 }
1143
1144 static struct {
1145 ExprOpLvl lvl;
1146 ExprOpAssociativity ass;
1147 } node_type_to_node_props[] = {
1148 [kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, },
1149 [kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo },
1150
1151 [kExprNodeNested] = { kEOpLvlParens, kEOpAssNo },
1152 // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but
1153 // kEOpLvlParens when it comes to inside the parenthesis.
1154 [kExprNodeCall] = { kEOpLvlParens, kEOpAssNo },
1155 [kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo },
1156
1157 [kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft },
1158 [kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo },
1159 [kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo },
1160 [kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo },
1161
1162 [kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo },
1163
1164 // Right associativity for comma because this means easier access to arguments
1165 // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is
1166 // represented as "list(comma(a, comma(b, comma(c, d))))" then if it is
1167 // "list(comma(comma(comma(a, b), c), d))" in which case you will need to
1168 // traverse all three comma() structures. And with comma operator (including
1169 // actual comma operator from C which is not present in VimL) nobody cares
1170 // about associativity, only about order of execution.
1171 [kExprNodeComma] = { kEOpLvlComma, kEOpAssRight },
1172
1173 // Colons are not eligible for chaining, so nobody cares about associativity.
1174 [kExprNodeColon] = { kEOpLvlColon, kEOpAssNo },
1175
1176 [kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight },
1177
1178 [kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft },
1179
1180 [kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft },
1181
1182 [kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight },
1183
1184 [kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight },
1185
1186 [kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft },
1187 [kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft },
1188 [kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft },
1189
1190 [kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft },
1191 [kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft },
1192 [kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft },
1193
1194 [kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo },
1195 [kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo },
1196 [kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo },
1197
1198 [kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft },
1199
1200 [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft },
1201
1202 [kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft },
1203
1204 [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft },
1205
1206 [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo },
1207 [kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo },
1208 [kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo },
1209 [kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo },
1210 [kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo },
1211 [kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo },
1212 [kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo },
1213 [kExprNodeOption] = { kEOpLvlValue, kEOpAssNo },
1214 [kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo },
1215 };
1216
1217 /// Get AST node priority level
1218 ///
1219 /// Used primary to reduce line length, so keep the name short.
1220 ///
1221 /// @param[in] node Node to get priority for.
1222 ///
1223 /// @return Node priority level.
node_lvl(const ExprASTNode node)1224 static inline ExprOpLvl node_lvl(const ExprASTNode node)
1225 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
1226 {
1227 return node_type_to_node_props[node.type].lvl;
1228 }
1229
1230 /// Get AST node associativity, to be used for operator nodes primary
1231 ///
1232 /// Used primary to reduce line length, so keep the name short.
1233 ///
1234 /// @param[in] node Node to get priority for.
1235 ///
1236 /// @return Node associativity.
node_ass(const ExprASTNode node)1237 static inline ExprOpAssociativity node_ass(const ExprASTNode node)
1238 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
1239 {
1240 return node_type_to_node_props[node.type].ass;
1241 }
1242
1243 /// Handle binary operator
1244 ///
1245 /// This function is responsible for handling priority levels as well.
1246 ///
1247 /// @param[in] pstate Parser state, used for error reporting.
1248 /// @param ast_stack AST stack. May be popped of some values and will
1249 /// definitely receive new ones.
1250 /// @param bop_node New node to handle.
1251 /// @param[out] want_node_p New value of want_node.
1252 /// @param[out] ast_err Location where error is saved, if any.
1253 ///
1254 /// @return True if no errors occurred, false otherwise.
viml_pexpr_handle_bop(const ParserState * const pstate,ExprASTStack * const ast_stack,ExprASTNode * const bop_node,ExprASTWantedNode * const want_node_p,ExprASTError * const ast_err)1255 static bool viml_pexpr_handle_bop(const ParserState *const pstate, ExprASTStack *const ast_stack,
1256 ExprASTNode *const bop_node, ExprASTWantedNode *const want_node_p,
1257 ExprASTError *const ast_err)
1258 FUNC_ATTR_NONNULL_ALL
1259 {
1260 bool ret = true;
1261 ExprASTNode **top_node_p = NULL;
1262 ExprASTNode *top_node;
1263 ExprOpLvl top_node_lvl;
1264 ExprOpAssociativity top_node_ass;
1265 assert(kv_size(*ast_stack));
1266 const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall
1267 || bop_node->type == kExprNodeSubscript)
1268 ? kEOpLvlSubscript
1269 : node_lvl(*bop_node));
1270 #ifndef NDEBUG
1271 const ExprOpAssociativity bop_node_ass = (
1272 (bop_node->type == kExprNodeCall
1273 || bop_node->type == kExprNodeSubscript)
1274 ? kEOpAssLeft
1275 : node_ass(*bop_node));
1276 #endif
1277 do {
1278 ExprASTNode **new_top_node_p = kv_last(*ast_stack);
1279 ExprASTNode *new_top_node = *new_top_node_p;
1280 assert(new_top_node != NULL);
1281 const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node);
1282 const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node);
1283 assert(bop_node_lvl != new_top_node_lvl
1284 || bop_node_ass == new_top_node_ass);
1285 if (top_node_p != NULL
1286 && ((bop_node_lvl > new_top_node_lvl
1287 || (bop_node_lvl == new_top_node_lvl
1288 && new_top_node_ass == kEOpAssNo)))) {
1289 break;
1290 }
1291 kv_drop(*ast_stack, 1);
1292 top_node_p = new_top_node_p;
1293 top_node = new_top_node;
1294 top_node_lvl = new_top_node_lvl;
1295 top_node_ass = new_top_node_ass;
1296 if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) {
1297 break;
1298 }
1299 } while (kv_size(*ast_stack));
1300 if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) {
1301 // outer(op(x,y)) -> outer(new_op(op(x,y),*))
1302 //
1303 // Before: top_node_p = outer(*), points to op(x,y)
1304 // Other stack elements unknown
1305 //
1306 // After: top_node_p = outer(*), points to new_op(op(x,y))
1307 // &bop_node->children->next = new_op(op(x,y),*), points to NULL
1308 *top_node_p = bop_node;
1309 bop_node->children = top_node;
1310 assert(bop_node->children->next == NULL);
1311 kvi_push(*ast_stack, top_node_p);
1312 kvi_push(*ast_stack, &bop_node->children->next);
1313 } else {
1314 assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight);
1315 assert(top_node->children != NULL && top_node->children->next != NULL);
1316 // outer(op(x,y)) -> outer(op(x,new_op(y,*)))
1317 //
1318 // Before: top_node_p = outer(*), points to op(x,y)
1319 // Other stack elements unknown
1320 //
1321 // After: top_node_p = outer(*), points to op(x,new_op(y))
1322 // &top_node->children->next = op(x,*), points to new_op(y)
1323 // &bop_node->children->next = new_op(y,*), points to NULL
1324 bop_node->children = top_node->children->next;
1325 top_node->children->next = bop_node;
1326 assert(bop_node->children->next == NULL);
1327 kvi_push(*ast_stack, top_node_p);
1328 kvi_push(*ast_stack, &top_node->children->next);
1329 kvi_push(*ast_stack, &bop_node->children->next);
1330 // TODO(ZyX-I): Make this not error, but treat like Python does
1331 if (bop_node->type == kExprNodeComparison) {
1332 east_set_error(pstate, ast_err,
1333 _("E15: Operator is not associative: %.*s"),
1334 bop_node->start);
1335 ret = false;
1336 }
1337 }
1338 *want_node_p = kENodeValue;
1339 return ret;
1340 }
1341
1342 /// ParserPosition literal based on ParserPosition pos with columns shifted
1343 ///
1344 /// Function does not check whether resulting position is valid.
1345 ///
1346 /// @param[in] pos Position to shift.
1347 /// @param[in] shift Number of bytes to shift.
1348 ///
1349 /// @return Shifted position.
shifted_pos(const ParserPosition pos,const size_t shift)1350 static inline ParserPosition shifted_pos(const ParserPosition pos, const size_t shift)
1351 FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT
1352 {
1353 return (ParserPosition) { .line = pos.line, .col = pos.col + shift };
1354 }
1355
1356 /// ParserPosition literal based on ParserPosition pos with specified column
1357 ///
1358 /// Function does not check whether remaining position is valid.
1359 ///
1360 /// @param[in] pos Position to adjust.
1361 /// @param[in] new_col New column.
1362 ///
1363 /// @return Shifted position.
recol_pos(const ParserPosition pos,const size_t new_col)1364 static inline ParserPosition recol_pos(const ParserPosition pos, const size_t new_col)
1365 FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT
1366 {
1367 return (ParserPosition) { .line = pos.line, .col = new_col };
1368 }
1369
1370 /// Get highlight group name
1371 #define HL(g) (is_invalid ? "NvimInvalid" #g : "Nvim" #g)
1372
1373 /// Highlight current token with the given group
1374 #define HL_CUR_TOKEN(g) \
1375 viml_parser_highlight(pstate, cur_token.start, cur_token.len, \
1376 HL(g))
1377
1378 /// Allocate new node, saving some values
1379 #define NEW_NODE(type) \
1380 viml_pexpr_new_node(type)
1381
1382 /// Set position of the given node to position from the given token
1383 ///
1384 /// @param cur_node Node to modify.
1385 /// @param cur_token Token to set position from.
1386 #define POS_FROM_TOKEN(cur_node, cur_token) \
1387 do { \
1388 (cur_node)->start = cur_token.start; \
1389 (cur_node)->len = cur_token.len; \
1390 } while (0)
1391
1392 /// Allocate new node and set its position from the current token
1393 ///
1394 /// If previous token happened to contain spacing then it will be included.
1395 ///
1396 /// @param cur_node Variable to save allocated node to.
1397 /// @param typ Node type.
1398 #define NEW_NODE_WITH_CUR_POS(cur_node, typ) \
1399 do { \
1400 (cur_node) = NEW_NODE(typ); \
1401 POS_FROM_TOKEN((cur_node), cur_token); \
1402 if (prev_token.type == kExprLexSpacing) { \
1403 (cur_node)->start = prev_token.start; \
1404 (cur_node)->len += prev_token.len; \
1405 } \
1406 } while (0)
1407
1408 /// Check whether it is possible to have next expression after current
1409 ///
1410 /// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not.
1411 #define MAY_HAVE_NEXT_EXPR \
1412 (kv_size(ast_stack) == 1)
1413
1414 /// Add operator node
1415 ///
1416 /// @param[in] cur_node Node to add.
1417 #define ADD_OP_NODE(cur_node) \
1418 is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, cur_node, \
1419 &want_node, &ast.err)
1420
1421 /// Record missing operator: for things like
1422 ///
1423 /// :echo @a @a
1424 ///
1425 /// (allowed) or
1426 ///
1427 /// :echo (@a @a)
1428 ///
1429 /// (parsed as OpMissing(@a, @a)).
1430 #define OP_MISSING \
1431 do { \
1432 if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { \
1433 /* Multiple expressions allowed, return without calling */ \
1434 /* viml_parser_advance(). */ \
1435 goto viml_pexpr_parse_end; \
1436 } else { \
1437 assert(*top_node_p != NULL); \
1438 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \
1439 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \
1440 cur_node->len = 0; \
1441 ADD_OP_NODE(cur_node); \
1442 goto viml_pexpr_parse_process_token; \
1443 } \
1444 } while (0)
1445
1446 /// Record missing value: for things like "* 5"
1447 ///
1448 /// @param[in] msg Error message.
1449 #define ADD_VALUE_IF_MISSING(msg) \
1450 do { \
1451 if (want_node == kENodeValue) { \
1452 ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \
1453 NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \
1454 (*top_node_p)->len = 0; \
1455 want_node = kENodeOperator; \
1456 } \
1457 } while (0)
1458
1459 /// Set AST error, unless AST already is not correct
1460 ///
1461 /// @param[out] ret_ast AST to set error in.
1462 /// @param[in] pstate Parser state, used to get error message argument.
1463 /// @param[in] msg Error message, assumed to be already translated and
1464 /// containing a single %token "%.*s".
1465 /// @param[in] start Position at which error occurred.
east_set_error(const ParserState * const pstate,ExprASTError * const ret_ast_err,const char * const msg,const ParserPosition start)1466 static inline void east_set_error(const ParserState *const pstate, ExprASTError *const ret_ast_err,
1467 const char *const msg, const ParserPosition start)
1468 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
1469 {
1470 if (ret_ast_err->msg != NULL) {
1471 return;
1472 }
1473 const ParserLine pline = pstate->reader.lines.items[start.line];
1474 ret_ast_err->msg = msg;
1475 ret_ast_err->arg_len = (int)(pline.size - start.col);
1476 ret_ast_err->arg = pline.data ? pline.data + start.col : NULL;
1477 }
1478
1479 /// Set error from the given token and given message
1480 #define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \
1481 do { \
1482 is_invalid = true; \
1483 east_set_error(pstate, &ast.err, msg, cur_token.start); \
1484 } while (0)
1485
1486 /// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node
1487 #define ERROR_FROM_NODE_AND_MSG(node, msg) \
1488 do { \
1489 is_invalid = true; \
1490 east_set_error(pstate, &ast.err, msg, node->start); \
1491 } while (0)
1492
1493 /// Set error from the given kExprLexInvalid token
1494 #define ERROR_FROM_TOKEN(cur_token) \
1495 ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg)
1496
1497 /// Select figure brace type, altering highlighting as well if needed
1498 ///
1499 /// @param[out] node Node to modify type.
1500 /// @param[in] new_type New type, one of ExprASTNodeType values without
1501 /// kExprNode prefix.
1502 /// @param[in] hl Corresponding highlighting, passed as an argument to #HL.
1503 #define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \
1504 do { \
1505 ExprASTNode *const node_ = (node); \
1506 assert(node_->type == kExprNodeUnknownFigure \
1507 || node_->type == kExprNode##new_type); \
1508 node_->type = kExprNode##new_type; \
1509 if (pstate->colors) { \
1510 kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = \
1511 HL(hl); \
1512 } \
1513 } while (0)
1514
1515 /// Add identifier which should constitute complex identifier node
1516 ///
1517 /// This one is to be called only in case want_node is kENodeOperator.
1518 ///
1519 /// @param new_ident_node_code Code used to create a new identifier node and
1520 /// update want_node and ast_stack, without
1521 /// a trailing semicolon.
1522 /// @param hl Highlighting name to use, passed as an argument to #HL.
1523 #define ADD_IDENT(new_ident_node_code, hl) \
1524 do { \
1525 assert(want_node == kENodeOperator); \
1526 /* Operator: may only be curly braces name, but only under certain */ \
1527 /* conditions. */ \
1528 /* First condition is that there is no space before a part of complex */ \
1529 /* identifier. */ \
1530 if (prev_token.type == kExprLexSpacing) { \
1531 OP_MISSING; \
1532 } \
1533 switch ((*top_node_p)->type) { \
1534 /* Second is that previous node is one of the identifiers: */ \
1535 /* complex, plain, curly braces. */ \
1536 /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \
1537 /* handle environment variables like those bash uses for */ \
1538 /* `export -f`: their names consist not only of alphanumeric */ \
1539 /* characetrs. */ \
1540 case kExprNodeComplexIdentifier: \
1541 case kExprNodePlainIdentifier: \
1542 case kExprNodeCurlyBracesIdentifier: { \
1543 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \
1544 cur_node->len = 0; \
1545 cur_node->children = *top_node_p; \
1546 *top_node_p = cur_node; \
1547 kvi_push(ast_stack, &cur_node->children->next); \
1548 ExprASTNode **const new_top_node_p = kv_last(ast_stack); \
1549 assert(*new_top_node_p == NULL); \
1550 new_ident_node_code; \
1551 *new_top_node_p = cur_node; \
1552 HL_CUR_TOKEN(hl); \
1553 break; \
1554 } \
1555 default: { \
1556 OP_MISSING; \
1557 break; \
1558 } \
1559 } \
1560 } while (0)
1561
1562 /// Determine whether given parse type is an assignment
1563 ///
1564 /// @param[in] pt Checked parse type.
1565 ///
1566 /// @return true if parsing an assignment, false otherwise.
pt_is_assignment(const ExprASTParseType pt)1567 static inline bool pt_is_assignment(const ExprASTParseType pt)
1568 FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
1569 {
1570 return (pt == kEPTAssignment || pt == kEPTSingleAssignment);
1571 }
1572
1573 /// Structure used to define “string shifts” necessary to map string
1574 /// highlighting to actual strings.
1575 typedef struct {
1576 size_t start; ///< Where special character starts in original string.
1577 size_t orig_len; ///< Length of orininal string (e.g. 4 for "\x80").
1578 size_t act_len; ///< Length of resulting character(s) (e.g. 1 for "\x80").
1579 bool escape_not_known; ///< True if escape sequence in original is not known.
1580 } StringShift;
1581
1582 /// Parse and highlight single- or double-quoted string
1583 ///
1584 /// Function is supposed to detect and highlight regular expressions (but does
1585 /// not do now).
1586 ///
1587 /// @param[out] pstate Parser state which also contains a place where
1588 /// highlighting is saved.
1589 /// @param[out] node Node where string parsing results are saved.
1590 /// @param[in] token Token to highlight.
1591 /// @param[in] ast_stack Parser AST stack, used to detect whether current
1592 /// string is a regex.
1593 /// @param[in] is_invalid Whether currently processed token is not valid.
parse_quoted_string(ParserState * const pstate,ExprASTNode * const node,const LexExprToken token,const ExprASTStack ast_stack,const bool is_invalid)1594 static void parse_quoted_string(ParserState *const pstate, ExprASTNode *const node,
1595 const LexExprToken token, const ExprASTStack ast_stack,
1596 const bool is_invalid)
1597 FUNC_ATTR_NONNULL_ALL
1598 {
1599 const ParserLine pline = pstate->reader.lines.items[token.start.line];
1600 const char *const s = pline.data + token.start.col;
1601 const char *const e = s + token.len - token.data.str.closed;
1602 const char *p = s + 1;
1603 const bool is_double = (token.type == kExprLexDoubleQuotedString);
1604 size_t size = token.len - token.data.str.closed - 1;
1605 kvec_withinit_t(StringShift, 16) shifts;
1606 kvi_init(shifts);
1607 if (!is_double) {
1608 viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote));
1609 while (p < e) {
1610 const char *const chunk_e = memchr(p, '\'', (size_t)(e - p));
1611 if (chunk_e == NULL) {
1612 break;
1613 }
1614 size--;
1615 p = chunk_e + 2;
1616 if (pstate->colors) {
1617 kvi_push(shifts, ((StringShift) {
1618 .start = token.start.col + (size_t)(chunk_e - s),
1619 .orig_len = 2,
1620 .act_len = 1,
1621 .escape_not_known = false,
1622 }));
1623 }
1624 }
1625 node->data.str.size = size;
1626 if (size == 0) {
1627 node->data.str.value = NULL;
1628 } else {
1629 char *v_p;
1630 v_p = node->data.str.value = xmallocz(size);
1631 p = s + 1;
1632 while (p < e) {
1633 const char *const chunk_e = memchr(p, '\'', (size_t)(e - p));
1634 if (chunk_e == NULL) {
1635 memcpy(v_p, p, (size_t)(e - p));
1636 break;
1637 }
1638 memcpy(v_p, p, (size_t)(chunk_e - p));
1639 v_p += (size_t)(chunk_e - p) + 1;
1640 v_p[-1] = '\'';
1641 p = chunk_e + 2;
1642 }
1643 }
1644 } else {
1645 viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote));
1646 for (p = s + 1; p < e; p++) {
1647 if (*p == '\\' && p + 1 < e) {
1648 p++;
1649 if (p + 1 == e) {
1650 size--;
1651 break;
1652 }
1653 switch (*p) {
1654 // A "\<x>" form occupies at least 4 characters, and produces up to
1655 // 6 characters: reserve space for 2 extra, but do not compute actual
1656 // length just now, it would be costy.
1657 case '<':
1658 size += 2;
1659 break;
1660 // Hexadecimal, always single byte, but at least three bytes each.
1661 case 'x':
1662 case 'X':
1663 size--;
1664 if (ascii_isxdigit(p[1])) {
1665 size--;
1666 if (p + 2 < e && ascii_isxdigit(p[2])) {
1667 size--;
1668 }
1669 }
1670 break;
1671 // Unicode
1672 //
1673 // \uF takes 1 byte which is 2 bytes less then escape sequence.
1674 // \uFF: 2 bytes, 2 bytes less.
1675 // \uFFF: 3 bytes, 2 bytes less.
1676 // \uFFFF: 3 bytes, 3 bytes less.
1677 // \UFFFFF: 4 bytes, 3 bytes less.
1678 // \UFFFFFF: 5 bytes, 3 bytes less.
1679 // \UFFFFFFF: 6 bytes, 3 bytes less.
1680 // \U7FFFFFFF: 6 bytes, 4 bytes less.
1681 case 'u':
1682 case 'U': {
1683 const char *const esc_start = p;
1684 size_t n = (*p == 'u' ? 4 : 8);
1685 int nr = 0;
1686 p++;
1687 while (p + 1 < e && n-- && ascii_isxdigit(p[1])) {
1688 p++;
1689 nr = (nr << 4) + hex2nr(*p);
1690 }
1691 // Escape length: (esc_start - 1) points to "\\", esc_start to "u"
1692 // or "U", p to the byte after last byte. So escape sequence
1693 // occupies p - (esc_start - 1), but it stands for a utf_char2len
1694 // bytes.
1695 size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr));
1696 p--;
1697 break;
1698 }
1699 // Octal, always single byte, but at least two bytes each.
1700 case '0':
1701 case '1':
1702 case '2':
1703 case '3':
1704 case '4':
1705 case '5':
1706 case '6':
1707 case '7':
1708 size--;
1709 p++;
1710 if (*p >= '0' && *p <= '7') {
1711 size--;
1712 p++;
1713 if (p < e && *p >= '0' && *p <= '7') {
1714 size--;
1715 p++;
1716 }
1717 }
1718 break;
1719 default:
1720 size--;
1721 break;
1722 }
1723 }
1724 }
1725 if (size == 0) {
1726 node->data.str.value = NULL;
1727 node->data.str.size = 0;
1728 } else {
1729 char *v_p;
1730 v_p = node->data.str.value = xmalloc(size);
1731 p = s + 1;
1732 while (p < e) {
1733 const char *const chunk_e = memchr(p, '\\', (size_t)(e - p));
1734 if (chunk_e == NULL) {
1735 memcpy(v_p, p, (size_t)(e - p));
1736 v_p += e - p;
1737 break;
1738 }
1739 memcpy(v_p, p, (size_t)(chunk_e - p));
1740 v_p += (size_t)(chunk_e - p);
1741 p = chunk_e + 1;
1742 if (p == e) {
1743 *v_p++ = '\\';
1744 break;
1745 }
1746 bool is_unknown = false;
1747 const char *const v_p_start = v_p;
1748 switch (*p) {
1749 #define SINGLE_CHAR_ESC(ch, real_ch) \
1750 case ch: { \
1751 *v_p++ = real_ch; \
1752 p++; \
1753 break; \
1754 }
1755 SINGLE_CHAR_ESC('b', BS)
1756 SINGLE_CHAR_ESC('e', ESC)
1757 SINGLE_CHAR_ESC('f', FF)
1758 SINGLE_CHAR_ESC('n', NL)
1759 SINGLE_CHAR_ESC('r', CAR)
1760 SINGLE_CHAR_ESC('t', TAB)
1761 SINGLE_CHAR_ESC('"', '"')
1762 SINGLE_CHAR_ESC('\\', '\\')
1763 #undef SINGLE_CHAR_ESC
1764
1765 // Hexadecimal or unicode.
1766 case 'X':
1767 case 'x':
1768 case 'u':
1769 case 'U':
1770 if (p + 1 < e && ascii_isxdigit(p[1])) {
1771 size_t n;
1772 int nr;
1773 bool is_hex = (*p == 'x' || *p == 'X');
1774
1775 if (is_hex) {
1776 n = 2;
1777 } else if (*p == 'u') {
1778 n = 4;
1779 } else {
1780 n = 8;
1781 }
1782 nr = 0;
1783 while (p + 1 < e && n-- && ascii_isxdigit(p[1])) {
1784 p++;
1785 nr = (nr << 4) + hex2nr(*p);
1786 }
1787 p++;
1788 if (is_hex) {
1789 *v_p++ = (char)nr;
1790 } else {
1791 v_p += utf_char2bytes(nr, (char_u *)v_p);
1792 }
1793 } else {
1794 is_unknown = true;
1795 *v_p++ = *p;
1796 p++;
1797 }
1798 break;
1799 // Octal: "\1", "\12", "\123".
1800 case '0':
1801 case '1':
1802 case '2':
1803 case '3':
1804 case '4':
1805 case '5':
1806 case '6':
1807 case '7': {
1808 uint8_t ch = (uint8_t)(*p++ - '0');
1809 if (p < e && *p >= '0' && *p <= '7') {
1810 ch = (uint8_t)((ch << 3) + *p++ - '0');
1811 if (p < e && *p >= '0' && *p <= '7') {
1812 ch = (uint8_t)((ch << 3) + *p++ - '0');
1813 }
1814 }
1815 *v_p++ = (char)ch;
1816 break;
1817 }
1818 // Special key, e.g.: "\<C-W>"
1819 case '<': {
1820 const size_t special_len = (
1821 trans_special((const char_u **)&p, (size_t)(e - p),
1822 (char_u *)v_p, true, true));
1823 if (special_len != 0) {
1824 v_p += special_len;
1825 } else {
1826 is_unknown = true;
1827 mb_copy_char((const char_u **)&p, (char_u **)&v_p);
1828 }
1829 break;
1830 }
1831 default:
1832 is_unknown = true;
1833 mb_copy_char((const char_u **)&p, (char_u **)&v_p);
1834 break;
1835 }
1836 if (pstate->colors) {
1837 kvi_push(shifts, ((StringShift) {
1838 .start = token.start.col + (size_t)(chunk_e - s),
1839 .orig_len = (size_t)(p - chunk_e),
1840 .act_len = (size_t)(v_p - (char *)v_p_start),
1841 .escape_not_known = is_unknown,
1842 }));
1843 }
1844 }
1845 node->data.str.size = (size_t)(v_p - node->data.str.value);
1846 }
1847 }
1848 if (pstate->colors) {
1849 // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions
1850 // TODO(ZyX-I): use ast_stack to determine and highlight printf format str
1851 // TODO(ZyX-I): use ast_stack to determine and highlight expression strings
1852 size_t next_col = token.start.col + 1;
1853 const char *const body_str = (is_double
1854 ? HL(DoubleQuotedBody)
1855 : HL(SingleQuotedBody));
1856 const char *const esc_str = (is_double
1857 ? HL(DoubleQuotedEscape)
1858 : HL(SingleQuotedQuote));
1859 const char *const ukn_esc_str = (is_double
1860 ? HL(DoubleQuotedUnknownEscape)
1861 : HL(SingleQuotedUnknownEscape));
1862 for (size_t i = 0; i < kv_size(shifts); i++) {
1863 const StringShift cur_shift = kv_A(shifts, i);
1864 if (cur_shift.start > next_col) {
1865 viml_parser_highlight(pstate, recol_pos(token.start, next_col),
1866 cur_shift.start - next_col,
1867 body_str);
1868 }
1869 viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start),
1870 cur_shift.orig_len,
1871 (cur_shift.escape_not_known
1872 ? ukn_esc_str
1873 : esc_str));
1874 next_col = cur_shift.start + cur_shift.orig_len;
1875 }
1876 if (next_col - token.start.col < token.len - token.data.str.closed) {
1877 viml_parser_highlight(pstate, recol_pos(token.start, next_col),
1878 (token.start.col
1879 + token.len
1880 - token.data.str.closed
1881 - next_col),
1882 body_str);
1883 }
1884 }
1885 if (token.data.str.closed) {
1886 if (is_double) {
1887 viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1),
1888 1, HL(DoubleQuote));
1889 } else {
1890 viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1),
1891 1, HL(SingleQuote));
1892 }
1893 }
1894 kvi_destroy(shifts);
1895 }
1896
1897 /// Additional flags to pass to lexer depending on want_node
1898 static const int want_node_to_lexer_flags[] = {
1899 [kENodeValue] = kELFlagIsNotCmp,
1900 [kENodeOperator] = kELFlagForbidScope,
1901 };
1902
1903 /// Number of characters to highlight as NumberPrefix depending on the base
1904 static const uint8_t base_to_prefix_length[] = {
1905 [2] = 2,
1906 [8] = 1,
1907 [10] = 0,
1908 [16] = 2,
1909 };
1910
1911 /// Parse one VimL expression
1912 ///
1913 /// @param pstate Parser state.
1914 /// @param[in] flags Additional flags, see ExprParserFlags
1915 ///
1916 /// @return Parsed AST.
viml_pexpr_parse(ParserState * const pstate,const int flags)1917 ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
1918 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
1919 {
1920 ExprAST ast = {
1921 .err = {
1922 .msg = NULL,
1923 .arg_len = 0,
1924 .arg = NULL,
1925 },
1926 .root = NULL,
1927 };
1928 // Expression stack contains current branch in AST tree: that is
1929 // - Stack item 0 contains root of the tree, i.e. &ast->root.
1930 // - Stack item i points to the previous stack items’ last child.
1931 //
1932 // When parser expects “value” node that is something like identifier or "["
1933 // (list start) last stack item contains NULL. Otherwise last stack item is
1934 // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node
1935 // representing "1+1").
1936 ExprASTStack ast_stack;
1937 kvi_init(ast_stack);
1938 kvi_push(ast_stack, &ast.root);
1939 ExprASTWantedNode want_node = kENodeValue;
1940 ExprASTParseTypeStack pt_stack;
1941 kvi_init(pt_stack);
1942 kvi_push(pt_stack, kEPTExpr);
1943 if (flags & kExprFlagsParseLet) {
1944 kvi_push(pt_stack, kEPTAssignment);
1945 }
1946 LexExprToken prev_token = { .type = kExprLexMissing };
1947 bool highlighted_prev_spacing = false;
1948 // Lambda node, valid when parsing lambda arguments only.
1949 ExprASTNode *lambda_node = NULL;
1950 size_t asgn_level = 0;
1951 do {
1952 const bool is_concat_or_subscript = (
1953 want_node == kENodeValue
1954 && kv_size(ast_stack) > 1
1955 && (*kv_Z(ast_stack,
1956 1))->type == kExprNodeConcatOrSubscript);
1957 const int lexer_additional_flags = (
1958 kELFlagPeek
1959 | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)
1960 | ((want_node == kENodeValue
1961 && (kv_size(ast_stack) == 1
1962 || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
1963 && ((*kv_Z(ast_stack, 1))->type
1964 != kExprNodeConcatOrSubscript))))
1965 ? kELFlagAllowFloat
1966 : 0));
1967 LexExprToken cur_token = viml_pexpr_next_token(pstate,
1968 want_node_to_lexer_flags[want_node] |
1969 lexer_additional_flags);
1970 if (cur_token.type == kExprLexEOC) {
1971 break;
1972 }
1973 LexExprTokenType tok_type = cur_token.type;
1974 const bool token_invalid = (tok_type == kExprLexInvalid);
1975 bool is_invalid = token_invalid;
1976 viml_pexpr_parse_process_token:
1977 // May use different flags this time.
1978 cur_token = viml_pexpr_next_token(pstate,
1979 want_node_to_lexer_flags[want_node] | lexer_additional_flags);
1980 if (tok_type == kExprLexSpacing) {
1981 if (is_invalid) {
1982 HL_CUR_TOKEN(Spacing);
1983 } else {
1984 // Do not do anything: let regular spacing be highlighted as normal.
1985 // This also allows later to highlight spacing as invalid.
1986 }
1987 goto viml_pexpr_parse_cycle_end;
1988 } else if (is_invalid && prev_token.type == kExprLexSpacing
1989 && !highlighted_prev_spacing) {
1990 viml_parser_highlight(pstate, prev_token.start, prev_token.len,
1991 HL(Spacing));
1992 is_invalid = false;
1993 highlighted_prev_spacing = true;
1994 }
1995 const ParserLine pline = pstate->reader.lines.items[cur_token.start.line];
1996 ExprASTNode **const top_node_p = kv_last(ast_stack);
1997 assert(kv_size(ast_stack) >= 1);
1998 ExprASTNode *cur_node = NULL;
1999 #ifndef NDEBUG
2000 const bool want_value = (want_node == kENodeValue);
2001 assert(want_value == (*top_node_p == NULL));
2002 assert(kv_A(ast_stack, 0) == &ast.root);
2003 // Check that stack item i + 1 points to stack items’ i *last* child.
2004 for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) {
2005 const bool item_null = (want_value && i + 2 == kv_size(ast_stack));
2006 assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1)
2007 && (item_null
2008 ? (*kv_A(ast_stack, i))->children == NULL
2009 : (*kv_A(ast_stack, i))->children->next == NULL))
2010 || ((&(*kv_A(ast_stack, i))->children->next
2011 == kv_A(ast_stack, i + 1))
2012 && (item_null
2013 ? (*kv_A(ast_stack, i))->children->next == NULL
2014 : (*kv_A(ast_stack, i))->children->next->next == NULL)));
2015 }
2016 #endif
2017 // Note: in Vim whether expression "cond?d.a:2" is valid depends both on
2018 // "cond" and whether "d" is a dictionary: expression is valid if condition
2019 // is true and "d" is a dictionary (with "a" key or it will complain about
2020 // missing one, but this is not relevant); if any of the requirements is
2021 // broken then this thing is parsed as "d . a:2" yielding missing colon
2022 // error. This parser does not allow such ambiguity, especially because it
2023 // simply can’t: whether "d" is a dictionary is not known at the parsing
2024 // time.
2025 //
2026 // Here example will always contain a concat with "a:2" sucking colon,
2027 // making expression invalid both because there is no longer a spare colon
2028 // for ternary and because concatenating dictionary with anything is not
2029 // valid. There are more cases when this will make a difference though.
2030 const bool node_is_key = (
2031 is_concat_or_subscript
2032 && (cur_token.type == kExprLexPlainIdentifier
2033 ? (!cur_token.data.var.autoload
2034 && cur_token.data.var.scope == kExprVarScopeMissing)
2035 : (cur_token.type == kExprLexNumber))
2036 && prev_token.type != kExprLexSpacing);
2037 if (is_concat_or_subscript && !node_is_key) {
2038 // Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
2039 // kExprLexSpacing` part of the condition) as well as any other "d.{expr}"
2040 // where "{expr}" does not look like a key is invalid whenever "d" happens
2041 // to be a dictionary. Since parser has no idea whether preceding
2042 // expression is actually a dictionary it can’t outright reject anything,
2043 // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead,
2044 // which will yield different errors then Vim does in a number of
2045 // circumstances, and in any case runtime and not parse time errors.
2046 (*kv_Z(ast_stack, 1))->type = kExprNodeConcat;
2047 }
2048 // Pop some stack pt_stack items in case of misplaced nodes.
2049 const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment;
2050 switch (kv_last(pt_stack)) {
2051 case kEPTExpr:
2052 break;
2053 case kEPTLambdaArguments:
2054 if ((want_node == kENodeOperator
2055 && tok_type != kExprLexComma
2056 && tok_type != kExprLexArrow)
2057 || (want_node == kENodeValue
2058 && !(cur_token.type == kExprLexPlainIdentifier
2059 && cur_token.data.var.scope == kExprVarScopeMissing
2060 && !cur_token.data.var.autoload)
2061 && tok_type != kExprLexArrow)) {
2062 lambda_node->data.fig.type_guesses.allow_lambda = false;
2063 if (lambda_node->children != NULL
2064 && lambda_node->children->type == kExprNodeComma) {
2065 // If lambda has comma child this means that parser has already seen
2066 // at least "{arg1,", so node cannot possibly be anything, but
2067 // lambda.
2068
2069 // Vim may give E121 or E720 in this case, but it does not look
2070 // right to have either because both are results of reevaluation
2071 // possibly-lambda node as a dictionary and here this is not going
2072 // to happen.
2073 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2074 _("E15: Expected lambda arguments list or arrow: %.*s"));
2075 } else {
2076 // Else it may appear that possibly-lambda node is actually
2077 // a dictionary or curly-braces-name identifier.
2078 lambda_node = NULL;
2079 kv_drop(pt_stack, 1);
2080 }
2081 }
2082 break;
2083 case kEPTSingleAssignment:
2084 case kEPTAssignment:
2085 if (want_node == kENodeValue
2086 && tok_type != kExprLexBracket
2087 && tok_type != kExprLexPlainIdentifier
2088 && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing)
2089 && !(node_is_key && tok_type == kExprLexNumber)
2090 && tok_type != kExprLexEnv
2091 && tok_type != kExprLexOption
2092 && tok_type != kExprLexRegister) {
2093 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2094 _("E15: Expected value part of assignment lvalue: %.*s"));
2095 kv_drop(pt_stack, 1);
2096 } else if (want_node == kENodeOperator
2097 && tok_type != kExprLexBracket
2098 && (tok_type != kExprLexFigureBrace
2099 || cur_token.data.brc.closing)
2100 && tok_type != kExprLexDot
2101 && (tok_type != kExprLexComma || !is_single_assignment)
2102 && tok_type != kExprLexAssignment
2103 // Curly brace identifiers: will contain plain identifier or
2104 // another curly brace in position where operator is wanted.
2105 && !((tok_type == kExprLexPlainIdentifier
2106 || (tok_type == kExprLexFigureBrace
2107 && !cur_token.data.brc.closing))
2108 && prev_token.type != kExprLexSpacing)) {
2109 if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) {
2110 goto viml_pexpr_parse_end;
2111 }
2112 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2113 _("E15: Expected assignment operator or subscript: %.*s"));
2114 kv_drop(pt_stack, 1);
2115 }
2116 assert(kv_size(pt_stack));
2117 break;
2118 }
2119 assert(kv_size(pt_stack));
2120 const ExprASTParseType cur_pt = kv_last(pt_stack);
2121 assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments);
2122 switch (tok_type) {
2123 case kExprLexMissing:
2124 case kExprLexSpacing:
2125 case kExprLexEOC:
2126 abort();
2127 case kExprLexInvalid:
2128 ERROR_FROM_TOKEN(cur_token);
2129 tok_type = cur_token.data.err.type;
2130 goto viml_pexpr_parse_process_token;
2131 case kExprLexRegister: {
2132 if (want_node == kENodeOperator) {
2133 // Register in operator position: e.g. @a @a
2134 OP_MISSING;
2135 }
2136 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister);
2137 cur_node->data.reg.name = cur_token.data.reg.name;
2138 *top_node_p = cur_node;
2139 want_node = kENodeOperator;
2140 HL_CUR_TOKEN(Register);
2141 break;
2142 }
2143 #define SIMPLE_UB_OP(op) \
2144 case kExprLex##op: { \
2145 if (want_node == kENodeValue) { \
2146 /* Value level: assume unary operator. */ \
2147 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \
2148 *top_node_p = cur_node; \
2149 kvi_push(ast_stack, &cur_node->children); \
2150 HL_CUR_TOKEN(Unary##op); \
2151 } else { \
2152 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \
2153 ADD_OP_NODE(cur_node); \
2154 HL_CUR_TOKEN(Binary##op); \
2155 } \
2156 want_node = kENodeValue; \
2157 break; \
2158 }
2159 SIMPLE_UB_OP(Plus)
2160 SIMPLE_UB_OP(Minus)
2161 #undef SIMPLE_UB_OP
2162 #define SIMPLE_B_OP(op, msg) \
2163 case kExprLex##op: { \
2164 ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \
2165 NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \
2166 HL_CUR_TOKEN(op); \
2167 ADD_OP_NODE(cur_node); \
2168 break; \
2169 }
2170 SIMPLE_B_OP(Or, "or operator")
2171 SIMPLE_B_OP(And, "and operator")
2172 #undef SIMPLE_B_OP
2173 case kExprLexMultiplication:
2174 ADD_VALUE_IF_MISSING(_("E15: Unexpected multiplication-like operator: %.*s"));
2175 switch (cur_token.data.mul.type) {
2176 #define MUL_OP(lex_op_tail, node_op_tail) \
2177 case kExprLexMul##lex_op_tail: { \
2178 NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \
2179 HL_CUR_TOKEN(node_op_tail); \
2180 break; \
2181 }
2182 MUL_OP(Mul, Multiplication)
2183 MUL_OP(Div, Division)
2184 MUL_OP(Mod, Mod)
2185 #undef MUL_OP
2186 }
2187 ADD_OP_NODE(cur_node);
2188 break;
2189 case kExprLexOption: {
2190 if (want_node == kENodeOperator) {
2191 OP_MISSING;
2192 }
2193 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption);
2194 if (cur_token.type == kExprLexInvalid) {
2195 assert(cur_token.len == 1
2196 || (cur_token.len == 3
2197 && pline.data[cur_token.start.col + 2] == ':'));
2198 cur_node->data.opt.ident = (
2199 pline.data + cur_token.start.col + cur_token.len);
2200 cur_node->data.opt.ident_len = 0;
2201 cur_node->data.opt.scope = (
2202 cur_token.len == 3
2203 ? (ExprOptScope)pline.data[cur_token.start.col + 1]
2204 : kExprOptScopeUnspecified);
2205 } else {
2206 cur_node->data.opt.ident = cur_token.data.opt.name;
2207 cur_node->data.opt.ident_len = cur_token.data.opt.len;
2208 cur_node->data.opt.scope = cur_token.data.opt.scope;
2209 }
2210 *top_node_p = cur_node;
2211 want_node = kENodeOperator;
2212 viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil));
2213 const size_t scope_shift = (
2214 cur_token.data.opt.scope == kExprOptScopeUnspecified ? 0 : 2);
2215 if (scope_shift) {
2216 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
2217 HL(OptionScope));
2218 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1,
2219 HL(OptionScopeDelimiter));
2220 }
2221 viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift + 1),
2222 cur_token.len - (scope_shift + 1), HL(OptionName));
2223 break;
2224 }
2225 case kExprLexEnv:
2226 if (want_node == kENodeOperator) {
2227 OP_MISSING;
2228 }
2229 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment);
2230 cur_node->data.env.ident = pline.data + cur_token.start.col + 1;
2231 cur_node->data.env.ident_len = cur_token.len - 1;
2232 if (cur_node->data.env.ident_len == 0) {
2233 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2234 _("E15: Environment variable name missing"));
2235 }
2236 *top_node_p = cur_node;
2237 want_node = kENodeOperator;
2238 viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil));
2239 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1),
2240 cur_token.len - 1, HL(EnvironmentName));
2241 break;
2242 case kExprLexNot:
2243 if (want_node == kENodeOperator) {
2244 OP_MISSING;
2245 }
2246 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot);
2247 *top_node_p = cur_node;
2248 kvi_push(ast_stack, &cur_node->children);
2249 HL_CUR_TOKEN(Not);
2250 break;
2251 case kExprLexComparison:
2252 ADD_VALUE_IF_MISSING(_("E15: Expected value, got comparison operator: %.*s"));
2253 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison);
2254 if (cur_token.type == kExprLexInvalid) {
2255 cur_node->data.cmp.ccs = kCCStrategyUseOption;
2256 cur_node->data.cmp.type = kExprCmpEqual;
2257 cur_node->data.cmp.inv = false;
2258 } else {
2259 cur_node->data.cmp.ccs = cur_token.data.cmp.ccs;
2260 cur_node->data.cmp.type = cur_token.data.cmp.type;
2261 cur_node->data.cmp.inv = cur_token.data.cmp.inv;
2262 }
2263 ADD_OP_NODE(cur_node);
2264 if (cur_token.data.cmp.ccs != kCCStrategyUseOption) {
2265 viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1,
2266 HL(Comparison));
2267 viml_parser_highlight(pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1,
2268 HL(ComparisonModifier));
2269 } else {
2270 HL_CUR_TOKEN(Comparison);
2271 }
2272 want_node = kENodeValue;
2273 break;
2274 case kExprLexComma:
2275 assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments));
2276 if (want_node == kENodeValue) {
2277 // Value level: comma appearing here is not valid.
2278 // Note: in Vim string(,x) will give E116, this is not the case here.
2279 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got comma: %.*s"));
2280 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing);
2281 cur_node->len = 0;
2282 *top_node_p = cur_node;
2283 want_node = kENodeOperator;
2284 }
2285 if (cur_pt == kEPTLambdaArguments) {
2286 assert(lambda_node != NULL);
2287 assert(lambda_node->data.fig.type_guesses.allow_lambda);
2288 SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda);
2289 }
2290 if (kv_size(ast_stack) < 2) {
2291 goto viml_pexpr_parse_invalid_comma;
2292 }
2293 for (size_t i = 1; i < kv_size(ast_stack); i++) {
2294 ExprASTNode *const *const eastnode_p =
2295 (ExprASTNode *const *)kv_Z(ast_stack, i);
2296 const ExprASTNodeType eastnode_type = (*eastnode_p)->type;
2297 const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p);
2298 if (eastnode_type == kExprNodeLambda) {
2299 assert(cur_pt == kEPTLambdaArguments
2300 && want_node == kENodeOperator);
2301 break;
2302 } else if (eastnode_type == kExprNodeDictLiteral
2303 || eastnode_type == kExprNodeListLiteral
2304 || eastnode_type == kExprNodeCall) {
2305 break;
2306 } else if (eastnode_type == kExprNodeComma
2307 || eastnode_type == kExprNodeColon
2308 || eastnode_lvl > kEOpLvlComma) {
2309 // Do nothing
2310 } else {
2311 viml_pexpr_parse_invalid_comma:
2312 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2313 _("E15: Comma outside of call, lambda or literal: %.*s"));
2314 break;
2315 }
2316 if (i == kv_size(ast_stack) - 1) {
2317 goto viml_pexpr_parse_invalid_comma;
2318 }
2319 }
2320 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma);
2321 ADD_OP_NODE(cur_node);
2322 HL_CUR_TOKEN(Comma);
2323 break;
2324 #define EXP_VAL_COLON "E15: Expected value, got colon: %.*s"
2325 case kExprLexColon: {
2326 bool is_ternary = false;
2327 if (kv_size(ast_stack) < 2) {
2328 goto viml_pexpr_parse_invalid_colon;
2329 }
2330 bool can_be_ternary = true;
2331 bool is_subscript = false;
2332 for (size_t i = 1; i < kv_size(ast_stack); i++) {
2333 ExprASTNode *const *const eastnode_p =
2334 (ExprASTNode *const *)kv_Z(ast_stack, i);
2335 const ExprASTNodeType eastnode_type = (*eastnode_p)->type;
2336 const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p);
2337 STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma,
2338 "Unexpected operator priorities");
2339 if (can_be_ternary && eastnode_type == kExprNodeTernaryValue
2340 && !(*eastnode_p)->data.ter.got_colon) {
2341 kv_drop(ast_stack, i);
2342 (*eastnode_p)->start = cur_token.start;
2343 (*eastnode_p)->len = cur_token.len;
2344 if (prev_token.type == kExprLexSpacing) {
2345 (*eastnode_p)->start = prev_token.start;
2346 (*eastnode_p)->len += prev_token.len;
2347 }
2348 is_ternary = true;
2349 (*eastnode_p)->data.ter.got_colon = true;
2350 ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
2351 assert((*eastnode_p)->children != NULL);
2352 assert((*eastnode_p)->children->next == NULL);
2353 kvi_push(ast_stack, &(*eastnode_p)->children->next);
2354 break;
2355 } else if (eastnode_type == kExprNodeUnknownFigure) {
2356 SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict);
2357 break;
2358 } else if (eastnode_type == kExprNodeDictLiteral) {
2359 break;
2360 } else if (eastnode_type == kExprNodeSubscript) {
2361 is_subscript = true;
2362 // can_be_ternary = false;
2363 assert(!is_ternary);
2364 break;
2365 } else if (eastnode_type == kExprNodeColon) {
2366 goto viml_pexpr_parse_invalid_colon;
2367 } else if (eastnode_lvl >= kEOpLvlTernaryValue) {
2368 // Do nothing
2369 } else if (eastnode_lvl >= kEOpLvlComma) {
2370 can_be_ternary = false;
2371 } else {
2372 goto viml_pexpr_parse_invalid_colon;
2373 }
2374 if (i == kv_size(ast_stack) - 1) {
2375 goto viml_pexpr_parse_invalid_colon;
2376 }
2377 }
2378 if (is_subscript) {
2379 assert(kv_size(ast_stack) > 1);
2380 // Colon immediately following subscript start: it is empty subscript
2381 // part like a[:2].
2382 if (want_node == kENodeValue
2383 && (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) {
2384 NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing);
2385 (*top_node_p)->len = 0;
2386 want_node = kENodeOperator;
2387 } else {
2388 ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
2389 }
2390 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon);
2391 ADD_OP_NODE(cur_node);
2392 HL_CUR_TOKEN(SubscriptColon);
2393 } else {
2394 goto viml_pexpr_parse_valid_colon;
2395 viml_pexpr_parse_invalid_colon:
2396 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2397 _("E15: Colon outside of dictionary or ternary operator: %.*s"));
2398 viml_pexpr_parse_valid_colon:
2399 ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
2400 if (is_ternary) {
2401 HL_CUR_TOKEN(TernaryColon);
2402 } else {
2403 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon);
2404 ADD_OP_NODE(cur_node);
2405 HL_CUR_TOKEN(Colon);
2406 }
2407 }
2408 want_node = kENodeValue;
2409 break;
2410 }
2411 #undef EXP_VAL_COLON
2412 case kExprLexBracket:
2413 if (cur_token.data.brc.closing) {
2414 ExprASTNode **new_top_node_p = NULL;
2415 // Always drop the topmost value:
2416 //
2417 // 1. When want_node != kENodeValue topmost item on stack is
2418 // a *finished* left operand, which may as well be "{@a}" which
2419 // needs not be finished again.
2420 // 2. Otherwise it is pointing to NULL what nobody wants.
2421 kv_drop(ast_stack, 1);
2422 if (!kv_size(ast_stack)) {
2423 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral);
2424 cur_node->len = 0;
2425 if (want_node != kENodeValue) {
2426 cur_node->children = *top_node_p;
2427 }
2428 *top_node_p = cur_node;
2429 goto viml_pexpr_parse_bracket_closing_error;
2430 }
2431 if (want_node == kENodeValue) {
2432 // It is OK to want value if
2433 //
2434 // 1. It is empty list literal, in which case top node will be
2435 // ListLiteral.
2436 // 2. It is list literal with trailing comma, in which case top node
2437 // will be that comma.
2438 // 3. It is subscript with colon, but without one of the values:
2439 // e.g. "a[:]", "a[1:]", top node will be colon in this case.
2440 if ((*kv_last(ast_stack))->type != kExprNodeListLiteral
2441 && (*kv_last(ast_stack))->type != kExprNodeComma
2442 && (*kv_last(ast_stack))->type != kExprNodeColon) {
2443 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2444 _("E15: Expected value, got closing bracket: %.*s"));
2445 }
2446 }
2447 do {
2448 new_top_node_p = kv_pop(ast_stack);
2449 } while (kv_size(ast_stack)
2450 && (new_top_node_p == NULL
2451 || ((*new_top_node_p)->type != kExprNodeListLiteral
2452 && (*new_top_node_p)->type != kExprNodeSubscript)));
2453 ExprASTNode *new_top_node = *new_top_node_p;
2454 switch (new_top_node->type) {
2455 case kExprNodeListLiteral:
2456 if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) {
2457 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E475: Unable to assign to empty list: %.*s"));
2458 }
2459 HL_CUR_TOKEN(List);
2460 break;
2461 case kExprNodeSubscript:
2462 HL_CUR_TOKEN(SubscriptBracket);
2463 break;
2464 default:
2465 viml_pexpr_parse_bracket_closing_error:
2466 assert(!kv_size(ast_stack));
2467 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s"));
2468 HL_CUR_TOKEN(List);
2469 break;
2470 }
2471 kvi_push(ast_stack, new_top_node_p);
2472 want_node = kENodeOperator;
2473 if (kv_size(ast_stack) <= asgn_level) {
2474 assert(kv_size(ast_stack) == asgn_level);
2475 asgn_level = 0;
2476 if (cur_pt == kEPTAssignment) {
2477 assert(ast.err.msg);
2478 } else if (cur_pt == kEPTExpr
2479 && kv_size(pt_stack) > 1
2480 && pt_is_assignment(kv_Z(pt_stack, 1))) {
2481 kv_drop(pt_stack, 1);
2482 }
2483 }
2484 if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) {
2485 kv_drop(pt_stack, 1);
2486 }
2487 } else {
2488 if (want_node == kENodeValue) {
2489 // Value means list literal or list assignment.
2490 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral);
2491 *top_node_p = cur_node;
2492 kvi_push(ast_stack, &cur_node->children);
2493 want_node = kENodeValue;
2494 if (cur_pt == kEPTAssignment) {
2495 // Additional assignment parse type allows to easily forbid nested
2496 // lists.
2497 kvi_push(pt_stack, kEPTSingleAssignment);
2498 } else if (cur_pt == kEPTSingleAssignment) {
2499 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2500 _("E475: Nested lists not allowed when assigning: %.*s"));
2501 }
2502 HL_CUR_TOKEN(List);
2503 } else {
2504 // Operator means subscript, also in assignment. But in assignment
2505 // subscript may be pretty much any expression, so need to push
2506 // kEPTExpr.
2507 if (prev_token.type == kExprLexSpacing) {
2508 OP_MISSING;
2509 }
2510 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript);
2511 ADD_OP_NODE(cur_node);
2512 HL_CUR_TOKEN(SubscriptBracket);
2513 if (pt_is_assignment(cur_pt)) {
2514 assert(want_node == kENodeValue); // Subtract 1 for NULL at top.
2515 asgn_level = kv_size(ast_stack) - 1;
2516 kvi_push(pt_stack, kEPTExpr);
2517 }
2518 }
2519 }
2520 break;
2521 case kExprLexFigureBrace:
2522 if (cur_token.data.brc.closing) {
2523 ExprASTNode **new_top_node_p = NULL;
2524 // Always drop the topmost value:
2525 //
2526 // 1. When want_node != kENodeValue topmost item on stack is
2527 // a *finished* left operand, which may as well be "{@a}" which
2528 // needs not be finished again.
2529 // 2. Otherwise it is pointing to NULL what nobody wants.
2530 kv_drop(ast_stack, 1);
2531 if (!kv_size(ast_stack)) {
2532 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure);
2533 cur_node->data.fig.type_guesses.allow_lambda = false;
2534 cur_node->data.fig.type_guesses.allow_dict = false;
2535 cur_node->data.fig.type_guesses.allow_ident = false;
2536 cur_node->len = 0;
2537 if (want_node != kENodeValue) {
2538 cur_node->children = *top_node_p;
2539 }
2540 *top_node_p = cur_node;
2541 new_top_node_p = top_node_p;
2542 goto viml_pexpr_parse_figure_brace_closing_error;
2543 }
2544 if (want_node == kENodeValue) {
2545 if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure
2546 && (*kv_last(ast_stack))->type != kExprNodeComma) {
2547 // kv_last being UnknownFigure may occur for empty dictionary
2548 // literal, while Comma is expected in case of non-empty one.
2549 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2550 _("E15: Expected value, got closing figure brace: %.*s"));
2551 }
2552 }
2553 do {
2554 new_top_node_p = kv_pop(ast_stack);
2555 } while (kv_size(ast_stack)
2556 && (new_top_node_p == NULL
2557 || ((*new_top_node_p)->type != kExprNodeUnknownFigure
2558 && (*new_top_node_p)->type != kExprNodeDictLiteral
2559 && ((*new_top_node_p)->type
2560 != kExprNodeCurlyBracesIdentifier)
2561 && (*new_top_node_p)->type != kExprNodeLambda)));
2562 ExprASTNode *new_top_node = *new_top_node_p;
2563 switch (new_top_node->type) {
2564 case kExprNodeUnknownFigure:
2565 if (new_top_node->children == NULL) {
2566 // No children of curly braces node indicates empty dictionary.
2567 assert(want_node == kENodeValue);
2568 assert(new_top_node->data.fig.type_guesses.allow_dict);
2569 SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict);
2570 HL_CUR_TOKEN(Dict);
2571 } else if (new_top_node->data.fig.type_guesses.allow_ident) {
2572 SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier,
2573 Curly);
2574 HL_CUR_TOKEN(Curly);
2575 } else {
2576 // If by this time type of the node has not already been
2577 // guessed, but it definitely is not a curly braces name then
2578 // it is invalid for sure.
2579 ERROR_FROM_NODE_AND_MSG(new_top_node,
2580 _("E15: Don't know what figure brace means: %.*s"));
2581 if (pstate->colors) {
2582 // Will reset to NvimInvalidFigureBrace.
2583 kv_A(*pstate->colors,
2584 new_top_node->data.fig.opening_hl_idx).group = (
2585 HL(FigureBrace));
2586 }
2587 HL_CUR_TOKEN(FigureBrace);
2588 }
2589 break;
2590 case kExprNodeDictLiteral:
2591 HL_CUR_TOKEN(Dict);
2592 break;
2593 case kExprNodeCurlyBracesIdentifier:
2594 HL_CUR_TOKEN(Curly);
2595 break;
2596 case kExprNodeLambda:
2597 HL_CUR_TOKEN(Lambda);
2598 break;
2599 default:
2600 viml_pexpr_parse_figure_brace_closing_error:
2601 assert(!kv_size(ast_stack));
2602 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s"));
2603 HL_CUR_TOKEN(FigureBrace);
2604 break;
2605 }
2606 kvi_push(ast_stack, new_top_node_p);
2607 want_node = kENodeOperator;
2608 if (kv_size(ast_stack) <= asgn_level) {
2609 assert(kv_size(ast_stack) == asgn_level);
2610 if (cur_pt == kEPTExpr
2611 && kv_size(pt_stack) > 1
2612 && pt_is_assignment(kv_Z(pt_stack, 1))) {
2613 kv_drop(pt_stack, 1);
2614 asgn_level = 0;
2615 }
2616 }
2617 } else {
2618 if (want_node == kENodeValue) {
2619 HL_CUR_TOKEN(FigureBrace);
2620 // Value: may be any of lambda, dictionary literal and curly braces
2621 // name.
2622
2623 // Though if we are in an assignment this may only be a curly braces
2624 // name.
2625 if (pt_is_assignment(cur_pt)) {
2626 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier);
2627 cur_node->data.fig.type_guesses.allow_lambda = false;
2628 cur_node->data.fig.type_guesses.allow_dict = false;
2629 cur_node->data.fig.type_guesses.allow_ident = true;
2630 kvi_push(pt_stack, kEPTExpr);
2631 } else {
2632 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure);
2633 cur_node->data.fig.type_guesses.allow_lambda = true;
2634 cur_node->data.fig.type_guesses.allow_dict = true;
2635 cur_node->data.fig.type_guesses.allow_ident = true;
2636 }
2637 if (pstate->colors) {
2638 cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1;
2639 }
2640 *top_node_p = cur_node;
2641 kvi_push(ast_stack, &cur_node->children);
2642 kvi_push(pt_stack, kEPTLambdaArguments);
2643 lambda_node = cur_node;
2644 } else {
2645 ADD_IDENT(do {
2646 NEW_NODE_WITH_CUR_POS(cur_node,
2647 kExprNodeCurlyBracesIdentifier);
2648 cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors);
2649 cur_node->data.fig.type_guesses.allow_lambda = false;
2650 cur_node->data.fig.type_guesses.allow_dict = false;
2651 cur_node->data.fig.type_guesses.allow_ident = true;
2652 kvi_push(ast_stack, &cur_node->children);
2653 if (pt_is_assignment(cur_pt)) {
2654 kvi_push(pt_stack, kEPTExpr);
2655 }
2656 want_node = kENodeValue;
2657 } while (0),
2658 Curly);
2659 }
2660 if (pt_is_assignment(cur_pt)
2661 && !pt_is_assignment(kv_last(pt_stack))) {
2662 assert(want_node == kENodeValue); // Subtract 1 for NULL at top.
2663 asgn_level = kv_size(ast_stack) - 1;
2664 }
2665 }
2666 break;
2667 case kExprLexArrow:
2668 if (cur_pt == kEPTLambdaArguments) {
2669 kv_drop(pt_stack, 1);
2670 assert(kv_size(pt_stack));
2671 if (want_node == kENodeValue) {
2672 // Wanting value means trailing comma and NULL at the top of the
2673 // stack.
2674 kv_drop(ast_stack, 1);
2675 }
2676 assert(kv_size(ast_stack) >= 1);
2677 while ((*kv_last(ast_stack))->type != kExprNodeLambda
2678 && (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) {
2679 kv_drop(ast_stack, 1);
2680 }
2681 assert((*kv_last(ast_stack)) == lambda_node);
2682 SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda);
2683 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow);
2684 if (lambda_node->children == NULL) {
2685 assert(want_node == kENodeValue);
2686 lambda_node->children = cur_node;
2687 kvi_push(ast_stack, &lambda_node->children);
2688 } else {
2689 assert(lambda_node->children->next == NULL);
2690 lambda_node->children->next = cur_node;
2691 kvi_push(ast_stack, &lambda_node->children->next);
2692 }
2693 kvi_push(ast_stack, &cur_node->children);
2694 lambda_node = NULL;
2695 } else {
2696 // Only first branch is valid.
2697 ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s"));
2698 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Arrow outside of lambda: %.*s"));
2699 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow);
2700 ADD_OP_NODE(cur_node);
2701 }
2702 want_node = kENodeValue;
2703 HL_CUR_TOKEN(Arrow);
2704 break;
2705 case kExprLexPlainIdentifier: {
2706 const ExprVarScope scope = (cur_token.type == kExprLexInvalid
2707 ? kExprVarScopeMissing
2708 : cur_token.data.var.scope);
2709 if (want_node == kENodeValue) {
2710 want_node = kENodeOperator;
2711 NEW_NODE_WITH_CUR_POS(cur_node,
2712 (node_is_key
2713 ? kExprNodePlainKey
2714 : kExprNodePlainIdentifier));
2715 cur_node->data.var.scope = scope;
2716 const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2);
2717 cur_node->data.var.ident = (pline.data + cur_token.start.col
2718 + scope_shift);
2719 cur_node->data.var.ident_len = cur_token.len - scope_shift;
2720 *top_node_p = cur_node;
2721 if (scope_shift) {
2722 assert(!node_is_key);
2723 viml_parser_highlight(pstate, cur_token.start, 1,
2724 HL(IdentifierScope));
2725 viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
2726 HL(IdentifierScopeDelimiter));
2727 }
2728 viml_parser_highlight(pstate, shifted_pos(cur_token.start,
2729 scope_shift),
2730 cur_token.len - scope_shift,
2731 (node_is_key
2732 ? HL(IdentifierKey)
2733 : HL(IdentifierName)));
2734 } else {
2735 if (scope == kExprVarScopeMissing) {
2736 ADD_IDENT(do {
2737 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier);
2738 cur_node->data.var.scope = scope;
2739 cur_node->data.var.ident = pline.data + cur_token.start.col;
2740 cur_node->data.var.ident_len = cur_token.len;
2741 want_node = kENodeOperator;
2742 } while (0),
2743 IdentifierName);
2744 } else {
2745 OP_MISSING;
2746 }
2747 }
2748 break;
2749 }
2750 case kExprLexNumber:
2751 if (want_node != kENodeValue) {
2752 OP_MISSING;
2753 }
2754 if (node_is_key) {
2755 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey);
2756 cur_node->data.var.ident = pline.data + cur_token.start.col;
2757 cur_node->data.var.ident_len = cur_token.len;
2758 HL_CUR_TOKEN(IdentifierKey);
2759 } else if (cur_token.data.num.is_float) {
2760 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat);
2761 cur_node->data.flt.value = cur_token.data.num.val.floating;
2762 HL_CUR_TOKEN(Float);
2763 } else {
2764 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger);
2765 cur_node->data.num.value = cur_token.data.num.val.integer;
2766 const uint8_t prefix_length = base_to_prefix_length[
2767 cur_token.data.num.base];
2768 viml_parser_highlight(pstate, cur_token.start, prefix_length,
2769 HL(NumberPrefix));
2770 viml_parser_highlight(pstate, shifted_pos(cur_token.start, prefix_length),
2771 cur_token.len - prefix_length, HL(Number));
2772 }
2773 want_node = kENodeOperator;
2774 *top_node_p = cur_node;
2775 break;
2776 case kExprLexDot:
2777 ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s"));
2778 if (prev_token.type == kExprLexSpacing) {
2779 if (cur_pt == kEPTAssignment) {
2780 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Cannot concatenate in assignments: %.*s"));
2781 }
2782 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat);
2783 HL_CUR_TOKEN(Concat);
2784 } else {
2785 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript);
2786 HL_CUR_TOKEN(ConcatOrSubscript);
2787 }
2788 ADD_OP_NODE(cur_node);
2789 break;
2790 case kExprLexParenthesis:
2791 if (cur_token.data.brc.closing) {
2792 if (want_node == kENodeValue) {
2793 if (kv_size(ast_stack) > 1) {
2794 const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1);
2795 if (prev_top_node->type == kExprNodeCall) {
2796 // Function call without arguments, this is not an error.
2797 // But further code does not expect NULL nodes.
2798 kv_drop(ast_stack, 1);
2799 goto viml_pexpr_parse_no_paren_closing_error;
2800 }
2801 }
2802 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got parenthesis: %.*s"));
2803 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing);
2804 cur_node->len = 0;
2805 *top_node_p = cur_node;
2806 } else {
2807 // Always drop the topmost value: when want_node != kENodeValue
2808 // topmost item on stack is a *finished* left operand, which may as
2809 // well be "(@a)" which needs not be finished again.
2810 kv_drop(ast_stack, 1);
2811 }
2812 viml_pexpr_parse_no_paren_closing_error: {}
2813 ExprASTNode **new_top_node_p = NULL;
2814 while (kv_size(ast_stack)
2815 && (new_top_node_p == NULL
2816 || ((*new_top_node_p)->type != kExprNodeNested
2817 && (*new_top_node_p)->type != kExprNodeCall))) {
2818 new_top_node_p = kv_pop(ast_stack);
2819 }
2820 if (new_top_node_p != NULL
2821 && ((*new_top_node_p)->type == kExprNodeNested
2822 || (*new_top_node_p)->type == kExprNodeCall)) {
2823 if ((*new_top_node_p)->type == kExprNodeNested) {
2824 HL_CUR_TOKEN(NestingParenthesis);
2825 } else {
2826 HL_CUR_TOKEN(CallingParenthesis);
2827 }
2828 } else {
2829 // “Always drop the topmost value” branch has got rid of the single
2830 // value stack had, so there is nothing known to enclose. Correct
2831 // this.
2832 if (new_top_node_p == NULL) {
2833 new_top_node_p = top_node_p;
2834 }
2835 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing parenthesis: %.*s"));
2836 HL_CUR_TOKEN(NestingParenthesis);
2837 cur_node = NEW_NODE(kExprNodeNested);
2838 cur_node->start = cur_token.start;
2839 cur_node->len = 0;
2840 // Unexpected closing parenthesis, assume that it was wanted to
2841 // enclose everything in ().
2842 cur_node->children = *new_top_node_p;
2843 *new_top_node_p = cur_node;
2844 assert(cur_node->next == NULL);
2845 }
2846 kvi_push(ast_stack, new_top_node_p);
2847 want_node = kENodeOperator;
2848 } else {
2849 switch (want_node) {
2850 case kENodeValue:
2851 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested);
2852 *top_node_p = cur_node;
2853 kvi_push(ast_stack, &cur_node->children);
2854 HL_CUR_TOKEN(NestingParenthesis);
2855 break;
2856 case kENodeOperator:
2857 if (prev_token.type == kExprLexSpacing) {
2858 // For some reason "function (args)" is a function call, but
2859 // "(funcref) (args)" is not. AFAIR this somehow involves
2860 // compatibility and Bram was commenting that this is
2861 // intentionally inconsistent and he is not very happy with the
2862 // situation himself.
2863 if ((*top_node_p)->type != kExprNodePlainIdentifier
2864 && (*top_node_p)->type != kExprNodeComplexIdentifier
2865 && (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) {
2866 OP_MISSING;
2867 }
2868 }
2869 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall);
2870 ADD_OP_NODE(cur_node);
2871 HL_CUR_TOKEN(CallingParenthesis);
2872 break;
2873 }
2874 want_node = kENodeValue;
2875 }
2876 break;
2877 case kExprLexQuestion: {
2878 ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s"));
2879 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary);
2880 ADD_OP_NODE(cur_node);
2881 HL_CUR_TOKEN(Ternary);
2882 ExprASTNode *ter_val_node;
2883 NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue);
2884 ter_val_node->data.ter.got_colon = false;
2885 assert(cur_node->children != NULL);
2886 assert(cur_node->children->next == NULL);
2887 assert(kv_last(ast_stack) == &cur_node->children->next);
2888 *kv_last(ast_stack) = ter_val_node;
2889 kvi_push(ast_stack, &ter_val_node->children);
2890 break;
2891 }
2892 case kExprLexDoubleQuotedString:
2893 case kExprLexSingleQuotedString: {
2894 const bool is_double = (tok_type == kExprLexDoubleQuotedString);
2895 if (!cur_token.data.str.closed) {
2896 // It is weird, but Vim has two identical errors messages with
2897 // different error numbers: "E114: Missing quote" and
2898 // "E115: Missing quote".
2899 ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double
2900 ? _("E114: Missing double quote: %.*s")
2901 : _("E115: Missing single quote: %.*s")));
2902 }
2903 if (want_node == kENodeOperator) {
2904 OP_MISSING;
2905 }
2906 NEW_NODE_WITH_CUR_POS(cur_node, (is_double
2907 ? kExprNodeDoubleQuotedString
2908 : kExprNodeSingleQuotedString));
2909 *top_node_p = cur_node;
2910 parse_quoted_string(pstate, cur_node, cur_token, ast_stack, is_invalid);
2911 want_node = kENodeOperator;
2912 break;
2913 }
2914 case kExprLexAssignment:
2915 if (cur_pt == kEPTAssignment) {
2916 kv_drop(pt_stack, 1);
2917 } else if (cur_pt == kEPTSingleAssignment) {
2918 kv_drop(pt_stack, 2);
2919 ERROR_FROM_TOKEN_AND_MSG(cur_token,
2920 _("E475: Expected closing bracket to end list assignment "
2921 "lvalue: %.*s"));
2922 } else {
2923 ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Misplaced assignment: %.*s"));
2924 }
2925 assert(kv_size(pt_stack));
2926 assert(kv_last(pt_stack) == kEPTExpr);
2927 ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s"));
2928 NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment);
2929 cur_node->data.ass.type = cur_token.data.ass.type;
2930 switch (cur_token.data.ass.type) {
2931 #define HL_ASGN(asgn, hl) \
2932 case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; }
2933 HL_ASGN(Plain, PlainAssignment)
2934 HL_ASGN(Add, AssignmentWithAddition)
2935 HL_ASGN(Subtract, AssignmentWithSubtraction)
2936 HL_ASGN(Concat, AssignmentWithConcatenation)
2937 #undef HL_ASGN
2938 }
2939 ADD_OP_NODE(cur_node);
2940 break;
2941 }
2942 viml_pexpr_parse_cycle_end:
2943 prev_token = cur_token;
2944 highlighted_prev_spacing = false;
2945 viml_parser_advance(pstate, cur_token.len);
2946 } while (true);
2947 viml_pexpr_parse_end:
2948 assert(kv_size(pt_stack));
2949 assert(kv_size(ast_stack));
2950 if (want_node == kENodeValue
2951 // Blacklist some parse type entries as their presence means better error
2952 // message in the other branch.
2953 && kv_last(pt_stack) != kEPTLambdaArguments) {
2954 east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"),
2955 pstate->pos);
2956 } else if (kv_size(ast_stack) != 1) {
2957 // Something may be wrong, check whether it really is.
2958
2959 // Pointer to ast.root must never be dropped, so “!= 1” is expected to be
2960 // the same as “> 1”.
2961 assert(kv_size(ast_stack));
2962 // Topmost stack item must be a *finished* value, so it must not be
2963 // analyzed. E.g. it may contain an already finished nested expression.
2964 kv_drop(ast_stack, 1);
2965 while (ast.err.msg == NULL && kv_size(ast_stack)) {
2966 const ExprASTNode *const cur_node = (*kv_pop(ast_stack));
2967 // This should only happen when want_node == kENodeValue.
2968 assert(cur_node != NULL);
2969 // TODO(ZyX-I): Rehighlight as invalid?
2970 switch (cur_node->type) {
2971 case kExprNodeOpMissing:
2972 case kExprNodeMissing:
2973 // Error should’ve been already reported.
2974 break;
2975 case kExprNodeCall:
2976 east_set_error(pstate, &ast.err,
2977 _("E116: Missing closing parenthesis for function call: %.*s"),
2978 cur_node->start);
2979 break;
2980 case kExprNodeNested:
2981 east_set_error(pstate, &ast.err,
2982 _("E110: Missing closing parenthesis for nested expression"
2983 ": %.*s"),
2984 cur_node->start);
2985 break;
2986 case kExprNodeListLiteral:
2987 // For whatever reason "[1" yields "E696: Missing comma in list" error
2988 // in Vim while "[1," yields E697.
2989 east_set_error(pstate, &ast.err,
2990 _("E697: Missing end of List ']': %.*s"),
2991 cur_node->start);
2992 break;
2993 case kExprNodeDictLiteral:
2994 // Same problem like with list literal with E722 (missing comma) vs
2995 // E723, but additionally just "{" yields only E15.
2996 east_set_error(pstate, &ast.err,
2997 _("E723: Missing end of Dictionary '}': %.*s"),
2998 cur_node->start);
2999 break;
3000 case kExprNodeUnknownFigure:
3001 east_set_error(pstate, &ast.err,
3002 _("E15: Missing closing figure brace: %.*s"),
3003 cur_node->start);
3004 break;
3005 case kExprNodeLambda:
3006 east_set_error(pstate, &ast.err,
3007 _("E15: Missing closing figure brace for lambda: %.*s"),
3008 cur_node->start);
3009 break;
3010 case kExprNodeCurlyBracesIdentifier:
3011 // Until trailing "}" it is impossible to distinguish curly braces
3012 // identifier and dictionary, so it must not appear in the stack like
3013 // this.
3014 abort();
3015 case kExprNodeInteger:
3016 case kExprNodeFloat:
3017 case kExprNodeSingleQuotedString:
3018 case kExprNodeDoubleQuotedString:
3019 case kExprNodeOption:
3020 case kExprNodeEnvironment:
3021 case kExprNodeRegister:
3022 case kExprNodePlainIdentifier:
3023 case kExprNodePlainKey:
3024 // These are plain values and not containers, for them it should only
3025 // be possible to show up in the topmost stack element, but it was
3026 // unconditionally popped at the start.
3027 abort();
3028 case kExprNodeComma:
3029 case kExprNodeColon:
3030 case kExprNodeArrow:
3031 // It is actually only valid inside something else, but everything
3032 // where one of the above is valid requires to be closed and thus is
3033 // to be caught later.
3034 break;
3035 case kExprNodeSubscript:
3036 case kExprNodeConcatOrSubscript:
3037 case kExprNodeComplexIdentifier:
3038 case kExprNodeAssignment:
3039 case kExprNodeMod:
3040 case kExprNodeDivision:
3041 case kExprNodeMultiplication:
3042 case kExprNodeNot:
3043 case kExprNodeAnd:
3044 case kExprNodeOr:
3045 case kExprNodeConcat:
3046 case kExprNodeComparison:
3047 case kExprNodeUnaryMinus:
3048 case kExprNodeUnaryPlus:
3049 case kExprNodeBinaryMinus:
3050 case kExprNodeTernary:
3051 case kExprNodeBinaryPlus:
3052 // It is OK to see these in the stack.
3053 break;
3054 case kExprNodeTernaryValue:
3055 if (!cur_node->data.ter.got_colon) {
3056 // Actually Vim throws E109 in more cases.
3057 east_set_error(pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"),
3058 cur_node->start);
3059 }
3060 break;
3061 }
3062 }
3063 }
3064 kvi_destroy(ast_stack);
3065 return ast;
3066 } // NOLINT(readability/fn_size)
3067
3068 #undef NEW_NODE
3069 #undef HL
3070