1 // This is an open source non-commercial project. Dear PVS-Studio, please check
2 // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
3 
4 /// VimL expression parser
5 
6 // Planned incompatibilities (to be included into vim_diff.txt when this parser
7 // will be an actual part of VimL evaluation process):
8 //
9 // 1. Expressions are first fully parsed and only then executed.  This means
10 //    that while ":echo [system('touch abc')" will create file "abc" in Vim and
11 //    only then raise syntax error regarding missing comma in list in Neovim
12 //    trying to execute that will immediately raise syntax error regarding
13 //    missing list end without actually executing anything.
14 // 2. Expressions are first fully parsed, without considering any runtime
15 //    information.  This means things like that "d.a" does not change its
16 //    meaning depending on type of "d" (or whether Vim is currently executing or
17 //    skipping).  For compatibility reasons the dot thus may either be “concat
18 //    or subscript” operator or just “concat” operator.
19 // 3. Expressions parser is aware whether it is called for :echo or <C-r>=.
20 //    This means that while "<C-r>=1 | 2<CR>" is equivalent to "<C-r>=1<CR>"
21 //    because "| 2" part is left to be treated as a command separator and then
22 //    ignored in Neovim it is an error.
23 // 4. Expressions parser has generally better error reporting.  But for
24 //    compatibility reasons most errors have error code E15 while error messages
25 //    are significantly different from Vim’s E15.  Also some error codes were
26 //    retired because of being harder to emulate or because of them being
27 //    a result of differences in parsing process: e.g. with ":echo {a, b}" Vim
28 //    will attempt to parse expression as lambda, fail, check whether it is
29 //    a curly-braces-name, fail again, and evaluate that as a dictionary, giving
30 //    error regarding undefined variable "a" (or about missing colon).  Neovim
31 //    will not try to evaluate anything here: comma right after an argument name
32 //    means that expression may not be anything, but lambda, so the resulting
33 //    error message will never be about missing variable or colon: it will be
34 //    about missing arrow (or a continuation of argument list).
35 // 5. Failing to parse expression always gives exactly one error message: no
36 //    more stack of error messages like >
37 //
38 //        :echo [1,
39 //        E697: Missing end of List ']':
40 //        E15: Invalid expression: [1,
41 //
42 // <  , just exactly one E697 message.
43 // 6. Some expressions involving calling parenthesis which are treated
44 //    separately by Vim even when not separated by spaces are treated as one
45 //    expression by Neovim: e.g. ":echo (1)(1)" will yield runtime error after
46 //    failing to call "1", while Vim will echo "1 1". Reasoning is the same:
47 //    type of what is in the first expression is generally not known when
48 //    parsing, so to have separate expressions like this separate them with
49 //    spaces.
50 // 7. 'isident' no longer applies to environment variables, they always include
51 //    ASCII alphanumeric characters and underscore and nothing except this.
52 
53 #include <assert.h>
54 #include <stdbool.h>
55 #include <stddef.h>
56 #include <string.h>
57 
58 #include "nvim/ascii.h"
59 #include "nvim/assert.h"
60 #include "nvim/charset.h"
61 #include "nvim/eval/typval.h"
62 #include "nvim/lib/kvec.h"
63 #include "nvim/memory.h"
64 #include "nvim/types.h"
65 #include "nvim/vim.h"
66 #include "nvim/viml/parser/expressions.h"
67 #include "nvim/viml/parser/parser.h"
68 
69 #define vim_str2nr(s, ...) vim_str2nr((const char_u *)(s), __VA_ARGS__)
70 
71 typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack;
72 
73 /// Which nodes may be wanted
74 typedef enum {
75   /// Operators: function call, subscripts, binary operators, …
76   ///
77   /// For unrestricted expressions.
78   kENodeOperator,
79   /// Values: literals, variables, nested expressions, unary operators.
80   ///
81   /// For unrestricted expressions as well, implies that top item in AST stack
82   /// points to NULL.
83   kENodeValue,
84 } ExprASTWantedNode;
85 
86 /// Parse type: what is being parsed currently
87 typedef enum {
88   /// Parsing regular VimL expression
89   kEPTExpr = 0,
90   /// Parsing lambda arguments
91   ///
92   /// Just like parsing function arguments, but it is valid to be ended with an
93   /// arrow only.
94   kEPTLambdaArguments,
95   /// Assignment: parsing for :let
96   kEPTAssignment,
97   /// Single assignment: used when lists are not allowed (i.e. when nesting)
98   kEPTSingleAssignment,
99 } ExprASTParseType;
100 
101 typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack;
102 
103 /// Operator priority level
104 typedef enum {
105   kEOpLvlInvalid = 0,
106   kEOpLvlComplexIdentifier,
107   kEOpLvlParens,
108   kEOpLvlAssignment,
109   kEOpLvlArrow,
110   kEOpLvlComma,
111   kEOpLvlColon,
112   kEOpLvlTernaryValue,
113   kEOpLvlTernary,
114   kEOpLvlOr,
115   kEOpLvlAnd,
116   kEOpLvlComparison,
117   kEOpLvlAddition,  ///< Addition, subtraction and concatenation.
118   kEOpLvlMultiplication,  ///< Multiplication, division and modulo.
119   kEOpLvlUnary,  ///< Unary operations: not, minus, plus.
120   kEOpLvlSubscript,  ///< Subscripts.
121   kEOpLvlValue,  ///< Values: literals, variables, nested expressions, …
122 } ExprOpLvl;
123 
124 /// Operator associativity
125 typedef enum {
126   kEOpAssNo= 'n',  ///< Not associative / not applicable.
127   kEOpAssLeft = 'l',  ///< Left associativity.
128   kEOpAssRight = 'r',  ///< Right associativity.
129 } ExprOpAssociativity;
130 
131 #ifdef INCLUDE_GENERATED_DECLARATIONS
132 # include "viml/parser/expressions.c.generated.h"
133 #endif
134 
135 /// Scale number by a given factor
136 ///
137 /// Used to apply exponent to a number. Idea taken from uClibc.
138 ///
139 /// @param[in]  num  Number to scale. Does not bother doing anything if it is
140 ///                  zero.
141 /// @param[in]  base  Base, should be 10 since non-decimal floating-point
142 ///                   numbers are not supported.
143 /// @param[in]  exponent  Exponent to scale by.
144 /// @param[in]  exponent_negative  True if exponent is negative.
scale_number(const float_T num,const uint8_t base,const uvarnumber_T exponent,const bool exponent_negative)145 static inline float_T scale_number(const float_T num, const uint8_t base,
146                                    const uvarnumber_T exponent, const bool exponent_negative)
147   FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST
148 {
149   if (num == 0 || exponent == 0) {
150     return num;
151   }
152   assert(base);
153   uvarnumber_T exp = exponent;
154   float_T p_base = (float_T)base;
155   float_T ret = num;
156   while (exp) {
157     if (exp & 1) {
158       if (exponent_negative) {
159         ret /= p_base;
160       } else {
161         ret *= p_base;
162       }
163     }
164     exp >>= 1;
165     p_base *= p_base;
166   }
167   return ret;
168 }
169 
170 /// Get next token for the VimL expression input
171 ///
172 /// @param  pstate  Parser state.
173 /// @param[in]  flags  Flags, @see LexExprFlags.
174 ///
175 /// @return Next token.
viml_pexpr_next_token(ParserState * const pstate,const int flags)176 LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
177   FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
178 {
179   LexExprToken ret = {
180     .type = kExprLexInvalid,
181     .start = pstate->pos,
182   };
183   ParserLine pline;
184   if (!viml_parser_get_remaining_line(pstate, &pline)) {
185     ret.type = kExprLexEOC;
186     return ret;
187   }
188   if (pline.size <= 0) {
189     ret.len = 0;
190     ret.type = kExprLexEOC;
191     goto viml_pexpr_next_token_adv_return;
192   }
193   ret.len = 1;
194   const uint8_t schar = (uint8_t)pline.data[0];
195 #define GET_CCS(ret, pline) \
196   do { \
197     if (ret.len < pline.size \
198         && strchr("?#", pline.data[ret.len]) != NULL) { \
199       ret.data.cmp.ccs = \
200         (ExprCaseCompareStrategy)pline.data[ret.len]; \
201       ret.len++; \
202     } else { \
203       ret.data.cmp.ccs = kCCStrategyUseOption; \
204     } \
205   } while (0)
206   switch (schar) {
207     // Paired brackets.
208 #define BRACKET(typ, opning, clsing) \
209 case opning: \
210 case clsing: { \
211   ret.type = typ; \
212   ret.data.brc.closing = (schar == clsing); \
213   break; \
214 }
215     BRACKET(kExprLexParenthesis, '(', ')')
216     BRACKET(kExprLexBracket, '[', ']')
217     BRACKET(kExprLexFigureBrace, '{', '}')
218 #undef BRACKET
219 
220     // Single character tokens without data.
221 #define CHAR(typ, ch) \
222 case ch: { \
223   ret.type = typ; \
224   break; \
225 }
226     CHAR(kExprLexQuestion, '?')
227     CHAR(kExprLexColon, ':')
228     CHAR(kExprLexComma, ',')
229 #undef CHAR
230 
231     // Multiplication/division/modulo.
232 #define MUL(mul_type, ch) \
233 case ch: { \
234   ret.type = kExprLexMultiplication; \
235   ret.data.mul.type = mul_type; \
236   break; \
237 }
238     MUL(kExprLexMulMul, '*')
239     MUL(kExprLexMulDiv, '/')
240     MUL(kExprLexMulMod, '%')
241 #undef MUL
242 
243 #define CHARREG(typ, cond) \
244   do { \
245     ret.type = typ; \
246     for (; (ret.len < pline.size \
247             && cond(pline.data[ret.len])) \
248          ; ret.len++) { \
249     } \
250   } while (0)
251 
252   // Whitespace.
253   case ' ':
254   case TAB:
255     CHARREG(kExprLexSpacing, ascii_iswhite);
256     break;
257 
258   // Control character, except for NUL, NL and TAB.
259   case Ctrl_A:
260   case Ctrl_B:
261   case Ctrl_C:
262   case Ctrl_D:
263   case Ctrl_E:
264   case Ctrl_F:
265   case Ctrl_G:
266   case Ctrl_H:
267 
268   case Ctrl_K:
269   case Ctrl_L:
270   case Ctrl_M:
271   case Ctrl_N:
272   case Ctrl_O:
273   case Ctrl_P:
274   case Ctrl_Q:
275   case Ctrl_R:
276   case Ctrl_S:
277   case Ctrl_T:
278   case Ctrl_U:
279   case Ctrl_V:
280   case Ctrl_W:
281   case Ctrl_X:
282   case Ctrl_Y:
283   case Ctrl_Z:
284 #define ISCTRL(schar) (schar < ' ')
285     CHARREG(kExprLexInvalid, ISCTRL);
286     ret.data.err.type = kExprLexSpacing;
287     ret.data.err.msg =
288       _("E15: Invalid control character present in input: %.*s");
289     break;
290 #undef ISCTRL
291 
292   // Number.
293   case '0':
294   case '1':
295   case '2':
296   case '3':
297   case '4':
298   case '5':
299   case '6':
300   case '7':
301   case '8':
302   case '9': {
303     ret.data.num.is_float = false;
304     ret.data.num.base = 10;
305     size_t frac_start = 0;
306     size_t exp_start = 0;
307     size_t frac_end = 0;
308     bool exp_negative = false;
309     CHARREG(kExprLexNumber, ascii_isdigit);
310     if (flags & kELFlagAllowFloat) {
311       const LexExprToken non_float_ret = ret;
312       if (pline.size > ret.len + 1
313           && pline.data[ret.len] == '.'
314           && ascii_isdigit(pline.data[ret.len + 1])) {
315         ret.len++;
316         frac_start = ret.len;
317         frac_end = ret.len;
318         ret.data.num.is_float = true;
319         for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len])
320              ; ret.len++) {
321           // A small optimization: trailing zeroes in fractional part do not
322           // add anything to significand, so it is useless to include them in
323           // frac_end.
324           if (pline.data[ret.len] != '0') {
325             frac_end = ret.len + 1;
326           }
327         }
328         if (pline.size > ret.len + 1
329             && (pline.data[ret.len] == 'e'
330                 || pline.data[ret.len] == 'E')
331             && ((pline.size > ret.len + 2
332                  && (pline.data[ret.len + 1] == '+'
333                      || pline.data[ret.len + 1] == '-')
334                  && ascii_isdigit(pline.data[ret.len + 2]))
335                 || ascii_isdigit(pline.data[ret.len + 1]))) {
336           ret.len++;
337           if (pline.data[ret.len] == '+'
338               || (exp_negative = (pline.data[ret.len] == '-'))) {
339             ret.len++;
340           }
341           exp_start = ret.len;
342           CHARREG(kExprLexNumber, ascii_isdigit);
343         }
344       }
345       if (pline.size > ret.len
346           && (pline.data[ret.len] == '.'
347               || ASCII_ISALPHA(pline.data[ret.len]))) {
348         ret = non_float_ret;
349       }
350     }
351     // TODO(ZyX-I): detect overflows
352     if (ret.data.num.is_float) {
353       // Vim used to use string2float here which in turn uses strtod(). There
354       // are two problems with this approach:
355       // 1. strtod() is locale-dependent. Not sure how it is worked around so
356       //    that I do not see relevant bugs, but it still does not look like
357       //    a good idea.
358       // 2. strtod() does not accept length argument.
359       //
360       // The below variant of parsing floats was recognized as acceptable
361       // because it is basically how uClibc does the thing: it generates
362       // a number ignoring decimal point (but recording its position), then
363       // uses recorded position to scale number down when processing exponent.
364       float_T significand_part = 0;
365       uvarnumber_T exp_part = 0;
366       const size_t frac_size = (size_t)(frac_end - frac_start);
367       for (size_t i = 0; i < frac_end; i++) {
368         if (i == frac_start - 1) {
369           continue;
370         }
371         significand_part = significand_part * 10 + (pline.data[i] - '0');
372       }
373       if (exp_start) {
374         vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part,
375                    (int)(ret.len - exp_start), false);
376       }
377       if (exp_negative) {
378         exp_part += frac_size;
379       } else {
380         if (exp_part < frac_size) {
381           exp_negative = true;
382           exp_part = frac_size - exp_part;
383         } else {
384           exp_part -= frac_size;
385         }
386       }
387       ret.data.num.val.floating = scale_number(significand_part, 10, exp_part,
388                                                exp_negative);
389     } else {
390       int len;
391       int prep;
392       vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL,
393                  &ret.data.num.val.integer, (int)pline.size, false);
394       ret.len = (size_t)len;
395       const uint8_t bases[] = {
396         [0] = 10,
397         ['0'] = 8,
398         ['x'] = 16, ['X'] = 16,
399         ['b'] = 2, ['B'] = 2,
400       };
401       ret.data.num.base = bases[prep];
402     }
403     break;
404   }
405 
406 #define ISWORD_OR_AUTOLOAD(x) \
407   (ascii_isident(x) || (x) == AUTOLOAD_CHAR)
408 
409   // Environment variable.
410   case '$':
411     CHARREG(kExprLexEnv, ascii_isident);
412     break;
413 
414   // Normal variable/function name.
415   case 'a':
416   case 'b':
417   case 'c':
418   case 'd':
419   case 'e':
420   case 'f':
421   case 'g':
422   case 'h':
423   case 'i':
424   case 'j':
425   case 'k':
426   case 'l':
427   case 'm':
428   case 'n':
429   case 'o':
430   case 'p':
431   case 'q':
432   case 'r':
433   case 's':
434   case 't':
435   case 'u':
436   case 'v':
437   case 'w':
438   case 'x':
439   case 'y':
440   case 'z':
441   case 'A':
442   case 'B':
443   case 'C':
444   case 'D':
445   case 'E':
446   case 'F':
447   case 'G':
448   case 'H':
449   case 'I':
450   case 'J':
451   case 'K':
452   case 'L':
453   case 'M':
454   case 'N':
455   case 'O':
456   case 'P':
457   case 'Q':
458   case 'R':
459   case 'S':
460   case 'T':
461   case 'U':
462   case 'V':
463   case 'W':
464   case 'X':
465   case 'Y':
466   case 'Z':
467   case '_':
468     ret.data.var.scope = 0;
469     ret.data.var.autoload = false;
470     CHARREG(kExprLexPlainIdentifier, ascii_isident);
471     // "is" and "isnot" operators.
472     if (!(flags & kELFlagIsNotCmp)
473         && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0)
474             || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) {
475       ret.type = kExprLexComparison;
476       ret.data.cmp.type = kExprCmpIdentical;
477       ret.data.cmp.inv = (ret.len == 5);
478       GET_CCS(ret, pline);
479       // Scope: `s:`, etc.
480     } else if (ret.len == 1
481                && pline.size > 1
482                && memchr(EXPR_VAR_SCOPE_LIST, schar,
483                          sizeof(EXPR_VAR_SCOPE_LIST)) != NULL
484                && pline.data[ret.len] == ':'
485                && !(flags & kELFlagForbidScope)) {
486       ret.len++;
487       ret.data.var.scope = (ExprVarScope)schar;
488       CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
489       ret.data.var.autoload = (
490                                memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2)
491                                != NULL);
492       // Previous CHARREG stopped at autoload character in order to make it
493       // possible to detect `is#`. Continue now with autoload characters
494       // included.
495       //
496       // Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of
497       // function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. This
498       // needs to be resolved on the higher level where context is available.
499     } else if (pline.size > ret.len
500                && pline.data[ret.len] == AUTOLOAD_CHAR) {
501       ret.data.var.autoload = true;
502       CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
503     }
504     break;
505 
506 #undef ISWORD_OR_AUTOLOAD
507 #undef CHARREG
508 
509   // Option.
510   case '&': {
511 #define OPTNAMEMISS(ret) \
512   do { \
513     ret.type = kExprLexInvalid; \
514     ret.data.err.type = kExprLexOption; \
515     ret.data.err.msg = _("E112: Option name missing: %.*s"); \
516   } while (0)
517     if (pline.size > 1 && pline.data[1] == '&') {
518       ret.type = kExprLexAnd;
519       ret.len++;
520       break;
521     }
522     if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) {
523       OPTNAMEMISS(ret);
524       break;
525     }
526     ret.type = kExprLexOption;
527     if (pline.size > 2
528         && pline.data[2] == ':'
529         && memchr(EXPR_OPT_SCOPE_LIST, pline.data[1],
530                   sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) {
531       ret.len += 2;
532       ret.data.opt.scope = (ExprOptScope)pline.data[1];
533       ret.data.opt.name = pline.data + 3;
534     } else {
535       ret.data.opt.scope = kExprOptScopeUnspecified;
536       ret.data.opt.name = pline.data + 1;
537     }
538     const char *p = ret.data.opt.name;
539     const char *const e = pline.data + pline.size;
540     if (e - p >= 4 && p[0] == 't' && p[1] == '_') {
541       ret.data.opt.len = 4;
542       ret.len += 4;
543     } else {
544       for (; p < e && ASCII_ISALPHA(*p); p++) {
545       }
546       ret.data.opt.len = (size_t)(p - ret.data.opt.name);
547       if (ret.data.opt.len == 0) {
548         OPTNAMEMISS(ret);
549       } else {
550         ret.len += ret.data.opt.len;
551       }
552     }
553     break;
554 #undef OPTNAMEMISS
555   }
556 
557   // Register.
558   case '@':
559     ret.type = kExprLexRegister;
560     if (pline.size > 1) {
561       ret.len++;
562       ret.data.reg.name = (uint8_t)pline.data[1];
563     } else {
564       ret.data.reg.name = -1;
565     }
566     break;
567 
568   // Single quoted string.
569   case '\'':
570     ret.type = kExprLexSingleQuotedString;
571     ret.data.str.closed = false;
572     for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
573       if (pline.data[ret.len] == '\'') {
574         if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') {
575           ret.len++;
576         } else {
577           ret.data.str.closed = true;
578         }
579       }
580     }
581     break;
582 
583   // Double quoted string.
584   case '"':
585     ret.type = kExprLexDoubleQuotedString;
586     ret.data.str.closed = false;
587     for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) {
588       if (pline.data[ret.len] == '\\') {
589         if (ret.len + 1 < pline.size) {
590           ret.len++;
591         }
592       } else if (pline.data[ret.len] == '"') {
593         ret.data.str.closed = true;
594       }
595     }
596     break;
597 
598   // Unary not, (un)equality and regex (not) match comparison operators.
599   case '!':
600   case '=':
601     if (pline.size == 1) {
602       ret.type = (schar == '!' ? kExprLexNot : kExprLexAssignment);
603       ret.data.ass.type = kExprAsgnPlain;
604       break;
605     }
606     ret.type = kExprLexComparison;
607     ret.data.cmp.inv = (schar == '!');
608     if (pline.data[1] == '=') {
609       ret.data.cmp.type = kExprCmpEqual;
610       ret.len++;
611     } else if (pline.data[1] == '~') {
612       ret.data.cmp.type = kExprCmpMatches;
613       ret.len++;
614     } else if (schar == '!') {
615       ret.type = kExprLexNot;
616     } else {
617       ret.type = kExprLexAssignment;
618       ret.data.ass.type = kExprAsgnPlain;
619     }
620     GET_CCS(ret, pline);
621     break;
622 
623   // Less/greater [or equal to] comparison operators.
624   case '>':
625   case '<': {
626     ret.type = kExprLexComparison;
627     const bool haseqsign = (pline.size > 1 && pline.data[1] == '=');
628     if (haseqsign) {
629       ret.len++;
630     }
631     GET_CCS(ret, pline);
632     ret.data.cmp.inv = (schar == '<');
633     ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign)
634                            ? kExprCmpGreaterOrEqual
635                            : kExprCmpGreater);
636     break;
637   }
638 
639   // Minus sign, arrow from lambdas or augmented assignment.
640   case '-': {
641     if (pline.size > 1 && pline.data[1] == '>') {
642       ret.len++;
643       ret.type = kExprLexArrow;
644     } else if (pline.size > 1 && pline.data[1] == '=') {
645       ret.len++;
646       ret.type = kExprLexAssignment;
647       ret.data.ass.type = kExprAsgnSubtract;
648     } else {
649       ret.type = kExprLexMinus;
650     }
651     break;
652   }
653 
654     // Sign or augmented assignment.
655 #define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \
656 case ch: { \
657   if (pline.size > 1 && pline.data[1] == '=') { \
658     ret.len++; \
659     ret.type = kExprLexAssignment; \
660     ret.data.ass.type = ass_type; \
661   } else { \
662     ret.type = ch_type; \
663   } \
664   break; \
665 }
666     CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd)
667     CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat)
668 #undef CHAR_OR_ASSIGN
669 
670   // Expression end because Ex command ended.
671   case NUL:
672   case NL:
673     if (flags & kELFlagForbidEOC) {
674       ret.type = kExprLexInvalid;
675       ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
676       ret.data.err.type = kExprLexSpacing;
677     } else {
678       ret.type = kExprLexEOC;
679     }
680     break;
681 
682   case '|':
683     if (pline.size >= 2 && pline.data[ret.len] == '|') {
684       // "||" is or.
685       ret.len++;
686       ret.type = kExprLexOr;
687     } else if (flags & kELFlagForbidEOC) {
688       // Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any
689       //       errors. This will be changed here.
690       ret.type = kExprLexInvalid;
691       ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
692       ret.data.err.type = kExprLexOr;
693     } else {
694       ret.type = kExprLexEOC;
695     }
696     break;
697 
698   // Everything else is not valid.
699   default:
700     ret.len = (size_t)utfc_ptr2len_len((const char_u *)pline.data,
701                                        (int)pline.size);
702     ret.type = kExprLexInvalid;
703     ret.data.err.type = kExprLexPlainIdentifier;
704     ret.data.err.msg = _("E15: Unidentified character: %.*s");
705     break;
706   }
707 #undef GET_CCS
708 viml_pexpr_next_token_adv_return:
709   if (!(flags & kELFlagPeek)) {
710     viml_parser_advance(pstate, ret.len);
711   }
712   return ret;
713 }
714 
715 static const char *const eltkn_type_tab[] = {
716   [kExprLexInvalid] = "Invalid",
717   [kExprLexMissing] = "Missing",
718   [kExprLexSpacing] = "Spacing",
719   [kExprLexEOC] = "EOC",
720 
721   [kExprLexQuestion] = "Question",
722   [kExprLexColon] = "Colon",
723   [kExprLexOr] = "Or",
724   [kExprLexAnd] = "And",
725   [kExprLexComparison] = "Comparison",
726   [kExprLexPlus] = "Plus",
727   [kExprLexMinus] = "Minus",
728   [kExprLexDot] = "Dot",
729   [kExprLexMultiplication] = "Multiplication",
730 
731   [kExprLexNot] = "Not",
732 
733   [kExprLexNumber] = "Number",
734   [kExprLexSingleQuotedString] = "SingleQuotedString",
735   [kExprLexDoubleQuotedString] = "DoubleQuotedString",
736   [kExprLexOption] = "Option",
737   [kExprLexRegister] = "Register",
738   [kExprLexEnv] = "Env",
739   [kExprLexPlainIdentifier] = "PlainIdentifier",
740 
741   [kExprLexBracket] = "Bracket",
742   [kExprLexFigureBrace] = "FigureBrace",
743   [kExprLexParenthesis] = "Parenthesis",
744   [kExprLexComma] = "Comma",
745   [kExprLexArrow] = "Arrow",
746   [kExprLexAssignment] = "Assignment",
747 };
748 
749 const char *const eltkn_cmp_type_tab[] = {
750   [kExprCmpEqual] = "Equal",
751   [kExprCmpMatches] = "Matches",
752   [kExprCmpGreater] = "Greater",
753   [kExprCmpGreaterOrEqual] = "GreaterOrEqual",
754   [kExprCmpIdentical] = "Identical",
755 };
756 
757 const char *const expr_asgn_type_tab[] = {
758   [kExprAsgnPlain] = "Plain",
759   [kExprAsgnAdd] = "Add",
760   [kExprAsgnSubtract] = "Subtract",
761   [kExprAsgnConcat] = "Concat",
762 };
763 
764 const char *const ccs_tab[] = {
765   [kCCStrategyUseOption] = "UseOption",
766   [kCCStrategyMatchCase] = "MatchCase",
767   [kCCStrategyIgnoreCase] = "IgnoreCase",
768 };
769 
770 static const char *const eltkn_mul_type_tab[] = {
771   [kExprLexMulMul] = "Mul",
772   [kExprLexMulDiv] = "Div",
773   [kExprLexMulMod] = "Mod",
774 };
775 
776 static const char *const eltkn_opt_scope_tab[] = {
777   [kExprOptScopeUnspecified] = "Unspecified",
778   [kExprOptScopeGlobal] = "Global",
779   [kExprOptScopeLocal] = "Local",
780 };
781 
782 /// Represent token as a string
783 ///
784 /// Intended for testing and debugging purposes.
785 ///
786 /// @param[in]  pstate  Parser state, needed to get token string from it. May be
787 ///                     NULL, in which case in place of obtaining part of the
788 ///                     string represented by token only token length is
789 ///                     returned.
790 /// @param[in]  token  Token to represent.
791 /// @param[out]  ret_size  Return string size, for cases like NULs inside
792 ///                        a string. May be NULL.
793 ///
794 /// @return Token represented in a string form, in a static buffer (overwritten
795 ///         on each call).
viml_pexpr_repr_token(const ParserState * const pstate,const LexExprToken token,size_t * const ret_size)796 const char *viml_pexpr_repr_token(const ParserState *const pstate, const LexExprToken token,
797                                   size_t *const ret_size)
798   FUNC_ATTR_WARN_UNUSED_RESULT
799 {
800   static char ret[1024];
801   char *p = ret;
802   const char *const e = &ret[1024] - 1;
803 #define ADDSTR(...) \
804   do { \
805     p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \
806     if (p >= e) { \
807       goto viml_pexpr_repr_token_end; \
808     } \
809   } while (0)
810   ADDSTR("%zu:%zu:%s", token.start.line, token.start.col,
811          eltkn_type_tab[token.type]);
812   switch (token.type) {
813 #define TKNARGS(tkn_type, ...) \
814 case tkn_type: { \
815   ADDSTR(__VA_ARGS__); \
816   break; \
817 }
818     TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)",
819             eltkn_cmp_type_tab[token.data.cmp.type],
820             ccs_tab[token.data.cmp.ccs],
821             (int)token.data.cmp.inv)
822     TKNARGS(kExprLexMultiplication, "(type=%s)",
823             eltkn_mul_type_tab[token.data.mul.type])
824     TKNARGS(kExprLexAssignment, "(type=%s)",
825             expr_asgn_type_tab[token.data.ass.type])
826     TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name))
827   case kExprLexDoubleQuotedString:
828     TKNARGS(kExprLexSingleQuotedString, "(closed=%i)",
829             (int)token.data.str.closed)
830     TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)",
831             eltkn_opt_scope_tab[token.data.opt.scope],
832             (int)token.data.opt.len, token.data.opt.name)
833     TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)",
834             intchar2str((int)token.data.var.scope),
835             (int)token.data.var.autoload)
836     TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)",
837             (int)token.data.num.is_float,
838             (int)token.data.num.base,
839             (double)(token.data.num.is_float
840                      ? (double)token.data.num.val.floating
841                      : (double)token.data.num.val.integer))
842     TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg)
843   default:
844     // No additional arguments.
845     break;
846 #undef TKNARGS
847   }
848   if (pstate == NULL) {
849     ADDSTR("::%zu", token.len);
850   } else {
851     *p++ = ':';
852     memmove(p, &pstate->reader.lines.items[token.start.line].data[token.start.col],
853             token.len);
854     p += token.len;
855     *p = NUL;
856   }
857 #undef ADDSTR
858 viml_pexpr_repr_token_end:
859   if (ret_size != NULL) {
860     *ret_size = (size_t)(p - ret);
861   }
862   return ret;
863 }
864 
865 const char *const east_node_type_tab[] = {
866   [kExprNodeMissing] = "Missing",
867   [kExprNodeOpMissing] = "OpMissing",
868   [kExprNodeTernary] = "Ternary",
869   [kExprNodeTernaryValue] = "TernaryValue",
870   [kExprNodeRegister] = "Register",
871   [kExprNodeSubscript] = "Subscript",
872   [kExprNodeListLiteral] = "ListLiteral",
873   [kExprNodeUnaryPlus] = "UnaryPlus",
874   [kExprNodeBinaryPlus] = "BinaryPlus",
875   [kExprNodeNested] = "Nested",
876   [kExprNodeCall] = "Call",
877   [kExprNodePlainIdentifier] = "PlainIdentifier",
878   [kExprNodePlainKey] = "PlainKey",
879   [kExprNodeComplexIdentifier] = "ComplexIdentifier",
880   [kExprNodeUnknownFigure] = "UnknownFigure",
881   [kExprNodeLambda] = "Lambda",
882   [kExprNodeDictLiteral] = "DictLiteral",
883   [kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier",
884   [kExprNodeComma] = "Comma",
885   [kExprNodeColon] = "Colon",
886   [kExprNodeArrow] = "Arrow",
887   [kExprNodeComparison] = "Comparison",
888   [kExprNodeConcat] = "Concat",
889   [kExprNodeConcatOrSubscript] = "ConcatOrSubscript",
890   [kExprNodeInteger] = "Integer",
891   [kExprNodeFloat] = "Float",
892   [kExprNodeSingleQuotedString] = "SingleQuotedString",
893   [kExprNodeDoubleQuotedString] = "DoubleQuotedString",
894   [kExprNodeOr] = "Or",
895   [kExprNodeAnd] = "And",
896   [kExprNodeUnaryMinus] = "UnaryMinus",
897   [kExprNodeBinaryMinus] = "BinaryMinus",
898   [kExprNodeNot] = "Not",
899   [kExprNodeMultiplication] = "Multiplication",
900   [kExprNodeDivision] = "Division",
901   [kExprNodeMod] = "Mod",
902   [kExprNodeOption] = "Option",
903   [kExprNodeEnvironment] = "Environment",
904   [kExprNodeAssignment] = "Assignment",
905 };
906 
907 /// Represent `int` character as a string
908 ///
909 /// Converts
910 /// - ASCII digits into '{digit}'
911 /// - ASCII printable characters into a single-character strings
912 /// - everything else to numbers.
913 ///
914 /// @param[in]  ch  Character to convert.
915 ///
916 /// @return Converted string, stored in a static buffer (overridden after each
917 ///         call).
intchar2str(const int ch)918 static const char *intchar2str(const int ch)
919   FUNC_ATTR_WARN_UNUSED_RESULT
920 {
921   static char buf[sizeof(int) * 3 + 1];
922   if (' ' <= ch && ch < 0x7f) {
923     if (ascii_isdigit(ch)) {
924       buf[0] = '\'';
925       buf[1] = (char)ch;
926       buf[2] = '\'';
927       buf[3] = NUL;
928     } else {
929       buf[0] = (char)ch;
930       buf[1] = NUL;
931     }
932   } else {
933     snprintf(buf, sizeof(buf), "%i", ch);
934   }
935   return buf;
936 }
937 
938 #ifdef UNIT_TESTING
939 # include <stdio.h>
940 
941 REAL_FATTR_UNUSED
viml_pexpr_debug_print_ast_node(const ExprASTNode * const * const eastnode_p,const char * const prefix)942 static inline void viml_pexpr_debug_print_ast_node(const ExprASTNode *const *const eastnode_p,
943                                                    const char *const prefix)
944 {
945   if (*eastnode_p == NULL) {
946     fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p);
947   } else {
948     fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n",
949             prefix, (void *)eastnode_p, (void *)(*eastnode_p),
950             east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line,
951             (*eastnode_p)->start.col, (*eastnode_p)->len);
952   }
953 }
954 
955 REAL_FATTR_UNUSED
viml_pexpr_debug_print_ast_stack(const ExprASTStack * const ast_stack,const char * const msg)956 static inline void viml_pexpr_debug_print_ast_stack(const ExprASTStack *const ast_stack,
957                                                     const char *const msg)
958   FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
959 {
960   fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack));
961   for (size_t i = 0; i < kv_size(*ast_stack); i++) {
962     viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)kv_A(*ast_stack, i),
963                                     "-");
964   }
965 }
966 
967 REAL_FATTR_UNUSED
viml_pexpr_debug_print_token(const ParserState * const pstate,const LexExprToken token)968 static inline void viml_pexpr_debug_print_token(const ParserState *const pstate,
969                                                 const LexExprToken token)
970   FUNC_ATTR_ALWAYS_INLINE
971 {
972   fprintf(stderr, "\ntkn: %s\n", viml_pexpr_repr_token(pstate, token, NULL));
973 }
974 # define PSTACK(msg) \
975   viml_pexpr_debug_print_ast_stack(&ast_stack, #msg)
976 # define PSTACK_P(msg) \
977   viml_pexpr_debug_print_ast_stack(ast_stack, #msg)
978 # define PNODE_P(eastnode_p, msg) \
979   viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \
980                                   (#msg))
981 # define PTOKEN(tkn) \
982   viml_pexpr_debug_print_token(pstate, tkn)
983 #endif
984 
985 const uint8_t node_maxchildren[] = {
986   [kExprNodeMissing] = 0,
987   [kExprNodeOpMissing] = 2,
988   [kExprNodeTernary] = 2,
989   [kExprNodeTernaryValue] = 2,
990   [kExprNodeRegister] = 0,
991   [kExprNodeSubscript] = 2,
992   [kExprNodeListLiteral] = 1,
993   [kExprNodeUnaryPlus] = 1,
994   [kExprNodeBinaryPlus] = 2,
995   [kExprNodeNested] = 1,
996   [kExprNodeCall] = 2,
997   [kExprNodePlainIdentifier] = 0,
998   [kExprNodePlainKey] = 0,
999   [kExprNodeComplexIdentifier] = 2,
1000   [kExprNodeUnknownFigure] = 1,
1001   [kExprNodeLambda] = 2,
1002   [kExprNodeDictLiteral] = 1,
1003   [kExprNodeCurlyBracesIdentifier] = 1,
1004   [kExprNodeComma] = 2,
1005   [kExprNodeColon] = 2,
1006   [kExprNodeArrow] = 2,
1007   [kExprNodeComparison] = 2,
1008   [kExprNodeConcat] = 2,
1009   [kExprNodeConcatOrSubscript] = 2,
1010   [kExprNodeInteger] = 0,
1011   [kExprNodeFloat] = 0,
1012   [kExprNodeSingleQuotedString] = 0,
1013   [kExprNodeDoubleQuotedString] = 0,
1014   [kExprNodeOr] = 2,
1015   [kExprNodeAnd] = 2,
1016   [kExprNodeUnaryMinus] = 1,
1017   [kExprNodeBinaryMinus] = 2,
1018   [kExprNodeNot] = 1,
1019   [kExprNodeMultiplication] = 2,
1020   [kExprNodeDivision] = 2,
1021   [kExprNodeMod] = 2,
1022   [kExprNodeOption] = 0,
1023   [kExprNodeEnvironment] = 0,
1024   [kExprNodeAssignment] = 2,
1025 };
1026 
1027 /// Free memory occupied by AST
1028 ///
1029 /// @param  ast  AST stack to free.
viml_pexpr_free_ast(ExprAST ast)1030 void viml_pexpr_free_ast(ExprAST ast)
1031 {
1032   ExprASTStack ast_stack;
1033   kvi_init(ast_stack);
1034   kvi_push(ast_stack, &ast.root);
1035   while (kv_size(ast_stack)) {
1036     ExprASTNode **const cur_node = kv_last(ast_stack);
1037 #ifndef NDEBUG
1038     // Explicitly check for AST recursiveness.
1039     for (size_t i = 0; i < kv_size(ast_stack) - 1; i++) {
1040       assert(*kv_A(ast_stack, i) != *cur_node);
1041     }
1042 #endif
1043     if (*cur_node == NULL) {
1044       assert(kv_size(ast_stack) == 1);
1045       kv_drop(ast_stack, 1);
1046     } else if ((*cur_node)->children != NULL) {
1047 #ifndef NDEBUG
1048       const uint8_t maxchildren = node_maxchildren[(*cur_node)->type];
1049       assert(maxchildren > 0);
1050       assert(maxchildren <= 2);
1051       assert(maxchildren == 1
1052              ? (*cur_node)->children->next == NULL
1053              : ((*cur_node)->children->next == NULL
1054                 || (*cur_node)->children->next->next == NULL));
1055 #endif
1056       kvi_push(ast_stack, &(*cur_node)->children);
1057     } else if ((*cur_node)->next != NULL) {
1058       kvi_push(ast_stack, &(*cur_node)->next);
1059     } else if (*cur_node != NULL) {
1060       kv_drop(ast_stack, 1);
1061       switch ((*cur_node)->type) {
1062       case kExprNodeDoubleQuotedString:
1063       case kExprNodeSingleQuotedString:
1064         xfree((*cur_node)->data.str.value);
1065         break;
1066       case kExprNodeMissing:
1067       case kExprNodeOpMissing:
1068       case kExprNodeTernary:
1069       case kExprNodeTernaryValue:
1070       case kExprNodeRegister:
1071       case kExprNodeSubscript:
1072       case kExprNodeListLiteral:
1073       case kExprNodeUnaryPlus:
1074       case kExprNodeBinaryPlus:
1075       case kExprNodeNested:
1076       case kExprNodeCall:
1077       case kExprNodePlainIdentifier:
1078       case kExprNodePlainKey:
1079       case kExprNodeComplexIdentifier:
1080       case kExprNodeUnknownFigure:
1081       case kExprNodeLambda:
1082       case kExprNodeDictLiteral:
1083       case kExprNodeCurlyBracesIdentifier:
1084       case kExprNodeAssignment:
1085       case kExprNodeComma:
1086       case kExprNodeColon:
1087       case kExprNodeArrow:
1088       case kExprNodeComparison:
1089       case kExprNodeConcat:
1090       case kExprNodeConcatOrSubscript:
1091       case kExprNodeInteger:
1092       case kExprNodeFloat:
1093       case kExprNodeOr:
1094       case kExprNodeAnd:
1095       case kExprNodeUnaryMinus:
1096       case kExprNodeBinaryMinus:
1097       case kExprNodeNot:
1098       case kExprNodeMultiplication:
1099       case kExprNodeDivision:
1100       case kExprNodeMod:
1101       case kExprNodeOption:
1102       case kExprNodeEnvironment:
1103         break;
1104       }
1105       xfree(*cur_node);
1106       *cur_node = NULL;
1107     }
1108   }
1109   kvi_destroy(ast_stack);
1110 }
1111 
1112 // Binary operator precedence and associativity:
1113 //
1114 // Operator | Precedence | Associativity
1115 // ---------+------------+-----------------
1116 // ||       | 2          | left
1117 // &&       | 3          | left
1118 // cmp*     | 4          | not associative
1119 // + - .    | 5          | left
1120 // * / %    | 6          | left
1121 //
1122 // * comparison operators:
1123 //
1124 // == ==# ==?  != !=# !=?
1125 // =~ =~# =~?  !~ !~# !~?
1126 //  >  >#  >?  <= <=# <=?
1127 //  <  <#  <?  >= >=# >=?
1128 // is is# is?  isnot isnot# isnot?
1129 
1130 /// Allocate a new node and set some of the values
1131 ///
1132 /// @param[in]  type  Node type to allocate.
1133 /// @param[in]  level  Node level to allocate
viml_pexpr_new_node(const ExprASTNodeType type)1134 static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type)
1135   FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC
1136 {
1137   ExprASTNode *ret = xmalloc(sizeof(*ret));
1138   ret->type = type;
1139   ret->children = NULL;
1140   ret->next = NULL;
1141   return ret;
1142 }
1143 
1144 static struct {
1145   ExprOpLvl lvl;
1146   ExprOpAssociativity ass;
1147 } node_type_to_node_props[] = {
1148   [kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, },
1149   [kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo },
1150 
1151   [kExprNodeNested] = { kEOpLvlParens, kEOpAssNo },
1152   // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but
1153   //       kEOpLvlParens when it comes to inside the parenthesis.
1154   [kExprNodeCall] = { kEOpLvlParens, kEOpAssNo },
1155   [kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo },
1156 
1157   [kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft },
1158   [kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo },
1159   [kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo },
1160   [kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo },
1161 
1162   [kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo },
1163 
1164   // Right associativity for comma because this means easier access to arguments
1165   // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is
1166   // represented as "list(comma(a, comma(b, comma(c, d))))" then if it is
1167   // "list(comma(comma(comma(a, b), c), d))" in which case you will need to
1168   // traverse all three comma() structures. And with comma operator (including
1169   // actual comma operator from C which is not present in VimL) nobody cares
1170   // about associativity, only about order of execution.
1171   [kExprNodeComma] = { kEOpLvlComma, kEOpAssRight },
1172 
1173   // Colons are not eligible for chaining, so nobody cares about associativity.
1174   [kExprNodeColon] = { kEOpLvlColon, kEOpAssNo },
1175 
1176   [kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight },
1177 
1178   [kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft },
1179 
1180   [kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft },
1181 
1182   [kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight },
1183 
1184   [kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight },
1185 
1186   [kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft },
1187   [kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft },
1188   [kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft },
1189 
1190   [kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft },
1191   [kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft },
1192   [kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft },
1193 
1194   [kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo },
1195   [kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo },
1196   [kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo },
1197 
1198   [kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft },
1199 
1200   [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft },
1201 
1202   [kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft },
1203 
1204   [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft },
1205 
1206   [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo },
1207   [kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo },
1208   [kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo },
1209   [kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo },
1210   [kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo },
1211   [kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo },
1212   [kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo },
1213   [kExprNodeOption] = { kEOpLvlValue, kEOpAssNo },
1214   [kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo },
1215 };
1216 
1217 /// Get AST node priority level
1218 ///
1219 /// Used primary to reduce line length, so keep the name short.
1220 ///
1221 /// @param[in]  node  Node to get priority for.
1222 ///
1223 /// @return Node priority level.
node_lvl(const ExprASTNode node)1224 static inline ExprOpLvl node_lvl(const ExprASTNode node)
1225   FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
1226 {
1227   return node_type_to_node_props[node.type].lvl;
1228 }
1229 
1230 /// Get AST node associativity, to be used for operator nodes primary
1231 ///
1232 /// Used primary to reduce line length, so keep the name short.
1233 ///
1234 /// @param[in]  node  Node to get priority for.
1235 ///
1236 /// @return Node associativity.
node_ass(const ExprASTNode node)1237 static inline ExprOpAssociativity node_ass(const ExprASTNode node)
1238   FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
1239 {
1240   return node_type_to_node_props[node.type].ass;
1241 }
1242 
1243 /// Handle binary operator
1244 ///
1245 /// This function is responsible for handling priority levels as well.
1246 ///
1247 /// @param[in]  pstate  Parser state, used for error reporting.
1248 /// @param  ast_stack  AST stack. May be popped of some values and will
1249 ///                    definitely receive new ones.
1250 /// @param  bop_node  New node to handle.
1251 /// @param[out]  want_node_p  New value of want_node.
1252 /// @param[out]  ast_err  Location where error is saved, if any.
1253 ///
1254 /// @return True if no errors occurred, false otherwise.
viml_pexpr_handle_bop(const ParserState * const pstate,ExprASTStack * const ast_stack,ExprASTNode * const bop_node,ExprASTWantedNode * const want_node_p,ExprASTError * const ast_err)1255 static bool viml_pexpr_handle_bop(const ParserState *const pstate, ExprASTStack *const ast_stack,
1256                                   ExprASTNode *const bop_node, ExprASTWantedNode *const want_node_p,
1257                                   ExprASTError *const ast_err)
1258   FUNC_ATTR_NONNULL_ALL
1259 {
1260   bool ret = true;
1261   ExprASTNode **top_node_p = NULL;
1262   ExprASTNode *top_node;
1263   ExprOpLvl top_node_lvl;
1264   ExprOpAssociativity top_node_ass;
1265   assert(kv_size(*ast_stack));
1266   const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall
1267                                    || bop_node->type == kExprNodeSubscript)
1268                                   ? kEOpLvlSubscript
1269                                   : node_lvl(*bop_node));
1270 #ifndef NDEBUG
1271   const ExprOpAssociativity bop_node_ass = (
1272                                             (bop_node->type == kExprNodeCall
1273                                              || bop_node->type == kExprNodeSubscript)
1274       ? kEOpAssLeft
1275       : node_ass(*bop_node));
1276 #endif
1277   do {
1278     ExprASTNode **new_top_node_p = kv_last(*ast_stack);
1279     ExprASTNode *new_top_node = *new_top_node_p;
1280     assert(new_top_node != NULL);
1281     const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node);
1282     const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node);
1283     assert(bop_node_lvl != new_top_node_lvl
1284            || bop_node_ass == new_top_node_ass);
1285     if (top_node_p != NULL
1286         && ((bop_node_lvl > new_top_node_lvl
1287              || (bop_node_lvl == new_top_node_lvl
1288                  && new_top_node_ass == kEOpAssNo)))) {
1289       break;
1290     }
1291     kv_drop(*ast_stack, 1);
1292     top_node_p = new_top_node_p;
1293     top_node = new_top_node;
1294     top_node_lvl = new_top_node_lvl;
1295     top_node_ass = new_top_node_ass;
1296     if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) {
1297       break;
1298     }
1299   } while (kv_size(*ast_stack));
1300   if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) {
1301     // outer(op(x,y)) -> outer(new_op(op(x,y),*))
1302     //
1303     // Before: top_node_p = outer(*), points to op(x,y)
1304     //         Other stack elements unknown
1305     //
1306     // After: top_node_p = outer(*), points to new_op(op(x,y))
1307     //        &bop_node->children->next = new_op(op(x,y),*), points to NULL
1308     *top_node_p = bop_node;
1309     bop_node->children = top_node;
1310     assert(bop_node->children->next == NULL);
1311     kvi_push(*ast_stack, top_node_p);
1312     kvi_push(*ast_stack, &bop_node->children->next);
1313   } else {
1314     assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight);
1315     assert(top_node->children != NULL && top_node->children->next != NULL);
1316     // outer(op(x,y)) -> outer(op(x,new_op(y,*)))
1317     //
1318     // Before: top_node_p = outer(*), points to op(x,y)
1319     //         Other stack elements unknown
1320     //
1321     // After: top_node_p = outer(*), points to op(x,new_op(y))
1322     //        &top_node->children->next = op(x,*), points to new_op(y)
1323     //        &bop_node->children->next = new_op(y,*), points to NULL
1324     bop_node->children = top_node->children->next;
1325     top_node->children->next = bop_node;
1326     assert(bop_node->children->next == NULL);
1327     kvi_push(*ast_stack, top_node_p);
1328     kvi_push(*ast_stack, &top_node->children->next);
1329     kvi_push(*ast_stack, &bop_node->children->next);
1330     // TODO(ZyX-I): Make this not error, but treat like Python does
1331     if (bop_node->type == kExprNodeComparison) {
1332       east_set_error(pstate, ast_err,
1333                      _("E15: Operator is not associative: %.*s"),
1334                      bop_node->start);
1335       ret = false;
1336     }
1337   }
1338   *want_node_p = kENodeValue;
1339   return ret;
1340 }
1341 
1342 /// ParserPosition literal based on ParserPosition pos with columns shifted
1343 ///
1344 /// Function does not check whether resulting position is valid.
1345 ///
1346 /// @param[in]  pos  Position to shift.
1347 /// @param[in]  shift  Number of bytes to shift.
1348 ///
1349 /// @return Shifted position.
shifted_pos(const ParserPosition pos,const size_t shift)1350 static inline ParserPosition shifted_pos(const ParserPosition pos, const size_t shift)
1351   FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT
1352 {
1353   return (ParserPosition) { .line = pos.line, .col = pos.col + shift };
1354 }
1355 
1356 /// ParserPosition literal based on ParserPosition pos with specified column
1357 ///
1358 /// Function does not check whether remaining position is valid.
1359 ///
1360 /// @param[in]  pos  Position to adjust.
1361 /// @param[in]  new_col  New column.
1362 ///
1363 /// @return Shifted position.
recol_pos(const ParserPosition pos,const size_t new_col)1364 static inline ParserPosition recol_pos(const ParserPosition pos, const size_t new_col)
1365   FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT
1366 {
1367   return (ParserPosition) { .line = pos.line, .col = new_col };
1368 }
1369 
1370 /// Get highlight group name
1371 #define HL(g) (is_invalid ? "NvimInvalid" #g : "Nvim" #g)
1372 
1373 /// Highlight current token with the given group
1374 #define HL_CUR_TOKEN(g) \
1375   viml_parser_highlight(pstate, cur_token.start, cur_token.len, \
1376                         HL(g))
1377 
1378 /// Allocate new node, saving some values
1379 #define NEW_NODE(type) \
1380   viml_pexpr_new_node(type)
1381 
1382 /// Set position of the given node to position from the given token
1383 ///
1384 /// @param  cur_node  Node to modify.
1385 /// @param  cur_token  Token to set position from.
1386 #define POS_FROM_TOKEN(cur_node, cur_token) \
1387   do { \
1388     (cur_node)->start = cur_token.start; \
1389     (cur_node)->len = cur_token.len; \
1390   } while (0)
1391 
1392 /// Allocate new node and set its position from the current token
1393 ///
1394 /// If previous token happened to contain spacing then it will be included.
1395 ///
1396 /// @param  cur_node  Variable to save allocated node to.
1397 /// @param  typ  Node type.
1398 #define NEW_NODE_WITH_CUR_POS(cur_node, typ) \
1399   do { \
1400     (cur_node) = NEW_NODE(typ); \
1401     POS_FROM_TOKEN((cur_node), cur_token); \
1402     if (prev_token.type == kExprLexSpacing) { \
1403       (cur_node)->start = prev_token.start; \
1404       (cur_node)->len += prev_token.len; \
1405     } \
1406   } while (0)
1407 
1408 /// Check whether it is possible to have next expression after current
1409 ///
1410 /// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not.
1411 #define MAY_HAVE_NEXT_EXPR \
1412   (kv_size(ast_stack) == 1)
1413 
1414 /// Add operator node
1415 ///
1416 /// @param[in]  cur_node  Node to add.
1417 #define ADD_OP_NODE(cur_node) \
1418   is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, cur_node, \
1419                                        &want_node, &ast.err)
1420 
1421 /// Record missing operator: for things like
1422 ///
1423 ///     :echo @a @a
1424 ///
1425 /// (allowed) or
1426 ///
1427 ///     :echo (@a @a)
1428 ///
1429 /// (parsed as OpMissing(@a, @a)).
1430 #define OP_MISSING \
1431   do { \
1432     if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { \
1433       /* Multiple expressions allowed, return without calling */ \
1434       /* viml_parser_advance(). */ \
1435       goto viml_pexpr_parse_end; \
1436     } else { \
1437       assert(*top_node_p != NULL); \
1438       ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \
1439       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \
1440       cur_node->len = 0; \
1441       ADD_OP_NODE(cur_node); \
1442       goto viml_pexpr_parse_process_token; \
1443     } \
1444   } while (0)
1445 
1446 /// Record missing value: for things like "* 5"
1447 ///
1448 /// @param[in]  msg  Error message.
1449 #define ADD_VALUE_IF_MISSING(msg) \
1450   do { \
1451     if (want_node == kENodeValue) { \
1452       ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \
1453       NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \
1454       (*top_node_p)->len = 0; \
1455       want_node = kENodeOperator; \
1456     } \
1457   } while (0)
1458 
1459 /// Set AST error, unless AST already is not correct
1460 ///
1461 /// @param[out]  ret_ast  AST to set error in.
1462 /// @param[in]  pstate  Parser state, used to get error message argument.
1463 /// @param[in]  msg  Error message, assumed to be already translated and
1464 ///                  containing a single %token "%.*s".
1465 /// @param[in]  start  Position at which error occurred.
east_set_error(const ParserState * const pstate,ExprASTError * const ret_ast_err,const char * const msg,const ParserPosition start)1466 static inline void east_set_error(const ParserState *const pstate, ExprASTError *const ret_ast_err,
1467                                   const char *const msg, const ParserPosition start)
1468   FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
1469 {
1470   if (ret_ast_err->msg != NULL) {
1471     return;
1472   }
1473   const ParserLine pline = pstate->reader.lines.items[start.line];
1474   ret_ast_err->msg = msg;
1475   ret_ast_err->arg_len = (int)(pline.size - start.col);
1476   ret_ast_err->arg = pline.data ? pline.data + start.col : NULL;
1477 }
1478 
1479 /// Set error from the given token and given message
1480 #define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \
1481   do { \
1482     is_invalid = true; \
1483     east_set_error(pstate, &ast.err, msg, cur_token.start); \
1484   } while (0)
1485 
1486 /// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node
1487 #define ERROR_FROM_NODE_AND_MSG(node, msg) \
1488   do { \
1489     is_invalid = true; \
1490     east_set_error(pstate, &ast.err, msg, node->start); \
1491   } while (0)
1492 
1493 /// Set error from the given kExprLexInvalid token
1494 #define ERROR_FROM_TOKEN(cur_token) \
1495   ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg)
1496 
1497 /// Select figure brace type, altering highlighting as well if needed
1498 ///
1499 /// @param[out]  node  Node to modify type.
1500 /// @param[in]  new_type  New type, one of ExprASTNodeType values without
1501 ///                       kExprNode prefix.
1502 /// @param[in]  hl  Corresponding highlighting, passed as an argument to #HL.
1503 #define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \
1504   do { \
1505     ExprASTNode *const node_ = (node); \
1506     assert(node_->type == kExprNodeUnknownFigure \
1507            || node_->type == kExprNode##new_type); \
1508     node_->type = kExprNode##new_type; \
1509     if (pstate->colors) { \
1510       kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = \
1511         HL(hl); \
1512     } \
1513   } while (0)
1514 
1515 /// Add identifier which should constitute complex identifier node
1516 ///
1517 /// This one is to be called only in case want_node is kENodeOperator.
1518 ///
1519 /// @param  new_ident_node_code  Code used to create a new identifier node and
1520 ///                              update want_node and ast_stack, without
1521 ///                              a trailing semicolon.
1522 /// @param  hl  Highlighting name to use, passed as an argument to #HL.
1523 #define ADD_IDENT(new_ident_node_code, hl) \
1524   do { \
1525     assert(want_node == kENodeOperator); \
1526     /* Operator: may only be curly braces name, but only under certain */ \
1527     /* conditions. */ \
1528     /* First condition is that there is no space before a part of complex */ \
1529     /* identifier. */ \
1530     if (prev_token.type == kExprLexSpacing) { \
1531       OP_MISSING; \
1532     } \
1533     switch ((*top_node_p)->type) { \
1534     /* Second is that previous node is one of the identifiers: */ \
1535     /* complex, plain, curly braces. */ \
1536     /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \
1537     /* handle environment variables like those bash uses for */ \
1538     /* `export -f`: their names consist not only of alphanumeric */ \
1539     /* characetrs. */ \
1540     case kExprNodeComplexIdentifier: \
1541     case kExprNodePlainIdentifier: \
1542     case kExprNodeCurlyBracesIdentifier: { \
1543       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \
1544       cur_node->len = 0; \
1545       cur_node->children = *top_node_p; \
1546       *top_node_p = cur_node; \
1547       kvi_push(ast_stack, &cur_node->children->next); \
1548       ExprASTNode **const new_top_node_p = kv_last(ast_stack); \
1549       assert(*new_top_node_p == NULL); \
1550       new_ident_node_code; \
1551       *new_top_node_p = cur_node; \
1552       HL_CUR_TOKEN(hl); \
1553       break; \
1554     } \
1555     default: { \
1556       OP_MISSING; \
1557       break; \
1558     } \
1559     } \
1560   } while (0)
1561 
1562 /// Determine whether given parse type is an assignment
1563 ///
1564 /// @param[in]  pt  Checked parse type.
1565 ///
1566 /// @return true if parsing an assignment, false otherwise.
pt_is_assignment(const ExprASTParseType pt)1567 static inline bool pt_is_assignment(const ExprASTParseType pt)
1568   FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
1569 {
1570   return (pt == kEPTAssignment || pt == kEPTSingleAssignment);
1571 }
1572 
1573 /// Structure used to define “string shifts” necessary to map string
1574 /// highlighting to actual strings.
1575 typedef struct {
1576   size_t start;  ///< Where special character starts in original string.
1577   size_t orig_len;  ///< Length of orininal string (e.g. 4 for "\x80").
1578   size_t act_len;  ///< Length of resulting character(s) (e.g. 1 for "\x80").
1579   bool escape_not_known;  ///< True if escape sequence in original is not known.
1580 } StringShift;
1581 
1582 /// Parse and highlight single- or double-quoted string
1583 ///
1584 /// Function is supposed to detect and highlight regular expressions (but does
1585 /// not do now).
1586 ///
1587 /// @param[out]  pstate  Parser state which also contains a place where
1588 ///                      highlighting is saved.
1589 /// @param[out]  node  Node where string parsing results are saved.
1590 /// @param[in]  token  Token to highlight.
1591 /// @param[in]  ast_stack  Parser AST stack, used to detect whether current
1592 ///                        string is a regex.
1593 /// @param[in]  is_invalid  Whether currently processed token is not valid.
parse_quoted_string(ParserState * const pstate,ExprASTNode * const node,const LexExprToken token,const ExprASTStack ast_stack,const bool is_invalid)1594 static void parse_quoted_string(ParserState *const pstate, ExprASTNode *const node,
1595                                 const LexExprToken token, const ExprASTStack ast_stack,
1596                                 const bool is_invalid)
1597   FUNC_ATTR_NONNULL_ALL
1598 {
1599   const ParserLine pline = pstate->reader.lines.items[token.start.line];
1600   const char *const s = pline.data + token.start.col;
1601   const char *const e = s + token.len - token.data.str.closed;
1602   const char *p = s + 1;
1603   const bool is_double = (token.type == kExprLexDoubleQuotedString);
1604   size_t size = token.len - token.data.str.closed - 1;
1605   kvec_withinit_t(StringShift, 16) shifts;
1606   kvi_init(shifts);
1607   if (!is_double) {
1608     viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote));
1609     while (p < e) {
1610       const char *const chunk_e = memchr(p, '\'', (size_t)(e - p));
1611       if (chunk_e == NULL) {
1612         break;
1613       }
1614       size--;
1615       p = chunk_e + 2;
1616       if (pstate->colors) {
1617         kvi_push(shifts, ((StringShift) {
1618           .start = token.start.col + (size_t)(chunk_e - s),
1619           .orig_len = 2,
1620           .act_len = 1,
1621           .escape_not_known = false,
1622         }));
1623       }
1624     }
1625     node->data.str.size = size;
1626     if (size == 0) {
1627       node->data.str.value = NULL;
1628     } else {
1629       char *v_p;
1630       v_p = node->data.str.value = xmallocz(size);
1631       p = s + 1;
1632       while (p < e) {
1633         const char *const chunk_e = memchr(p, '\'', (size_t)(e - p));
1634         if (chunk_e == NULL) {
1635           memcpy(v_p, p, (size_t)(e - p));
1636           break;
1637         }
1638         memcpy(v_p, p, (size_t)(chunk_e - p));
1639         v_p += (size_t)(chunk_e - p) + 1;
1640         v_p[-1] = '\'';
1641         p = chunk_e + 2;
1642       }
1643     }
1644   } else {
1645     viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote));
1646     for (p = s + 1; p < e; p++) {
1647       if (*p == '\\' && p + 1 < e) {
1648         p++;
1649         if (p + 1 == e) {
1650           size--;
1651           break;
1652         }
1653         switch (*p) {
1654         // A "\<x>" form occupies at least 4 characters, and produces up to
1655         // 6 characters: reserve space for 2 extra, but do not compute actual
1656         // length just now, it would be costy.
1657         case '<':
1658           size += 2;
1659           break;
1660         // Hexadecimal, always single byte, but at least three bytes each.
1661         case 'x':
1662         case 'X':
1663           size--;
1664           if (ascii_isxdigit(p[1])) {
1665             size--;
1666             if (p + 2 < e && ascii_isxdigit(p[2])) {
1667               size--;
1668             }
1669           }
1670           break;
1671         // Unicode
1672         //
1673         // \uF takes 1 byte which is 2 bytes less then escape sequence.
1674         // \uFF: 2 bytes, 2 bytes less.
1675         // \uFFF: 3 bytes, 2 bytes less.
1676         // \uFFFF: 3 bytes, 3 bytes less.
1677         // \UFFFFF: 4 bytes, 3 bytes less.
1678         // \UFFFFFF: 5 bytes, 3 bytes less.
1679         // \UFFFFFFF: 6 bytes, 3 bytes less.
1680         // \U7FFFFFFF: 6 bytes, 4 bytes less.
1681         case 'u':
1682         case 'U': {
1683           const char *const esc_start = p;
1684           size_t n = (*p == 'u' ? 4 : 8);
1685           int nr = 0;
1686           p++;
1687           while (p + 1 < e && n-- && ascii_isxdigit(p[1])) {
1688             p++;
1689             nr = (nr << 4) + hex2nr(*p);
1690           }
1691           // Escape length: (esc_start - 1) points to "\\", esc_start to "u"
1692           // or "U", p to the byte after last byte. So escape sequence
1693           // occupies p - (esc_start - 1), but it stands for a utf_char2len
1694           // bytes.
1695           size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr));
1696           p--;
1697           break;
1698         }
1699         // Octal, always single byte, but at least two bytes each.
1700         case '0':
1701         case '1':
1702         case '2':
1703         case '3':
1704         case '4':
1705         case '5':
1706         case '6':
1707         case '7':
1708           size--;
1709           p++;
1710           if (*p >= '0' && *p <= '7') {
1711             size--;
1712             p++;
1713             if (p < e && *p >= '0' && *p <= '7') {
1714               size--;
1715               p++;
1716             }
1717           }
1718           break;
1719         default:
1720           size--;
1721           break;
1722         }
1723       }
1724     }
1725     if (size == 0) {
1726       node->data.str.value = NULL;
1727       node->data.str.size = 0;
1728     } else {
1729       char *v_p;
1730       v_p = node->data.str.value = xmalloc(size);
1731       p = s + 1;
1732       while (p < e) {
1733         const char *const chunk_e = memchr(p, '\\', (size_t)(e - p));
1734         if (chunk_e == NULL) {
1735           memcpy(v_p, p, (size_t)(e - p));
1736           v_p += e - p;
1737           break;
1738         }
1739         memcpy(v_p, p, (size_t)(chunk_e - p));
1740         v_p += (size_t)(chunk_e - p);
1741         p = chunk_e + 1;
1742         if (p == e) {
1743           *v_p++ = '\\';
1744           break;
1745         }
1746         bool is_unknown = false;
1747         const char *const v_p_start = v_p;
1748         switch (*p) {
1749 #define SINGLE_CHAR_ESC(ch, real_ch) \
1750 case ch: { \
1751   *v_p++ = real_ch; \
1752   p++; \
1753   break; \
1754 }
1755           SINGLE_CHAR_ESC('b', BS)
1756           SINGLE_CHAR_ESC('e', ESC)
1757           SINGLE_CHAR_ESC('f', FF)
1758           SINGLE_CHAR_ESC('n', NL)
1759           SINGLE_CHAR_ESC('r', CAR)
1760           SINGLE_CHAR_ESC('t', TAB)
1761           SINGLE_CHAR_ESC('"', '"')
1762           SINGLE_CHAR_ESC('\\', '\\')
1763 #undef SINGLE_CHAR_ESC
1764 
1765         // Hexadecimal or unicode.
1766         case 'X':
1767         case 'x':
1768         case 'u':
1769         case 'U':
1770           if (p + 1 < e && ascii_isxdigit(p[1])) {
1771             size_t n;
1772             int nr;
1773             bool is_hex = (*p == 'x' || *p == 'X');
1774 
1775             if (is_hex) {
1776               n = 2;
1777             } else if (*p == 'u') {
1778               n = 4;
1779             } else {
1780               n = 8;
1781             }
1782             nr = 0;
1783             while (p + 1 < e && n-- && ascii_isxdigit(p[1])) {
1784               p++;
1785               nr = (nr << 4) + hex2nr(*p);
1786             }
1787             p++;
1788             if (is_hex) {
1789               *v_p++ = (char)nr;
1790             } else {
1791               v_p += utf_char2bytes(nr, (char_u *)v_p);
1792             }
1793           } else {
1794             is_unknown = true;
1795             *v_p++ = *p;
1796             p++;
1797           }
1798           break;
1799         // Octal: "\1", "\12", "\123".
1800         case '0':
1801         case '1':
1802         case '2':
1803         case '3':
1804         case '4':
1805         case '5':
1806         case '6':
1807         case '7': {
1808           uint8_t ch = (uint8_t)(*p++ - '0');
1809           if (p < e && *p >= '0' && *p <= '7') {
1810             ch = (uint8_t)((ch << 3) + *p++ - '0');
1811             if (p < e && *p >= '0' && *p <= '7') {
1812               ch = (uint8_t)((ch << 3) + *p++ - '0');
1813             }
1814           }
1815           *v_p++ = (char)ch;
1816           break;
1817         }
1818         // Special key, e.g.: "\<C-W>"
1819         case '<': {
1820           const size_t special_len = (
1821                                       trans_special((const char_u **)&p, (size_t)(e - p),
1822                                                     (char_u *)v_p, true, true));
1823           if (special_len != 0) {
1824             v_p += special_len;
1825           } else {
1826             is_unknown = true;
1827             mb_copy_char((const char_u **)&p, (char_u **)&v_p);
1828           }
1829           break;
1830         }
1831         default:
1832           is_unknown = true;
1833           mb_copy_char((const char_u **)&p, (char_u **)&v_p);
1834           break;
1835         }
1836         if (pstate->colors) {
1837           kvi_push(shifts, ((StringShift) {
1838             .start = token.start.col + (size_t)(chunk_e - s),
1839             .orig_len = (size_t)(p - chunk_e),
1840             .act_len = (size_t)(v_p - (char *)v_p_start),
1841             .escape_not_known = is_unknown,
1842           }));
1843         }
1844       }
1845       node->data.str.size = (size_t)(v_p - node->data.str.value);
1846     }
1847   }
1848   if (pstate->colors) {
1849     // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions
1850     // TODO(ZyX-I): use ast_stack to determine and highlight printf format str
1851     // TODO(ZyX-I): use ast_stack to determine and highlight expression strings
1852     size_t next_col = token.start.col + 1;
1853     const char *const body_str = (is_double
1854                                   ? HL(DoubleQuotedBody)
1855                                   : HL(SingleQuotedBody));
1856     const char *const esc_str = (is_double
1857                                  ? HL(DoubleQuotedEscape)
1858                                  : HL(SingleQuotedQuote));
1859     const char *const ukn_esc_str = (is_double
1860                                      ? HL(DoubleQuotedUnknownEscape)
1861                                      : HL(SingleQuotedUnknownEscape));
1862     for (size_t i = 0; i < kv_size(shifts); i++) {
1863       const StringShift cur_shift = kv_A(shifts, i);
1864       if (cur_shift.start > next_col) {
1865         viml_parser_highlight(pstate, recol_pos(token.start, next_col),
1866                               cur_shift.start - next_col,
1867                               body_str);
1868       }
1869       viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start),
1870                             cur_shift.orig_len,
1871                             (cur_shift.escape_not_known
1872                              ? ukn_esc_str
1873                              : esc_str));
1874       next_col = cur_shift.start + cur_shift.orig_len;
1875     }
1876     if (next_col - token.start.col < token.len - token.data.str.closed) {
1877       viml_parser_highlight(pstate, recol_pos(token.start, next_col),
1878                             (token.start.col
1879                              + token.len
1880                              - token.data.str.closed
1881                              - next_col),
1882                             body_str);
1883     }
1884   }
1885   if (token.data.str.closed) {
1886     if (is_double) {
1887       viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1),
1888                             1, HL(DoubleQuote));
1889     } else {
1890       viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1),
1891                             1, HL(SingleQuote));
1892     }
1893   }
1894   kvi_destroy(shifts);
1895 }
1896 
1897 /// Additional flags to pass to lexer depending on want_node
1898 static const int want_node_to_lexer_flags[] = {
1899   [kENodeValue] = kELFlagIsNotCmp,
1900   [kENodeOperator] = kELFlagForbidScope,
1901 };
1902 
1903 /// Number of characters to highlight as NumberPrefix depending on the base
1904 static const uint8_t base_to_prefix_length[] = {
1905   [2] = 2,
1906   [8] = 1,
1907   [10] = 0,
1908   [16] = 2,
1909 };
1910 
1911 /// Parse one VimL expression
1912 ///
1913 /// @param  pstate  Parser state.
1914 /// @param[in]  flags  Additional flags, see ExprParserFlags
1915 ///
1916 /// @return Parsed AST.
viml_pexpr_parse(ParserState * const pstate,const int flags)1917 ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
1918   FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
1919 {
1920   ExprAST ast = {
1921     .err = {
1922       .msg = NULL,
1923       .arg_len = 0,
1924       .arg = NULL,
1925     },
1926     .root = NULL,
1927   };
1928   // Expression stack contains current branch in AST tree: that is
1929   // - Stack item 0 contains root of the tree, i.e. &ast->root.
1930   // - Stack item i points to the previous stack items’ last child.
1931   //
1932   // When parser expects “value” node that is something like identifier or "["
1933   // (list start) last stack item contains NULL. Otherwise last stack item is
1934   // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node
1935   // representing "1+1").
1936   ExprASTStack ast_stack;
1937   kvi_init(ast_stack);
1938   kvi_push(ast_stack, &ast.root);
1939   ExprASTWantedNode want_node = kENodeValue;
1940   ExprASTParseTypeStack pt_stack;
1941   kvi_init(pt_stack);
1942   kvi_push(pt_stack, kEPTExpr);
1943   if (flags & kExprFlagsParseLet) {
1944     kvi_push(pt_stack, kEPTAssignment);
1945   }
1946   LexExprToken prev_token = { .type = kExprLexMissing };
1947   bool highlighted_prev_spacing = false;
1948   // Lambda node, valid when parsing lambda arguments only.
1949   ExprASTNode *lambda_node = NULL;
1950   size_t asgn_level = 0;
1951   do {
1952     const bool is_concat_or_subscript = (
1953                                          want_node == kENodeValue
1954                                          && kv_size(ast_stack) > 1
1955                                          && (*kv_Z(ast_stack,
1956                                                    1))->type == kExprNodeConcatOrSubscript);
1957     const int lexer_additional_flags = (
1958                                         kELFlagPeek
1959                                         | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)
1960                                         | ((want_node == kENodeValue
1961                                             && (kv_size(ast_stack) == 1
1962                                                 || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
1963                                                     && ((*kv_Z(ast_stack, 1))->type
1964                                                         != kExprNodeConcatOrSubscript))))
1965            ? kELFlagAllowFloat
1966            : 0));
1967     LexExprToken cur_token = viml_pexpr_next_token(pstate,
1968                                                    want_node_to_lexer_flags[want_node] |
1969                                                    lexer_additional_flags);
1970     if (cur_token.type == kExprLexEOC) {
1971       break;
1972     }
1973     LexExprTokenType tok_type = cur_token.type;
1974     const bool token_invalid = (tok_type == kExprLexInvalid);
1975     bool is_invalid = token_invalid;
1976 viml_pexpr_parse_process_token:
1977     // May use different flags this time.
1978     cur_token = viml_pexpr_next_token(pstate,
1979                                       want_node_to_lexer_flags[want_node] | lexer_additional_flags);
1980     if (tok_type == kExprLexSpacing) {
1981       if (is_invalid) {
1982         HL_CUR_TOKEN(Spacing);
1983       } else {
1984         // Do not do anything: let regular spacing be highlighted as normal.
1985         // This also allows later to highlight spacing as invalid.
1986       }
1987       goto viml_pexpr_parse_cycle_end;
1988     } else if (is_invalid && prev_token.type == kExprLexSpacing
1989                && !highlighted_prev_spacing) {
1990       viml_parser_highlight(pstate, prev_token.start, prev_token.len,
1991                             HL(Spacing));
1992       is_invalid = false;
1993       highlighted_prev_spacing = true;
1994     }
1995     const ParserLine pline = pstate->reader.lines.items[cur_token.start.line];
1996     ExprASTNode **const top_node_p = kv_last(ast_stack);
1997     assert(kv_size(ast_stack) >= 1);
1998     ExprASTNode *cur_node = NULL;
1999 #ifndef NDEBUG
2000     const bool want_value = (want_node == kENodeValue);
2001     assert(want_value == (*top_node_p == NULL));
2002     assert(kv_A(ast_stack, 0) == &ast.root);
2003     // Check that stack item i + 1 points to stack items’ i *last* child.
2004     for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) {
2005       const bool item_null = (want_value && i + 2 == kv_size(ast_stack));
2006       assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1)
2007               && (item_null
2008                   ? (*kv_A(ast_stack, i))->children == NULL
2009                   : (*kv_A(ast_stack, i))->children->next == NULL))
2010              || ((&(*kv_A(ast_stack, i))->children->next
2011                   == kv_A(ast_stack, i + 1))
2012                  && (item_null
2013                      ? (*kv_A(ast_stack, i))->children->next == NULL
2014                      : (*kv_A(ast_stack, i))->children->next->next == NULL)));
2015     }
2016 #endif
2017     // Note: in Vim whether expression "cond?d.a:2" is valid depends both on
2018     // "cond" and whether "d" is a dictionary: expression is valid if condition
2019     // is true and "d" is a dictionary (with "a" key or it will complain about
2020     // missing one, but this is not relevant); if any of the requirements is
2021     // broken then this thing is parsed as "d . a:2" yielding missing colon
2022     // error. This parser does not allow such ambiguity, especially because it
2023     // simply can’t: whether "d" is a dictionary is not known at the parsing
2024     // time.
2025     //
2026     // Here example will always contain a concat with "a:2" sucking colon,
2027     // making expression invalid both because there is no longer a spare colon
2028     // for ternary and because concatenating dictionary with anything is not
2029     // valid. There are more cases when this will make a difference though.
2030     const bool node_is_key = (
2031                               is_concat_or_subscript
2032                               && (cur_token.type == kExprLexPlainIdentifier
2033             ? (!cur_token.data.var.autoload
2034                && cur_token.data.var.scope == kExprVarScopeMissing)
2035             : (cur_token.type == kExprLexNumber))
2036                               && prev_token.type != kExprLexSpacing);
2037     if (is_concat_or_subscript && !node_is_key) {
2038       // Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
2039       // kExprLexSpacing` part of the condition) as well as any other "d.{expr}"
2040       // where "{expr}" does not look like a key is invalid whenever "d" happens
2041       // to be a dictionary. Since parser has no idea whether preceding
2042       // expression is actually a dictionary it can’t outright reject anything,
2043       // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead,
2044       // which will yield different errors then Vim does in a number of
2045       // circumstances, and in any case runtime and not parse time errors.
2046       (*kv_Z(ast_stack, 1))->type = kExprNodeConcat;
2047     }
2048     // Pop some stack pt_stack items in case of misplaced nodes.
2049     const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment;
2050     switch (kv_last(pt_stack)) {
2051     case kEPTExpr:
2052       break;
2053     case kEPTLambdaArguments:
2054       if ((want_node == kENodeOperator
2055            && tok_type != kExprLexComma
2056            && tok_type != kExprLexArrow)
2057           || (want_node == kENodeValue
2058               && !(cur_token.type == kExprLexPlainIdentifier
2059                    && cur_token.data.var.scope == kExprVarScopeMissing
2060                    && !cur_token.data.var.autoload)
2061               && tok_type != kExprLexArrow)) {
2062         lambda_node->data.fig.type_guesses.allow_lambda = false;
2063         if (lambda_node->children != NULL
2064             && lambda_node->children->type == kExprNodeComma) {
2065           // If lambda has comma child this means that parser has already seen
2066           // at least "{arg1,", so node cannot possibly be anything, but
2067           // lambda.
2068 
2069           // Vim may give E121 or E720 in this case, but it does not look
2070           // right to have either because both are results of reevaluation
2071           // possibly-lambda node as a dictionary and here this is not going
2072           // to happen.
2073           ERROR_FROM_TOKEN_AND_MSG(cur_token,
2074                                    _("E15: Expected lambda arguments list or arrow: %.*s"));
2075         } else {
2076           // Else it may appear that possibly-lambda node is actually
2077           // a dictionary or curly-braces-name identifier.
2078           lambda_node = NULL;
2079           kv_drop(pt_stack, 1);
2080         }
2081       }
2082       break;
2083     case kEPTSingleAssignment:
2084     case kEPTAssignment:
2085       if (want_node == kENodeValue
2086           && tok_type != kExprLexBracket
2087           && tok_type != kExprLexPlainIdentifier
2088           && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing)
2089           && !(node_is_key && tok_type == kExprLexNumber)
2090           && tok_type != kExprLexEnv
2091           && tok_type != kExprLexOption
2092           && tok_type != kExprLexRegister) {
2093         ERROR_FROM_TOKEN_AND_MSG(cur_token,
2094                                  _("E15: Expected value part of assignment lvalue: %.*s"));
2095         kv_drop(pt_stack, 1);
2096       } else if (want_node == kENodeOperator
2097                  && tok_type != kExprLexBracket
2098                  && (tok_type != kExprLexFigureBrace
2099                      || cur_token.data.brc.closing)
2100                  && tok_type != kExprLexDot
2101                  && (tok_type != kExprLexComma || !is_single_assignment)
2102                  && tok_type != kExprLexAssignment
2103                  // Curly brace identifiers: will contain plain identifier or
2104                  // another curly brace in position where operator is wanted.
2105                  && !((tok_type == kExprLexPlainIdentifier
2106                        || (tok_type == kExprLexFigureBrace
2107                            && !cur_token.data.brc.closing))
2108                       && prev_token.type != kExprLexSpacing)) {
2109         if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) {
2110           goto viml_pexpr_parse_end;
2111         }
2112         ERROR_FROM_TOKEN_AND_MSG(cur_token,
2113                                  _("E15: Expected assignment operator or subscript: %.*s"));
2114         kv_drop(pt_stack, 1);
2115       }
2116       assert(kv_size(pt_stack));
2117       break;
2118     }
2119     assert(kv_size(pt_stack));
2120     const ExprASTParseType cur_pt = kv_last(pt_stack);
2121     assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments);
2122     switch (tok_type) {
2123     case kExprLexMissing:
2124     case kExprLexSpacing:
2125     case kExprLexEOC:
2126       abort();
2127     case kExprLexInvalid:
2128       ERROR_FROM_TOKEN(cur_token);
2129       tok_type = cur_token.data.err.type;
2130       goto viml_pexpr_parse_process_token;
2131     case kExprLexRegister: {
2132       if (want_node == kENodeOperator) {
2133         // Register in operator position: e.g. @a @a
2134         OP_MISSING;
2135       }
2136       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister);
2137       cur_node->data.reg.name = cur_token.data.reg.name;
2138       *top_node_p = cur_node;
2139       want_node = kENodeOperator;
2140       HL_CUR_TOKEN(Register);
2141       break;
2142     }
2143 #define SIMPLE_UB_OP(op) \
2144 case kExprLex##op: { \
2145   if (want_node == kENodeValue) { \
2146     /* Value level: assume unary operator. */ \
2147     NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \
2148     *top_node_p = cur_node; \
2149     kvi_push(ast_stack, &cur_node->children); \
2150     HL_CUR_TOKEN(Unary##op); \
2151   } else { \
2152     NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \
2153     ADD_OP_NODE(cur_node); \
2154     HL_CUR_TOKEN(Binary##op); \
2155   } \
2156   want_node = kENodeValue; \
2157   break; \
2158 }
2159       SIMPLE_UB_OP(Plus)
2160       SIMPLE_UB_OP(Minus)
2161 #undef SIMPLE_UB_OP
2162 #define SIMPLE_B_OP(op, msg) \
2163 case kExprLex##op: { \
2164   ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \
2165   NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \
2166   HL_CUR_TOKEN(op); \
2167   ADD_OP_NODE(cur_node); \
2168   break; \
2169 }
2170       SIMPLE_B_OP(Or, "or operator")
2171       SIMPLE_B_OP(And, "and operator")
2172 #undef SIMPLE_B_OP
2173     case kExprLexMultiplication:
2174       ADD_VALUE_IF_MISSING(_("E15: Unexpected multiplication-like operator: %.*s"));
2175       switch (cur_token.data.mul.type) {
2176 #define MUL_OP(lex_op_tail, node_op_tail) \
2177 case kExprLexMul##lex_op_tail: { \
2178   NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \
2179   HL_CUR_TOKEN(node_op_tail); \
2180   break; \
2181 }
2182         MUL_OP(Mul, Multiplication)
2183         MUL_OP(Div, Division)
2184         MUL_OP(Mod, Mod)
2185 #undef MUL_OP
2186       }
2187       ADD_OP_NODE(cur_node);
2188       break;
2189     case kExprLexOption: {
2190       if (want_node == kENodeOperator) {
2191         OP_MISSING;
2192       }
2193       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption);
2194       if (cur_token.type == kExprLexInvalid) {
2195         assert(cur_token.len == 1
2196                || (cur_token.len == 3
2197                    && pline.data[cur_token.start.col + 2] == ':'));
2198         cur_node->data.opt.ident = (
2199                                     pline.data + cur_token.start.col + cur_token.len);
2200         cur_node->data.opt.ident_len = 0;
2201         cur_node->data.opt.scope = (
2202                                     cur_token.len == 3
2203               ? (ExprOptScope)pline.data[cur_token.start.col + 1]
2204               : kExprOptScopeUnspecified);
2205       } else {
2206         cur_node->data.opt.ident = cur_token.data.opt.name;
2207         cur_node->data.opt.ident_len = cur_token.data.opt.len;
2208         cur_node->data.opt.scope = cur_token.data.opt.scope;
2209       }
2210       *top_node_p = cur_node;
2211       want_node = kENodeOperator;
2212       viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil));
2213       const size_t scope_shift = (
2214                                   cur_token.data.opt.scope == kExprOptScopeUnspecified ? 0 : 2);
2215       if (scope_shift) {
2216         viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
2217                               HL(OptionScope));
2218         viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1,
2219                               HL(OptionScopeDelimiter));
2220       }
2221       viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift + 1),
2222                             cur_token.len - (scope_shift + 1), HL(OptionName));
2223       break;
2224     }
2225     case kExprLexEnv:
2226       if (want_node == kENodeOperator) {
2227         OP_MISSING;
2228       }
2229       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment);
2230       cur_node->data.env.ident = pline.data + cur_token.start.col + 1;
2231       cur_node->data.env.ident_len = cur_token.len - 1;
2232       if (cur_node->data.env.ident_len == 0) {
2233         ERROR_FROM_TOKEN_AND_MSG(cur_token,
2234                                  _("E15: Environment variable name missing"));
2235       }
2236       *top_node_p = cur_node;
2237       want_node = kENodeOperator;
2238       viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil));
2239       viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1),
2240                             cur_token.len - 1, HL(EnvironmentName));
2241       break;
2242     case kExprLexNot:
2243       if (want_node == kENodeOperator) {
2244         OP_MISSING;
2245       }
2246       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot);
2247       *top_node_p = cur_node;
2248       kvi_push(ast_stack, &cur_node->children);
2249       HL_CUR_TOKEN(Not);
2250       break;
2251     case kExprLexComparison:
2252       ADD_VALUE_IF_MISSING(_("E15: Expected value, got comparison operator: %.*s"));
2253       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison);
2254       if (cur_token.type == kExprLexInvalid) {
2255         cur_node->data.cmp.ccs = kCCStrategyUseOption;
2256         cur_node->data.cmp.type = kExprCmpEqual;
2257         cur_node->data.cmp.inv = false;
2258       } else {
2259         cur_node->data.cmp.ccs = cur_token.data.cmp.ccs;
2260         cur_node->data.cmp.type = cur_token.data.cmp.type;
2261         cur_node->data.cmp.inv = cur_token.data.cmp.inv;
2262       }
2263       ADD_OP_NODE(cur_node);
2264       if (cur_token.data.cmp.ccs != kCCStrategyUseOption) {
2265         viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1,
2266                               HL(Comparison));
2267         viml_parser_highlight(pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1,
2268                               HL(ComparisonModifier));
2269       } else {
2270         HL_CUR_TOKEN(Comparison);
2271       }
2272       want_node = kENodeValue;
2273       break;
2274     case kExprLexComma:
2275       assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments));
2276       if (want_node == kENodeValue) {
2277         // Value level: comma appearing here is not valid.
2278         // Note: in Vim string(,x) will give E116, this is not the case here.
2279         ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got comma: %.*s"));
2280         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing);
2281         cur_node->len = 0;
2282         *top_node_p = cur_node;
2283         want_node = kENodeOperator;
2284       }
2285       if (cur_pt == kEPTLambdaArguments) {
2286         assert(lambda_node != NULL);
2287         assert(lambda_node->data.fig.type_guesses.allow_lambda);
2288         SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda);
2289       }
2290       if (kv_size(ast_stack) < 2) {
2291         goto viml_pexpr_parse_invalid_comma;
2292       }
2293       for (size_t i = 1; i < kv_size(ast_stack); i++) {
2294         ExprASTNode *const *const eastnode_p =
2295           (ExprASTNode *const *)kv_Z(ast_stack, i);
2296         const ExprASTNodeType eastnode_type = (*eastnode_p)->type;
2297         const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p);
2298         if (eastnode_type == kExprNodeLambda) {
2299           assert(cur_pt == kEPTLambdaArguments
2300                  && want_node == kENodeOperator);
2301           break;
2302         } else if (eastnode_type == kExprNodeDictLiteral
2303                    || eastnode_type == kExprNodeListLiteral
2304                    || eastnode_type == kExprNodeCall) {
2305           break;
2306         } else if (eastnode_type == kExprNodeComma
2307                    || eastnode_type == kExprNodeColon
2308                    || eastnode_lvl > kEOpLvlComma) {
2309           // Do nothing
2310         } else {
2311 viml_pexpr_parse_invalid_comma:
2312           ERROR_FROM_TOKEN_AND_MSG(cur_token,
2313                                    _("E15: Comma outside of call, lambda or literal: %.*s"));
2314           break;
2315         }
2316         if (i == kv_size(ast_stack) - 1) {
2317           goto viml_pexpr_parse_invalid_comma;
2318         }
2319       }
2320       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma);
2321       ADD_OP_NODE(cur_node);
2322       HL_CUR_TOKEN(Comma);
2323       break;
2324 #define EXP_VAL_COLON "E15: Expected value, got colon: %.*s"
2325     case kExprLexColon: {
2326       bool is_ternary = false;
2327       if (kv_size(ast_stack) < 2) {
2328         goto viml_pexpr_parse_invalid_colon;
2329       }
2330       bool can_be_ternary = true;
2331       bool is_subscript = false;
2332       for (size_t i = 1; i < kv_size(ast_stack); i++) {
2333         ExprASTNode *const *const eastnode_p =
2334           (ExprASTNode *const *)kv_Z(ast_stack, i);
2335         const ExprASTNodeType eastnode_type = (*eastnode_p)->type;
2336         const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p);
2337         STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma,
2338                       "Unexpected operator priorities");
2339         if (can_be_ternary && eastnode_type == kExprNodeTernaryValue
2340             && !(*eastnode_p)->data.ter.got_colon) {
2341           kv_drop(ast_stack, i);
2342           (*eastnode_p)->start = cur_token.start;
2343           (*eastnode_p)->len = cur_token.len;
2344           if (prev_token.type == kExprLexSpacing) {
2345             (*eastnode_p)->start = prev_token.start;
2346             (*eastnode_p)->len += prev_token.len;
2347           }
2348           is_ternary = true;
2349           (*eastnode_p)->data.ter.got_colon = true;
2350           ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
2351           assert((*eastnode_p)->children != NULL);
2352           assert((*eastnode_p)->children->next == NULL);
2353           kvi_push(ast_stack, &(*eastnode_p)->children->next);
2354           break;
2355         } else if (eastnode_type == kExprNodeUnknownFigure) {
2356           SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict);
2357           break;
2358         } else if (eastnode_type == kExprNodeDictLiteral) {
2359           break;
2360         } else if (eastnode_type == kExprNodeSubscript) {
2361           is_subscript = true;
2362           // can_be_ternary = false;
2363           assert(!is_ternary);
2364           break;
2365         } else if (eastnode_type == kExprNodeColon) {
2366           goto viml_pexpr_parse_invalid_colon;
2367         } else if (eastnode_lvl >= kEOpLvlTernaryValue) {
2368           // Do nothing
2369         } else if (eastnode_lvl >= kEOpLvlComma) {
2370           can_be_ternary = false;
2371         } else {
2372           goto viml_pexpr_parse_invalid_colon;
2373         }
2374         if (i == kv_size(ast_stack) - 1) {
2375           goto viml_pexpr_parse_invalid_colon;
2376         }
2377       }
2378       if (is_subscript) {
2379         assert(kv_size(ast_stack) > 1);
2380         // Colon immediately following subscript start: it is empty subscript
2381         // part like a[:2].
2382         if (want_node == kENodeValue
2383             && (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) {
2384           NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing);
2385           (*top_node_p)->len = 0;
2386           want_node = kENodeOperator;
2387         } else {
2388           ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
2389         }
2390         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon);
2391         ADD_OP_NODE(cur_node);
2392         HL_CUR_TOKEN(SubscriptColon);
2393       } else {
2394         goto viml_pexpr_parse_valid_colon;
2395 viml_pexpr_parse_invalid_colon:
2396         ERROR_FROM_TOKEN_AND_MSG(cur_token,
2397                                  _("E15: Colon outside of dictionary or ternary operator: %.*s"));
2398 viml_pexpr_parse_valid_colon:
2399         ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON));
2400         if (is_ternary) {
2401           HL_CUR_TOKEN(TernaryColon);
2402         } else {
2403           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon);
2404           ADD_OP_NODE(cur_node);
2405           HL_CUR_TOKEN(Colon);
2406         }
2407       }
2408       want_node = kENodeValue;
2409       break;
2410     }
2411 #undef EXP_VAL_COLON
2412     case kExprLexBracket:
2413       if (cur_token.data.brc.closing) {
2414         ExprASTNode **new_top_node_p = NULL;
2415         // Always drop the topmost value:
2416         //
2417         // 1. When want_node != kENodeValue topmost item on stack is
2418         //    a *finished* left operand, which may as well be "{@a}" which
2419         //    needs not be finished again.
2420         // 2. Otherwise it is pointing to NULL what nobody wants.
2421         kv_drop(ast_stack, 1);
2422         if (!kv_size(ast_stack)) {
2423           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral);
2424           cur_node->len = 0;
2425           if (want_node != kENodeValue) {
2426             cur_node->children = *top_node_p;
2427           }
2428           *top_node_p = cur_node;
2429           goto viml_pexpr_parse_bracket_closing_error;
2430         }
2431         if (want_node == kENodeValue) {
2432           // It is OK to want value if
2433           //
2434           // 1. It is empty list literal, in which case top node will be
2435           //    ListLiteral.
2436           // 2. It is list literal with trailing comma, in which case top node
2437           //    will be that comma.
2438           // 3. It is subscript with colon, but without one of the values:
2439           //    e.g. "a[:]", "a[1:]", top node will be colon in this case.
2440           if ((*kv_last(ast_stack))->type != kExprNodeListLiteral
2441               && (*kv_last(ast_stack))->type != kExprNodeComma
2442               && (*kv_last(ast_stack))->type != kExprNodeColon) {
2443             ERROR_FROM_TOKEN_AND_MSG(cur_token,
2444                                      _("E15: Expected value, got closing bracket: %.*s"));
2445           }
2446         }
2447         do {
2448           new_top_node_p = kv_pop(ast_stack);
2449         } while (kv_size(ast_stack)
2450                  && (new_top_node_p == NULL
2451                      || ((*new_top_node_p)->type != kExprNodeListLiteral
2452                          && (*new_top_node_p)->type != kExprNodeSubscript)));
2453         ExprASTNode *new_top_node = *new_top_node_p;
2454         switch (new_top_node->type) {
2455         case kExprNodeListLiteral:
2456           if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) {
2457             ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E475: Unable to assign to empty list: %.*s"));
2458           }
2459           HL_CUR_TOKEN(List);
2460           break;
2461         case kExprNodeSubscript:
2462           HL_CUR_TOKEN(SubscriptBracket);
2463           break;
2464         default:
2465 viml_pexpr_parse_bracket_closing_error:
2466           assert(!kv_size(ast_stack));
2467           ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s"));
2468           HL_CUR_TOKEN(List);
2469           break;
2470         }
2471         kvi_push(ast_stack, new_top_node_p);
2472         want_node = kENodeOperator;
2473         if (kv_size(ast_stack) <= asgn_level) {
2474           assert(kv_size(ast_stack) == asgn_level);
2475           asgn_level = 0;
2476           if (cur_pt == kEPTAssignment) {
2477             assert(ast.err.msg);
2478           } else if (cur_pt == kEPTExpr
2479                      && kv_size(pt_stack) > 1
2480                      && pt_is_assignment(kv_Z(pt_stack, 1))) {
2481             kv_drop(pt_stack, 1);
2482           }
2483         }
2484         if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) {
2485           kv_drop(pt_stack, 1);
2486         }
2487       } else {
2488         if (want_node == kENodeValue) {
2489           // Value means list literal or list assignment.
2490           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral);
2491           *top_node_p = cur_node;
2492           kvi_push(ast_stack, &cur_node->children);
2493           want_node = kENodeValue;
2494           if (cur_pt == kEPTAssignment) {
2495             // Additional assignment parse type allows to easily forbid nested
2496             // lists.
2497             kvi_push(pt_stack, kEPTSingleAssignment);
2498           } else if (cur_pt == kEPTSingleAssignment) {
2499             ERROR_FROM_TOKEN_AND_MSG(cur_token,
2500                                      _("E475: Nested lists not allowed when assigning: %.*s"));
2501           }
2502           HL_CUR_TOKEN(List);
2503         } else {
2504           // Operator means subscript, also in assignment. But in assignment
2505           // subscript may be pretty much any expression, so need to push
2506           // kEPTExpr.
2507           if (prev_token.type == kExprLexSpacing) {
2508             OP_MISSING;
2509           }
2510           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript);
2511           ADD_OP_NODE(cur_node);
2512           HL_CUR_TOKEN(SubscriptBracket);
2513           if (pt_is_assignment(cur_pt)) {
2514             assert(want_node == kENodeValue);  // Subtract 1 for NULL at top.
2515             asgn_level = kv_size(ast_stack) - 1;
2516             kvi_push(pt_stack, kEPTExpr);
2517           }
2518         }
2519       }
2520       break;
2521     case kExprLexFigureBrace:
2522       if (cur_token.data.brc.closing) {
2523         ExprASTNode **new_top_node_p = NULL;
2524         // Always drop the topmost value:
2525         //
2526         // 1. When want_node != kENodeValue topmost item on stack is
2527         //    a *finished* left operand, which may as well be "{@a}" which
2528         //    needs not be finished again.
2529         // 2. Otherwise it is pointing to NULL what nobody wants.
2530         kv_drop(ast_stack, 1);
2531         if (!kv_size(ast_stack)) {
2532           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure);
2533           cur_node->data.fig.type_guesses.allow_lambda = false;
2534           cur_node->data.fig.type_guesses.allow_dict = false;
2535           cur_node->data.fig.type_guesses.allow_ident = false;
2536           cur_node->len = 0;
2537           if (want_node != kENodeValue) {
2538             cur_node->children = *top_node_p;
2539           }
2540           *top_node_p = cur_node;
2541           new_top_node_p = top_node_p;
2542           goto viml_pexpr_parse_figure_brace_closing_error;
2543         }
2544         if (want_node == kENodeValue) {
2545           if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure
2546               && (*kv_last(ast_stack))->type != kExprNodeComma) {
2547             // kv_last being UnknownFigure may occur for empty dictionary
2548             // literal, while Comma is expected in case of non-empty one.
2549             ERROR_FROM_TOKEN_AND_MSG(cur_token,
2550                                      _("E15: Expected value, got closing figure brace: %.*s"));
2551           }
2552         }
2553         do {
2554           new_top_node_p = kv_pop(ast_stack);
2555         } while (kv_size(ast_stack)
2556                  && (new_top_node_p == NULL
2557                      || ((*new_top_node_p)->type != kExprNodeUnknownFigure
2558                          && (*new_top_node_p)->type != kExprNodeDictLiteral
2559                          && ((*new_top_node_p)->type
2560                              != kExprNodeCurlyBracesIdentifier)
2561                          && (*new_top_node_p)->type != kExprNodeLambda)));
2562         ExprASTNode *new_top_node = *new_top_node_p;
2563         switch (new_top_node->type) {
2564         case kExprNodeUnknownFigure:
2565           if (new_top_node->children == NULL) {
2566             // No children of curly braces node indicates empty dictionary.
2567             assert(want_node == kENodeValue);
2568             assert(new_top_node->data.fig.type_guesses.allow_dict);
2569             SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict);
2570             HL_CUR_TOKEN(Dict);
2571           } else if (new_top_node->data.fig.type_guesses.allow_ident) {
2572             SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier,
2573                                      Curly);
2574             HL_CUR_TOKEN(Curly);
2575           } else {
2576             // If by this time type of the node has not already been
2577             // guessed, but it definitely is not a curly braces name then
2578             // it is invalid for sure.
2579             ERROR_FROM_NODE_AND_MSG(new_top_node,
2580                                     _("E15: Don't know what figure brace means: %.*s"));
2581             if (pstate->colors) {
2582               // Will reset to NvimInvalidFigureBrace.
2583               kv_A(*pstate->colors,
2584                    new_top_node->data.fig.opening_hl_idx).group = (
2585                                                                    HL(FigureBrace));
2586             }
2587             HL_CUR_TOKEN(FigureBrace);
2588           }
2589           break;
2590         case kExprNodeDictLiteral:
2591           HL_CUR_TOKEN(Dict);
2592           break;
2593         case kExprNodeCurlyBracesIdentifier:
2594           HL_CUR_TOKEN(Curly);
2595           break;
2596         case kExprNodeLambda:
2597           HL_CUR_TOKEN(Lambda);
2598           break;
2599         default:
2600 viml_pexpr_parse_figure_brace_closing_error:
2601           assert(!kv_size(ast_stack));
2602           ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s"));
2603           HL_CUR_TOKEN(FigureBrace);
2604           break;
2605         }
2606         kvi_push(ast_stack, new_top_node_p);
2607         want_node = kENodeOperator;
2608         if (kv_size(ast_stack) <= asgn_level) {
2609           assert(kv_size(ast_stack) == asgn_level);
2610           if (cur_pt == kEPTExpr
2611               && kv_size(pt_stack) > 1
2612               && pt_is_assignment(kv_Z(pt_stack, 1))) {
2613             kv_drop(pt_stack, 1);
2614             asgn_level = 0;
2615           }
2616         }
2617       } else {
2618         if (want_node == kENodeValue) {
2619           HL_CUR_TOKEN(FigureBrace);
2620           // Value: may be any of lambda, dictionary literal and curly braces
2621           // name.
2622 
2623           // Though if we are in an assignment this may only be a curly braces
2624           // name.
2625           if (pt_is_assignment(cur_pt)) {
2626             NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier);
2627             cur_node->data.fig.type_guesses.allow_lambda = false;
2628             cur_node->data.fig.type_guesses.allow_dict = false;
2629             cur_node->data.fig.type_guesses.allow_ident = true;
2630             kvi_push(pt_stack, kEPTExpr);
2631           } else {
2632             NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure);
2633             cur_node->data.fig.type_guesses.allow_lambda = true;
2634             cur_node->data.fig.type_guesses.allow_dict = true;
2635             cur_node->data.fig.type_guesses.allow_ident = true;
2636           }
2637           if (pstate->colors) {
2638             cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1;
2639           }
2640           *top_node_p = cur_node;
2641           kvi_push(ast_stack, &cur_node->children);
2642           kvi_push(pt_stack, kEPTLambdaArguments);
2643           lambda_node = cur_node;
2644         } else {
2645           ADD_IDENT(do {
2646             NEW_NODE_WITH_CUR_POS(cur_node,
2647                                   kExprNodeCurlyBracesIdentifier);
2648             cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors);
2649             cur_node->data.fig.type_guesses.allow_lambda = false;
2650             cur_node->data.fig.type_guesses.allow_dict = false;
2651             cur_node->data.fig.type_guesses.allow_ident = true;
2652             kvi_push(ast_stack, &cur_node->children);
2653             if (pt_is_assignment(cur_pt)) {
2654               kvi_push(pt_stack, kEPTExpr);
2655             }
2656             want_node = kENodeValue;
2657           } while (0),
2658                     Curly);
2659         }
2660         if (pt_is_assignment(cur_pt)
2661             && !pt_is_assignment(kv_last(pt_stack))) {
2662           assert(want_node == kENodeValue);  // Subtract 1 for NULL at top.
2663           asgn_level = kv_size(ast_stack) - 1;
2664         }
2665       }
2666       break;
2667     case kExprLexArrow:
2668       if (cur_pt == kEPTLambdaArguments) {
2669         kv_drop(pt_stack, 1);
2670         assert(kv_size(pt_stack));
2671         if (want_node == kENodeValue) {
2672           // Wanting value means trailing comma and NULL at the top of the
2673           // stack.
2674           kv_drop(ast_stack, 1);
2675         }
2676         assert(kv_size(ast_stack) >= 1);
2677         while ((*kv_last(ast_stack))->type != kExprNodeLambda
2678                && (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) {
2679           kv_drop(ast_stack, 1);
2680         }
2681         assert((*kv_last(ast_stack)) == lambda_node);
2682         SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda);
2683         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow);
2684         if (lambda_node->children == NULL) {
2685           assert(want_node == kENodeValue);
2686           lambda_node->children = cur_node;
2687           kvi_push(ast_stack, &lambda_node->children);
2688         } else {
2689           assert(lambda_node->children->next == NULL);
2690           lambda_node->children->next = cur_node;
2691           kvi_push(ast_stack, &lambda_node->children->next);
2692         }
2693         kvi_push(ast_stack, &cur_node->children);
2694         lambda_node = NULL;
2695       } else {
2696         // Only first branch is valid.
2697         ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s"));
2698         ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Arrow outside of lambda: %.*s"));
2699         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow);
2700         ADD_OP_NODE(cur_node);
2701       }
2702       want_node = kENodeValue;
2703       HL_CUR_TOKEN(Arrow);
2704       break;
2705     case kExprLexPlainIdentifier: {
2706       const ExprVarScope scope = (cur_token.type == kExprLexInvalid
2707                                     ? kExprVarScopeMissing
2708                                     : cur_token.data.var.scope);
2709       if (want_node == kENodeValue) {
2710         want_node = kENodeOperator;
2711         NEW_NODE_WITH_CUR_POS(cur_node,
2712                               (node_is_key
2713                                  ? kExprNodePlainKey
2714                                  : kExprNodePlainIdentifier));
2715         cur_node->data.var.scope = scope;
2716         const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2);
2717         cur_node->data.var.ident = (pline.data + cur_token.start.col
2718                                     + scope_shift);
2719         cur_node->data.var.ident_len = cur_token.len - scope_shift;
2720         *top_node_p = cur_node;
2721         if (scope_shift) {
2722           assert(!node_is_key);
2723           viml_parser_highlight(pstate, cur_token.start, 1,
2724                                 HL(IdentifierScope));
2725           viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
2726                                 HL(IdentifierScopeDelimiter));
2727         }
2728         viml_parser_highlight(pstate, shifted_pos(cur_token.start,
2729                                                   scope_shift),
2730                               cur_token.len - scope_shift,
2731                               (node_is_key
2732                                  ? HL(IdentifierKey)
2733                                  : HL(IdentifierName)));
2734       } else {
2735         if (scope == kExprVarScopeMissing) {
2736           ADD_IDENT(do {
2737               NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier);
2738               cur_node->data.var.scope = scope;
2739               cur_node->data.var.ident = pline.data + cur_token.start.col;
2740               cur_node->data.var.ident_len = cur_token.len;
2741               want_node = kENodeOperator;
2742             } while (0),
2743                     IdentifierName);
2744         } else {
2745           OP_MISSING;
2746         }
2747       }
2748       break;
2749     }
2750     case kExprLexNumber:
2751       if (want_node != kENodeValue) {
2752         OP_MISSING;
2753       }
2754       if (node_is_key) {
2755         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey);
2756         cur_node->data.var.ident = pline.data + cur_token.start.col;
2757         cur_node->data.var.ident_len = cur_token.len;
2758         HL_CUR_TOKEN(IdentifierKey);
2759       } else if (cur_token.data.num.is_float) {
2760         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat);
2761         cur_node->data.flt.value = cur_token.data.num.val.floating;
2762         HL_CUR_TOKEN(Float);
2763       } else {
2764         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger);
2765         cur_node->data.num.value = cur_token.data.num.val.integer;
2766         const uint8_t prefix_length = base_to_prefix_length[
2767                                                             cur_token.data.num.base];
2768         viml_parser_highlight(pstate, cur_token.start, prefix_length,
2769                               HL(NumberPrefix));
2770         viml_parser_highlight(pstate, shifted_pos(cur_token.start, prefix_length),
2771                               cur_token.len - prefix_length, HL(Number));
2772       }
2773       want_node = kENodeOperator;
2774       *top_node_p = cur_node;
2775       break;
2776     case kExprLexDot:
2777       ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s"));
2778       if (prev_token.type == kExprLexSpacing) {
2779         if (cur_pt == kEPTAssignment) {
2780           ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Cannot concatenate in assignments: %.*s"));
2781         }
2782         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat);
2783         HL_CUR_TOKEN(Concat);
2784       } else {
2785         NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript);
2786         HL_CUR_TOKEN(ConcatOrSubscript);
2787       }
2788       ADD_OP_NODE(cur_node);
2789       break;
2790     case kExprLexParenthesis:
2791       if (cur_token.data.brc.closing) {
2792         if (want_node == kENodeValue) {
2793           if (kv_size(ast_stack) > 1) {
2794             const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1);
2795             if (prev_top_node->type == kExprNodeCall) {
2796               // Function call without arguments, this is not an error.
2797               // But further code does not expect NULL nodes.
2798               kv_drop(ast_stack, 1);
2799               goto viml_pexpr_parse_no_paren_closing_error;
2800             }
2801           }
2802           ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got parenthesis: %.*s"));
2803           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing);
2804           cur_node->len = 0;
2805           *top_node_p = cur_node;
2806         } else {
2807           // Always drop the topmost value: when want_node != kENodeValue
2808           // topmost item on stack is a *finished* left operand, which may as
2809           // well be "(@a)" which needs not be finished again.
2810           kv_drop(ast_stack, 1);
2811         }
2812 viml_pexpr_parse_no_paren_closing_error: {}
2813         ExprASTNode **new_top_node_p = NULL;
2814         while (kv_size(ast_stack)
2815                && (new_top_node_p == NULL
2816                    || ((*new_top_node_p)->type != kExprNodeNested
2817                        && (*new_top_node_p)->type != kExprNodeCall))) {
2818           new_top_node_p = kv_pop(ast_stack);
2819         }
2820         if (new_top_node_p != NULL
2821             && ((*new_top_node_p)->type == kExprNodeNested
2822                 || (*new_top_node_p)->type == kExprNodeCall)) {
2823           if ((*new_top_node_p)->type == kExprNodeNested) {
2824             HL_CUR_TOKEN(NestingParenthesis);
2825           } else {
2826             HL_CUR_TOKEN(CallingParenthesis);
2827           }
2828         } else {
2829           // “Always drop the topmost value” branch has got rid of the single
2830           // value stack had, so there is nothing known to enclose. Correct
2831           // this.
2832           if (new_top_node_p == NULL) {
2833             new_top_node_p = top_node_p;
2834           }
2835           ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing parenthesis: %.*s"));
2836           HL_CUR_TOKEN(NestingParenthesis);
2837           cur_node = NEW_NODE(kExprNodeNested);
2838           cur_node->start = cur_token.start;
2839           cur_node->len = 0;
2840           // Unexpected closing parenthesis, assume that it was wanted to
2841           // enclose everything in ().
2842           cur_node->children = *new_top_node_p;
2843           *new_top_node_p = cur_node;
2844           assert(cur_node->next == NULL);
2845         }
2846         kvi_push(ast_stack, new_top_node_p);
2847         want_node = kENodeOperator;
2848       } else {
2849         switch (want_node) {
2850         case kENodeValue:
2851           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested);
2852           *top_node_p = cur_node;
2853           kvi_push(ast_stack, &cur_node->children);
2854           HL_CUR_TOKEN(NestingParenthesis);
2855           break;
2856         case kENodeOperator:
2857           if (prev_token.type == kExprLexSpacing) {
2858             // For some reason "function (args)" is a function call, but
2859             // "(funcref) (args)" is not. AFAIR this somehow involves
2860             // compatibility and Bram was commenting that this is
2861             // intentionally inconsistent and he is not very happy with the
2862             // situation himself.
2863             if ((*top_node_p)->type != kExprNodePlainIdentifier
2864                 && (*top_node_p)->type != kExprNodeComplexIdentifier
2865                 && (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) {
2866               OP_MISSING;
2867             }
2868           }
2869           NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall);
2870           ADD_OP_NODE(cur_node);
2871           HL_CUR_TOKEN(CallingParenthesis);
2872           break;
2873         }
2874         want_node = kENodeValue;
2875       }
2876       break;
2877     case kExprLexQuestion: {
2878       ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s"));
2879       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary);
2880       ADD_OP_NODE(cur_node);
2881       HL_CUR_TOKEN(Ternary);
2882       ExprASTNode *ter_val_node;
2883       NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue);
2884       ter_val_node->data.ter.got_colon = false;
2885       assert(cur_node->children != NULL);
2886       assert(cur_node->children->next == NULL);
2887       assert(kv_last(ast_stack) == &cur_node->children->next);
2888       *kv_last(ast_stack) = ter_val_node;
2889       kvi_push(ast_stack, &ter_val_node->children);
2890       break;
2891     }
2892     case kExprLexDoubleQuotedString:
2893     case kExprLexSingleQuotedString: {
2894       const bool is_double = (tok_type == kExprLexDoubleQuotedString);
2895       if (!cur_token.data.str.closed) {
2896         // It is weird, but Vim has two identical errors messages with
2897         // different error numbers: "E114: Missing quote" and
2898         // "E115: Missing quote".
2899         ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double
2900                           ? _("E114: Missing double quote: %.*s")
2901                           : _("E115: Missing single quote: %.*s")));
2902       }
2903       if (want_node == kENodeOperator) {
2904         OP_MISSING;
2905       }
2906       NEW_NODE_WITH_CUR_POS(cur_node, (is_double
2907                        ? kExprNodeDoubleQuotedString
2908                        : kExprNodeSingleQuotedString));
2909       *top_node_p = cur_node;
2910       parse_quoted_string(pstate, cur_node, cur_token, ast_stack, is_invalid);
2911       want_node = kENodeOperator;
2912       break;
2913     }
2914     case kExprLexAssignment:
2915       if (cur_pt == kEPTAssignment) {
2916         kv_drop(pt_stack, 1);
2917       } else if (cur_pt == kEPTSingleAssignment) {
2918         kv_drop(pt_stack, 2);
2919         ERROR_FROM_TOKEN_AND_MSG(cur_token,
2920                                  _("E475: Expected closing bracket to end list assignment "
2921                                    "lvalue: %.*s"));
2922       } else {
2923         ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Misplaced assignment: %.*s"));
2924       }
2925       assert(kv_size(pt_stack));
2926       assert(kv_last(pt_stack) == kEPTExpr);
2927       ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s"));
2928       NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment);
2929       cur_node->data.ass.type = cur_token.data.ass.type;
2930       switch (cur_token.data.ass.type) {
2931 #define HL_ASGN(asgn, hl) \
2932 case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; }
2933         HL_ASGN(Plain, PlainAssignment)
2934         HL_ASGN(Add, AssignmentWithAddition)
2935         HL_ASGN(Subtract, AssignmentWithSubtraction)
2936         HL_ASGN(Concat, AssignmentWithConcatenation)
2937 #undef HL_ASGN
2938       }
2939       ADD_OP_NODE(cur_node);
2940       break;
2941     }
2942 viml_pexpr_parse_cycle_end:
2943     prev_token = cur_token;
2944     highlighted_prev_spacing = false;
2945     viml_parser_advance(pstate, cur_token.len);
2946   } while (true);
2947 viml_pexpr_parse_end:
2948   assert(kv_size(pt_stack));
2949   assert(kv_size(ast_stack));
2950   if (want_node == kENodeValue
2951       // Blacklist some parse type entries as their presence means better error
2952       // message in the other branch.
2953       && kv_last(pt_stack) != kEPTLambdaArguments) {
2954     east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"),
2955                    pstate->pos);
2956   } else if (kv_size(ast_stack) != 1) {
2957     // Something may be wrong, check whether it really is.
2958 
2959     // Pointer to ast.root must never be dropped, so “!= 1” is expected to be
2960     // the same as “> 1”.
2961     assert(kv_size(ast_stack));
2962     // Topmost stack item must be a *finished* value, so it must not be
2963     // analyzed. E.g. it may contain an already finished nested expression.
2964     kv_drop(ast_stack, 1);
2965     while (ast.err.msg == NULL && kv_size(ast_stack)) {
2966       const ExprASTNode *const cur_node = (*kv_pop(ast_stack));
2967       // This should only happen when want_node == kENodeValue.
2968       assert(cur_node != NULL);
2969       // TODO(ZyX-I): Rehighlight as invalid?
2970       switch (cur_node->type) {
2971       case kExprNodeOpMissing:
2972       case kExprNodeMissing:
2973         // Error should’ve been already reported.
2974         break;
2975       case kExprNodeCall:
2976         east_set_error(pstate, &ast.err,
2977                        _("E116: Missing closing parenthesis for function call: %.*s"),
2978                        cur_node->start);
2979         break;
2980       case kExprNodeNested:
2981         east_set_error(pstate, &ast.err,
2982                        _("E110: Missing closing parenthesis for nested expression"
2983                          ": %.*s"),
2984                        cur_node->start);
2985         break;
2986       case kExprNodeListLiteral:
2987         // For whatever reason "[1" yields "E696: Missing comma in list" error
2988         // in Vim while "[1," yields E697.
2989         east_set_error(pstate, &ast.err,
2990                        _("E697: Missing end of List ']': %.*s"),
2991                        cur_node->start);
2992         break;
2993       case kExprNodeDictLiteral:
2994         // Same problem like with list literal with E722 (missing comma) vs
2995         // E723, but additionally just "{" yields only E15.
2996         east_set_error(pstate, &ast.err,
2997                        _("E723: Missing end of Dictionary '}': %.*s"),
2998                        cur_node->start);
2999         break;
3000       case kExprNodeUnknownFigure:
3001         east_set_error(pstate, &ast.err,
3002                        _("E15: Missing closing figure brace: %.*s"),
3003                        cur_node->start);
3004         break;
3005       case kExprNodeLambda:
3006         east_set_error(pstate, &ast.err,
3007                        _("E15: Missing closing figure brace for lambda: %.*s"),
3008                        cur_node->start);
3009         break;
3010       case kExprNodeCurlyBracesIdentifier:
3011         // Until trailing "}" it is impossible to distinguish curly braces
3012         // identifier and dictionary, so it must not appear in the stack like
3013         // this.
3014         abort();
3015       case kExprNodeInteger:
3016       case kExprNodeFloat:
3017       case kExprNodeSingleQuotedString:
3018       case kExprNodeDoubleQuotedString:
3019       case kExprNodeOption:
3020       case kExprNodeEnvironment:
3021       case kExprNodeRegister:
3022       case kExprNodePlainIdentifier:
3023       case kExprNodePlainKey:
3024         // These are plain values and not containers, for them it should only
3025         // be possible to show up in the topmost stack element, but it was
3026         // unconditionally popped at the start.
3027         abort();
3028       case kExprNodeComma:
3029       case kExprNodeColon:
3030       case kExprNodeArrow:
3031         // It is actually only valid inside something else, but everything
3032         // where one of the above is valid requires to be closed and thus is
3033         // to be caught later.
3034         break;
3035       case kExprNodeSubscript:
3036       case kExprNodeConcatOrSubscript:
3037       case kExprNodeComplexIdentifier:
3038       case kExprNodeAssignment:
3039       case kExprNodeMod:
3040       case kExprNodeDivision:
3041       case kExprNodeMultiplication:
3042       case kExprNodeNot:
3043       case kExprNodeAnd:
3044       case kExprNodeOr:
3045       case kExprNodeConcat:
3046       case kExprNodeComparison:
3047       case kExprNodeUnaryMinus:
3048       case kExprNodeUnaryPlus:
3049       case kExprNodeBinaryMinus:
3050       case kExprNodeTernary:
3051       case kExprNodeBinaryPlus:
3052         // It is OK to see these in the stack.
3053         break;
3054       case kExprNodeTernaryValue:
3055         if (!cur_node->data.ter.got_colon) {
3056           // Actually Vim throws E109 in more cases.
3057           east_set_error(pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"),
3058                          cur_node->start);
3059         }
3060         break;
3061       }
3062     }
3063   }
3064   kvi_destroy(ast_stack);
3065   return ast;
3066 }  // NOLINT(readability/fn_size)
3067 
3068 #undef NEW_NODE
3069 #undef HL
3070